From 9946040b8dda743ef1e04bfcca453d67b2cf59ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maty=C3=A1=C5=A1=20Kopp?= Date: Thu, 21 Nov 2024 14:29:44 +0100 Subject: [PATCH 1/6] add Hebrew into list of languages to be translated (related to #881) --- Build/Taxonomies/taxonomy-translation-include.tsv | 1 + Build/Taxonomies/taxonomy-translation-responsibility.tsv | 1 + 2 files changed, 2 insertions(+) diff --git a/Build/Taxonomies/taxonomy-translation-include.tsv b/Build/Taxonomies/taxonomy-translation-include.tsv index a72f21749..6d0bb3ee2 100644 --- a/Build/Taxonomies/taxonomy-translation-include.tsv +++ b/Build/Taxonomies/taxonomy-translation-include.tsv @@ -15,6 +15,7 @@ fi ParlaMint-FI fr ParlaMint-BE fr ParlaMint-FR gl ParlaMint-ES-GA +he ParlaMint-IL hr ParlaMint-HR hu ParlaMint-HU is ParlaMint-IS diff --git a/Build/Taxonomies/taxonomy-translation-responsibility.tsv b/Build/Taxonomies/taxonomy-translation-responsibility.tsv index 67b313f5c..68ae40c2b 100644 --- a/Build/Taxonomies/taxonomy-translation-responsibility.tsv +++ b/Build/Taxonomies/taxonomy-translation-responsibility.tsv @@ -12,6 +12,7 @@ eu ParlaMint-ES-PV fi ParlaMint-FI fr ParlaMint-FR gl ParlaMint-ES-GA +he ParlaMint-IL hr ParlaMint-HR hu ParlaMint-HU is ParlaMint-IS From aad77ea10919e3e15551912e16308581b0952985 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maty=C3=A1=C5=A1=20Kopp?= Date: Mon, 25 Nov 2024 07:50:21 +0100 Subject: [PATCH 2/6] add ParlaMint-IL with lang=he to common files --- Makefile | 2 +- Scripts/parlamint-add-common-content.xsl | 4 ++++ Scripts/parlamint-factorize-corpora.pl | 1 + Scripts/parlamint2conllu.pl | 1 + 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f6962cbf9..64b3c5eda 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ ##$PARLIAMENTS: Space separated list of country codes #Parliaments for V4.1 -PARLIAMENTS = AT BE BG CZ DK EE ES ES-CT ES-GA ES-PV FI FR GB GR HR HU IS IT LV NL NO PL PT SE SI TR BA RS UA +PARLIAMENTS = AT BE BG CZ DK EE 
ES ES-CT ES-GA ES-PV FI FR GB GR HR HU IS IT LV NL NO PL PT SE SI TR BA RS UA IL ##$JAVA-MEMORY## Set a java memory maxsize in GB JAVA-MEMORY = diff --git a/Scripts/parlamint-add-common-content.xsl b/Scripts/parlamint-add-common-content.xsl index 77731cb76..cb043fdc5 100755 --- a/Scripts/parlamint-add-common-content.xsl +++ b/Scripts/parlamint-add-common-content.xsl @@ -171,6 +171,10 @@ Legislature Unicameralism + + Legislature + Unicameralism + Legislature Unicameralism diff --git a/Scripts/parlamint-factorize-corpora.pl b/Scripts/parlamint-factorize-corpora.pl index 69fe3b239..6656b7352 100755 --- a/Scripts/parlamint-factorize-corpora.pl +++ b/Scripts/parlamint-factorize-corpora.pl @@ -54,6 +54,7 @@ $country2lang{'SK'} = 'sk'; $country2lang{'TR'} = 'tr'; $country2lang{'UA'} = 'uk, ru'; +$country2lang{'IL'} = 'he'; $bkpName = "BKP"; $Saxon = "java -jar $Bin/bin/saxon.jar"; diff --git a/Scripts/parlamint2conllu.pl b/Scripts/parlamint2conllu.pl index 11cf49e75..fe02903fe 100755 --- a/Scripts/parlamint2conllu.pl +++ b/Scripts/parlamint2conllu.pl @@ -63,6 +63,7 @@ sub usage $country2lang{'SK'} = 'sk'; $country2lang{'TR'} = 'tr'; $country2lang{'UA'} = 'uk, ru'; +$country2lang{'IL'} = 'he'; print STDERR "INFO: Converting directory $inDir\n"; my $rootAnaFile = ''; From 72da8810305d6d9b0dd4130a12505a9f6e68b01b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maty=C3=A1=C5=A1=20Kopp?= Date: Mon, 25 Nov 2024 07:53:46 +0100 Subject: [PATCH 3/6] add SK/sk to taxonomy translations setup --- Build/Taxonomies/taxonomy-translation-include.tsv | 1 + Build/Taxonomies/taxonomy-translation-responsibility.tsv | 1 + 2 files changed, 2 insertions(+) diff --git a/Build/Taxonomies/taxonomy-translation-include.tsv b/Build/Taxonomies/taxonomy-translation-include.tsv index 6d0bb3ee2..dfa172a63 100644 --- a/Build/Taxonomies/taxonomy-translation-include.tsv +++ b/Build/Taxonomies/taxonomy-translation-include.tsv @@ -27,6 +27,7 @@ nl ParlaMint-BE nl ParlaMint-NL pl ParlaMint-PL pt ParlaMint-PT +sk 
ParlaMint-SK sl ParlaMint-SI ro ParlaMint-RO sr ParlaMint-RS diff --git a/Build/Taxonomies/taxonomy-translation-responsibility.tsv b/Build/Taxonomies/taxonomy-translation-responsibility.tsv index 68ae40c2b..fb2369fd7 100644 --- a/Build/Taxonomies/taxonomy-translation-responsibility.tsv +++ b/Build/Taxonomies/taxonomy-translation-responsibility.tsv @@ -23,6 +23,7 @@ nb ParlaMint-NO nl ParlaMint-NL pl ParlaMint-PL pt ParlaMint-PT +sk ParlaMint-SK sl ParlaMint-SI ro ParlaMint-RO sr ParlaMint-RS From 4fb5e382c495e71501c53127ba3361075b1b4513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maty=C3=A1=C5=A1=20Kopp?= Date: Thu, 28 Nov 2024 10:49:16 +0100 Subject: [PATCH 4/6] fix path to common taxonomies in GitHub action --- .github/actions/ParlaMintValidate/validate.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/ParlaMintValidate/validate.sh b/.github/actions/ParlaMintValidate/validate.sh index 414b834f9..562b8275a 100755 --- a/.github/actions/ParlaMintValidate/validate.sh +++ b/.github/actions/ParlaMintValidate/validate.sh @@ -72,7 +72,7 @@ for parla in $(jq -r '.[]' <<< $1 ); do fi echo "::warning:: TMP restore taxonomy" - git checkout Corpora/Taxonomies/ParlaMint-taxonomy* + git checkout Build/Taxonomies/ParlaMint-taxonomy* git checkout ${DATADIR}/ParlaMint-$parla/ParlaMint-taxonomy* done From 2c4fb474289a77d1a6c53047797995ae2cbce7da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=C5=BE=20Erjavec?= Date: Sat, 28 Dec 2024 16:44:12 +0100 Subject: [PATCH 5/6] Update with LREV publication. --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 82ac53161..c95c53321 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # ParlaMint: Comparable Parliamentary Corpora The [CLARIN ParlaMint project](https://www.clarin.eu/parlamint) -is compiling comparable parliamentary corpora for a number of countries and languages. 
+compiled comparable parliamentary corpora for a number of countries and languages. ParlaMint corpora are interoperable, i.e. encoded to a very constrained common ParlaMint schema, a specialisation of the [Parla-CLARIN recommendations](https://clarin-eric.github.io/parla-clarin/), @@ -20,8 +20,8 @@ translated to English, and is available from the CLARIN.SI repository: Publications connected to ParlaMint are available at the [ParlaMint project page](https://www.clarin.eu/parlamint#publications-and%C2%A0presentations). -The two most comprehensive publication on ParlaMint corpora are the LREV preprint describing version 4.1 -and the LREV publication describing version 2.1: +The two most comprehensive publications on ParlaMint corpora are the two open access LREV papers describing +versions 4.1 and 2.1: - Tomaž Erjavec, Matyáš Kopp, Nikola Ljubešić, Taja Kuzman, Paul Rayson, Petya Osenova, Maciej Ogrodniczuk, Çağrı Çöltekin, Danijel Koržinek, Katja Meden, Jure Skubic, Peter Rupnik, Tommaso @@ -29,17 +29,17 @@ and the LREV publication describing version 2.1: Roberts Darģis, Sascha Diwersy, Maria Gavriilidou, van Ruben Heusden, Mikel Iruskieta, Neeme Kahusk, Anna Kryvenko, Noémi Ligeti-Nagy, Carmen Magariños, Martin Mölder, Costanza Navarretta, Kiril Simov, Lars Magne Tungland, Jouni Tuominen, John Vidler, Adina Ioana Vladu, - Tanja Wissik, Väinö Yrjänäinen, Darja Fišer. - *ParlaMint II: Advancing Comparable Parliamentary Corpora Across Europe*. - (2024). - DOI: [10.21203/rs.3.rs-4176128/v1](https://doi.org/10.21203/rs.3.rs-4176128/v1). + Tanja Wissik, Väinö Yrjänäinen & Darja Fišer. + _ParlaMint II: Advancing Comparable Parliamentary Corpora Across Europe_. + *Language Resources & Evaluation* (2024). + DOI: [10.1007/s10579-024-09798-w](https://doi.org/10.1007/s10579-024-09798-w).
- Tomaž Erjavec, Maciej Ogrodniczuk, Petya Osenova, Nikola Ljubešić, Kiril Simov, Andrej Pančur, Michał Rudolf, Matyáš Kopp, Starkaður Barkarson, Steinþór Steingrímsson, Çağrı Çöltekin, Jesse de Does, Katrien Depuydt, Tommaso Agnoloni, Giulia Venturi, María Calzada Pérez, Luciana D. de Macedo, Costanza Navarretta, Giancarlo Luxardo, Matthew Coole, Paul Rayson, Vaidas Morkevičius, Tomas Krilavičius, Roberts Darǵis, Orsolya Ring, Ruben van Heusden, Maarten Marx & Darja Fišer. - The ParlaMint corpora of parliamentary proceedings. + _The ParlaMint corpora of parliamentary proceedings_. *Language Resources & Evaluation* 57:415–448 (2023). DOI: [10.1007/s10579-021-09574-0](https://doi.org/10.1007/s10579-021-09574-0). From f0d7ee9e435dab6e57344256ac5aabbabd45eb8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=C5=BE=20Erjavec?= Date: Sat, 28 Dec 2024 16:45:30 +0100 Subject: [PATCH 6/6] Change italic to bold. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c95c53321..7cd5e0e1b 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ versions 4.1 and 2.1: Kahusk, Anna Kryvenko, Noémi Ligeti-Nagy, Carmen Magariños, Martin Mölder, Costanza Navarretta, Kiril Simov, Lars Magne Tungland, Jouni Tuominen, John Vidler, Adina Ioana Vladu, Tanja Wissik, Väinö Yrjänäinen & Darja Fišer. - _ParlaMint II: Advancing Comparable Parliamentary Corpora Across Europe_. + **ParlaMint II: Advancing Comparable Parliamentary Corpora Across Europe**. *Language Resources & Evaluation* (2024). DOI: [10.1007/s10579-024-09798-w](https://doi.org/10.1007/s10579-024-09798-w). @@ -39,7 +39,7 @@ versions 4.1 and 2.1: de Does, Katrien Depuydt, Tommaso Agnoloni, Giulia Venturi, María Calzada Pérez, Luciana D. de Macedo, Costanza Navarretta, Giancarlo Luxardo, Matthew Coole, Paul Rayson, Vaidas Morkevičius, Tomas Krilavičius, Roberts Darǵis, Orsolya Ring, Ruben van Heusden, Maarten Marx & Darja Fišer. 
- _The ParlaMint corpora of parliamentary proceedings_. + **The ParlaMint corpora of parliamentary proceedings**. *Language Resources & Evaluation* 57:415–448 (2023). DOI: [10.1007/s10579-021-09574-0](https://doi.org/10.1007/s10579-021-09574-0).