From 176c9f5b3423d67a101af5088cbb3fa3807015db Mon Sep 17 00:00:00 2001 From: Dan Zeman Date: Wed, 20 Nov 2024 21:52:58 +0100 Subject: [PATCH] Revert "Secondary prepositions should now use ExtPos=ADP, too." This reverts commit 5d0d02869fb5ef16ea5f5f33e35df72579b2c84a. https://github.com/UniversalDependencies/docs/issues/1062 --- validate.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/validate.py b/validate.py index c9a8beab..53f5802a 100755 --- a/validate.py +++ b/validate.py @@ -1515,16 +1515,18 @@ def validate_upos_vs_deprel(id, tree): testmessage = "'cop' should be 'AUX' or 'PRON'/'DET' but it is '%s'" % (upos) warn(testmessage, testclass, testlevel, testid, nodeid=id, lineno=tree['linenos'][id]) # Case is normally an adposition, maybe particle. - # Secondary prepositions ([cs] NOUN pomocí, prostřednictvím; [en] VERB including) - # may keep their original UPOS tag if they use the ExtPos=ADP feature to signal - # that they are acting as preposition. + # However, there are also secondary adpositions and they may have the original POS tag: + # NOUN: [cs] pomocí, prostřednictvím + # VERB: [en] including # Interjection can also act as case marker for vocative, as in Sanskrit: भोः भगवन् / bhoḥ bhagavan / oh sir. - if deprel == 'case' and re.match(r"^(NOUN|PROPN|ADJ|PRON|DET|NUM|VERB|AUX)", upos): + if deprel == 'case' and re.match(r"^(PROPN|ADJ|PRON|DET|NUM|AUX)", upos): testid = 'rel-upos-case' testmessage = "'case' should not be '%s'" % (upos) warn(testmessage, testclass, testlevel, testid, nodeid=id, lineno=tree['linenos'][id]) # Mark is normally a conjunction or adposition, maybe particle but definitely not a pronoun. - if deprel == 'mark' and re.match(r"^(NOUN|PROPN|ADJ|PRON|DET|NUM|VERB|AUX|INTJ)", upos): + ###!!! February 2022: Temporarily allow mark+VERB ("regarding"). In the future, it should be banned again + ###!!! by default (and case+VERB too), but there should be a language-specific list of exceptions. + if deprel == 'mark' and re.match(r"^(NOUN|PROPN|ADJ|PRON|DET|NUM|AUX|INTJ)", upos): testid = 'rel-upos-mark' testmessage = "'mark' should not be '%s'" % (upos) warn(testmessage, testclass, testlevel, testid, nodeid=id, lineno=tree['linenos'][id])