From 5d0d02869fb5ef16ea5f5f33e35df72579b2c84a Mon Sep 17 00:00:00 2001 From: Dan Zeman Date: Tue, 19 Nov 2024 08:40:48 +0100 Subject: [PATCH] Secondary prepositions should now use ExtPos=ADP, too. https://github.com/UniversalDependencies/docs/issues/1062 --- validate.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/validate.py b/validate.py index a85d0964..40adf6a1 100755 --- a/validate.py +++ b/validate.py @@ -1515,18 +1515,16 @@ def validate_upos_vs_deprel(id, tree): testmessage = "'cop' should be 'AUX' or 'PRON'/'DET' but it is '%s'" % (upos) warn(testmessage, testclass, testlevel, testid, nodeid=id, lineno=tree['linenos'][id]) # Case is normally an adposition, maybe particle. - # However, there are also secondary adpositions and they may have the original POS tag: - # NOUN: [cs] pomocí, prostřednictvím - # VERB: [en] including + # Secondary prepositions ([cs] NOUN pomocí, prostřednictvím; [en] VERB including) + # may keep their original UPOS tag if they use the ExtPos=ADP feature to signal + # that they are acting as a preposition. # Interjection can also act as case marker for vocative, as in Sanskrit: भोः भगवन् / bhoḥ bhagavan / oh sir. - if deprel == 'case' and re.match(r"^(PROPN|ADJ|PRON|DET|NUM|AUX)", upos): + if deprel == 'case' and re.match(r"^(NOUN|PROPN|ADJ|PRON|DET|NUM|VERB|AUX)", upos): testid = 'rel-upos-case' testmessage = "'case' should not be '%s'" % (upos) warn(testmessage, testclass, testlevel, testid, nodeid=id, lineno=tree['linenos'][id]) # Mark is normally a conjunction or adposition, maybe particle but definitely not a pronoun. - ###!!! February 2022: Temporarily allow mark+VERB ("regarding"). In the future, it should be banned again - ###!!! by default (and case+VERB too), but there should be a language-specific list of exceptions. 
- if deprel == 'mark' and re.match(r"^(NOUN|PROPN|ADJ|PRON|DET|NUM|AUX|INTJ)", upos) and not 'fixed' in childrels: + if deprel == 'mark' and re.match(r"^(NOUN|PROPN|ADJ|PRON|DET|NUM|VERB|AUX|INTJ)", upos) and not 'fixed' in childrels: testid = 'rel-upos-mark' testmessage = "'mark' should not be '%s'" % (upos) warn(testmessage, testclass, testlevel, testid, nodeid=id, lineno=tree['linenos'][id])