@@ -36,6 +36,9 @@ def __init__(self, neg=True, **kwargs):
3636
3737 def process_node (self , node ):
3838
39+ if node .misc ['Phrase' ] != '' :
40+ return
41+
3942 cop = [x for x in node .children if x .udeprel == 'cop' ]
4043
4144 # only expl or expl:pv, no expl:impers or expl:pass
@@ -117,6 +120,7 @@ def process_node(self, node):
117120 phrase_ords = [node .ord ] + [r .ord for r in refl ]
118121 phrase_ords .sort ()
119122
123+ self .process_phrases_with_ir_aller (node , expl , polarity , phrase_ords , node )
120124 self .process_simple_verb_forms (node , expl , polarity , phrase_ords , node )
121125
122126
@@ -147,6 +151,7 @@ def process_node(self, node):
147151 phrase_ords .sort ()
148152
149153 # TODO phrase-level features are currently determined based on the first passive auxiliary, but it can happen that there are more than one passive auxiliary
154+ self .process_phrases_with_ir_aller (auxes [0 ], expl , polarity , phrase_ords , node )
150155 self .process_simple_verb_forms (auxes [0 ], expl , polarity , phrase_ords , node )
151156
152157 # head verb has passive auxiliary and also other auxiliaries
@@ -224,6 +229,7 @@ def process_modal_verbs(self, modals, modal_auxes, modal_neg):
224229 polarity = 'Neg'
225230 else :
226231 phrase_ords = [modals [0 ].ord ]
232+ self .process_phrases_with_ir_aller (modals [0 ], '' , polarity , phrase_ords , modals [0 ])
227233 self .process_simple_verb_forms (modals [0 ], '' , polarity , phrase_ords , modals [0 ])
228234
229235 else :
@@ -238,6 +244,66 @@ def process_modal_verbs(self, modals, modal_auxes, modal_neg):
238244
239245 self .process_periphrastic_verb_forms (modals [0 ], modal_auxes , '' , polarity , phrase_ords , modals [0 ])
240246
def process_phrases_with_ir_aller(self, node, expl, polarity, phrase_ords, head_node):
    """
    Annotate phrases headed by the full verb 'ir' / 'aller' governing an xcomp with the Phrase* attributes.

    The method only acts when `node` has lemma 'ir' or 'aller', is tagged VERB and has at least one
    xcomp child (e.g. the Spanish construction VERB 'ir' + ADP 'a' + infinitive). In that case `node`
    is marked with PhraseAux=Yes and the Phrase* attributes are written to the first xcomp child.
    In every other case the method returns without modifying anything.

    Parameters:
        node (udapi.core.node.Node): The candidate 'ir' / 'aller' verb whose features are classified.
        expl (str): The value of the PhraseExpl attribute.
        polarity (str): The value of the PhrasePolarity attribute.
        phrase_ords (list[int]): Accepted for signature parity with the sibling process_* methods.
            It is not read: the ords are recomputed from `node`, its first xcomp, and that xcomp's
            aux, cop and 'a'-marker children.
            NOTE(review): ords collected by the caller (e.g. reflexives) are therefore dropped - confirm this is intended.
        head_node (udapi.core.node.Node): Only checked for an existing Phrase annotation; the attributes
            themselves are written to the first xcomp child of `node`, not to `head_node`.
    """

    # phrase already annotated
    if head_node.misc['Phrase'] != '':
        return

    xcomps = [x for x in node.children if x.udeprel == 'xcomp']
    if node.lemma not in ('ir', 'aller') or node.upos != 'VERB' or not xcomps:
        return

    content_verb = xcomps[0]

    # mark the motion verb so that process_simple_verb_forms skips it afterwards
    node.misc['PhraseAux'] = 'Yes'

    auxes = [x for x in content_verb.children if x.udeprel == 'aux']
    aux_pass = [x for x in auxes if x.deprel == 'aux:pass']
    auxes_without_pass = [x for x in auxes if x.deprel != 'aux:pass']

    # tense of 'ir' / 'aller' -> (PhraseTense, PhraseAspect); any other tense is kept as is with empty aspect
    periphrastic_future = {
        'Pres': ('Fut', ''),
        'Imp': ('PastFut', 'Imp'),
        'Fut': ('FutFut', ''),
        'Past': ('PastFut', 'Perf'),
    }
    node_tense = node.feats['Tense']
    tense, aspect = periphrastic_future.get(node_tense, (node_tense, ''))

    if auxes_without_pass:
        # Inspect the first NON-passive auxiliary. Looking at auxes[0] would test the
        # passive auxiliary instead whenever it happens to be the first aux child.
        first_aux_lemma = auxes_without_pass[0].lemma
        if first_aux_lemma == 'estar':
            aspect += 'Prog'
        if first_aux_lemma == 'haber':
            aspect += 'Perf'

    # a passive auxiliary on the xcomp overrides the voice of 'ir' / 'aller'
    voice = 'Pass' if aux_pass else node.feats['Voice']

    adp_a = [x for x in content_verb.children if x.lemma == 'a' and x.upos == 'ADP' and x.udeprel == 'mark']
    cop = [x for x in content_verb.children if x.udeprel == 'cop']

    ords = [node.ord, content_verb.ord] + [x.ord for x in auxes] + [x.ord for x in cop]
    if adp_a:
        # only the first 'a' marker is included in the phrase
        ords.append(adp_a[0].ord)
    ords.sort()

    self.write_node_info(content_verb,
        tense=tense,
        number=node.feats['Number'],
        person=node.feats['Person'],
        aspect=aspect,
        mood=node.feats['Mood'],
        form='Fin',
        voice=voice,
        expl=expl,
        polarity=polarity,
        ords=ords)
    return
241307
242308 def process_simple_verb_forms (self , node , expl , polarity , phrase_ords , head_node ):
243309 """
@@ -251,6 +317,9 @@ def process_simple_verb_forms(self, node, expl, polarity, phrase_ords, head_node
251317 head_node (udapi.core.node.Node): The node that should receive the Phrase* attributes, i.e., the head of the phrase.
252318 """
253319
320+ if node .misc ['PhraseAux' ] != '' :
321+ return
322+
254323 # Portuguese
255324 # presente -> PhraseTense=Pres, PhraseAspect=''
256325 # Futuro do presente -> PhraseTense=Fut, PhraseAspect=''
@@ -266,7 +335,7 @@ def process_simple_verb_forms(self, node, expl, polarity, phrase_ords, head_node
266335 aspect = ''
267336 tense = node .feats ['Tense' ]
268337 form = node .feats ['VerbForm' ]
269-
338+
270339 if node .feats ['Mood' ] == 'Ind' :
271340
272341 # Portuguese
@@ -358,7 +427,6 @@ def process_simple_verb_forms(self, node, expl, polarity, phrase_ords, head_node
358427 ords = phrase_ords
359428 )
360429
361-
362430 def process_periphrastic_verb_forms (self , node , auxes , expl , polarity , phrase_ords , head_node ):
363431 """
364432 Annotate periphrastic verb forms with the Phrase* attributes.
@@ -372,6 +440,10 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
372440 head_node (udapi.core.node.Node): The node that should receive the Phrase* attributes, i.e., the head of the phrase.
373441 """
374442
443+ # phrase already annotated
444+ if head_node .misc ['Phrase' ] != '' :
445+ return
446+
375447 if len (auxes ) == 1 :
376448 # Cnd
377449 if auxes [0 ].feats ['Mood' ] == 'Cnd' and (node .feats ['VerbForm' ] == 'Part' or node .feats ['VerbForm' ] == 'Ger' ):
@@ -572,7 +644,7 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
572644
573645 # auxiliary 'ir' followed by infinitive
574646 # TODO solve these verb forms for Spanish (VERB 'ir' + ADP 'a' + infinitive)
575- if auxes [0 ].lemma == 'ir' and node .feats ['VerbForm' ] == 'Inf' :
647+ if auxes [0 ].lemma in [ 'ir' ] and node .feats ['VerbForm' ] == 'Inf' :
576648
577649 tense = node .feats ['Tense' ]
578650 aspect = ''
@@ -595,7 +667,7 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
595667 # Futuro perifrástico passado perf -> PhraseTense=PastFut, PhraseAspect=Perf
596668 elif auxes [0 ].feats ['Tense' ] == 'Past' :
597669 tense = Tense .PASTFUT .value
598- aspect = Aspect .PERF .value
670+ aspect = Aspect .PERF .value
599671
600672 self .write_node_info (head_node ,
601673 tense = tense ,
@@ -732,6 +804,7 @@ def process_copulas(self, node, cop, expl, polarity, phrase_ords):
732804 """
733805
734806 # classify the morphological features of the copula node and propagate them to the entire phrase (treating the copula as the content verb)
807+ self .process_phrases_with_ir_aller (cop [0 ], expl , polarity , phrase_ords , node )
735808 self .process_simple_verb_forms (cop [0 ], expl , polarity , phrase_ords , node )
736809
737810 # adjust PhraseAspect based on the lemma of the copula
0 commit comments