Skip to content

Commit 4ba6c10

Browse files
Add annotation for periphrasis with ir and aller
1 parent ecd5f88 commit 4ba6c10

1 file changed

Lines changed: 77 additions & 4 deletions

File tree

udapi/block/msf/romance/romance.py

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ def __init__(self, neg=True, **kwargs):
3636

3737
def process_node(self, node):
3838

39+
if node.misc['Phrase'] != '':
40+
return
41+
3942
cop = [x for x in node.children if x.udeprel == 'cop']
4043

4144
# only expl or expl:pv, no expl:impers or expl:pass
@@ -117,6 +120,7 @@ def process_node(self, node):
117120
phrase_ords = [node.ord] + [r.ord for r in refl]
118121
phrase_ords.sort()
119122

123+
self.process_phrases_with_ir_aller(node, expl, polarity, phrase_ords, node)
120124
self.process_simple_verb_forms(node, expl, polarity, phrase_ords, node)
121125

122126

@@ -147,6 +151,7 @@ def process_node(self, node):
147151
phrase_ords.sort()
148152

149153
# TODO phrase-level features are currently determined based on the first passive auxiliary, but there can be more than one passive auxiliary
154+
self.process_phrases_with_ir_aller(auxes[0], expl, polarity, phrase_ords, node)
150155
self.process_simple_verb_forms(auxes[0], expl, polarity, phrase_ords, node)
151156

152157
# head verb has passive auxiliary and also other auxiliaries
@@ -224,6 +229,7 @@ def process_modal_verbs(self, modals, modal_auxes, modal_neg):
224229
polarity='Neg'
225230
else:
226231
phrase_ords = [modals[0].ord]
232+
self.process_phrases_with_ir_aller(modals[0], '', polarity, phrase_ords, modals[0])
227233
self.process_simple_verb_forms(modals[0], '', polarity, phrase_ords, modals[0])
228234

229235
else:
@@ -238,6 +244,66 @@ def process_modal_verbs(self, modals, modal_auxes, modal_neg):
238244

239245
self.process_periphrastic_verb_forms(modals[0], modal_auxes, '', polarity, phrase_ords, modals[0])
240246

247+
def process_phrases_with_ir_aller(self, node, expl, polarity, phrase_ords, head_node):
    """
    Annotate periphrastic constructions headed by the verb 'ir' / 'aller' with an xcomp dependent.

    Nothing is done when head_node already carries a non-empty misc['Phrase'],
    or when node is not a VERB with lemma 'ir' / 'aller' that has at least one
    xcomp child. Otherwise node is marked with misc['PhraseAux'] = 'Yes' and
    the phrase-level values are passed to self.write_node_info for the first
    xcomp child.

    Parameters
        node: The candidate 'ir' / 'aller' verb; its Tense, Number, Person, Mood and Voice features are read.
        expl (str): Value forwarded unchanged to write_node_info as expl.
        polarity (str): Value forwarded unchanged to write_node_info as polarity.
        phrase_ords (list[int]): Kept for signature compatibility with the sibling process_* methods;
            the ords written for the periphrasis are recomputed here from the nodes that form it.
        head_node: The node whose misc['Phrase'] is checked to detect an already annotated phrase.
    """

    # phrase already annotated
    if head_node.misc['Phrase'] != '':
        return

    xcomps = [x for x in node.children if x.udeprel == 'xcomp']
    if node.lemma not in ('ir', 'aller') or node.upos != 'VERB' or not xcomps:
        return

    content_verb = xcomps[0]

    # The mark is also what makes process_simple_verb_forms skip this node afterwards.
    node.misc['PhraseAux'] = 'Yes'

    # Tense of 'ir' / 'aller' -> (phrase tense, phrase aspect);
    # any other tense value is passed through unchanged with an empty aspect.
    periphrastic_tenses = {
        'Pres': ('Fut', ''),
        'Imp': ('PastFut', 'Imp'),
        'Fut': ('FutFut', ''),
        'Past': ('PastFut', 'Perf'),
    }
    own_tense = node.feats['Tense']
    tense, aspect = periphrastic_tenses.get(own_tense, (own_tense, ''))

    auxes = [x for x in content_verb.children if x.udeprel == 'aux']
    aux_pass = [x for x in auxes if x.deprel == 'aux:pass']
    auxes_without_pass = [x for x in auxes if x.deprel != 'aux:pass']

    # Inspect the first NON-passive auxiliary: auxes[0] may be the aux:pass
    # auxiliary, whose lemma says nothing about progressive/perfect aspect.
    if auxes_without_pass:
        aspect_lemma = auxes_without_pass[0].lemma
        if aspect_lemma == 'estar':
            aspect += 'Prog'
        if aspect_lemma == 'haber':
            aspect += 'Perf'

    # A passive auxiliary under the xcomp overrides the Voice feature of 'ir' / 'aller'.
    voice = 'Pass' if aux_pass else node.feats['Voice']

    adp_a = [x for x in content_verb.children
             if x.lemma == 'a' and x.upos == 'ADP' and x.udeprel == 'mark']
    cop = [x for x in content_verb.children if x.udeprel == 'cop']

    phrase_ords = [node.ord, content_verb.ord]
    phrase_ords.extend(x.ord for x in auxes)
    phrase_ords.extend(x.ord for x in cop)
    if adp_a:
        phrase_ords.append(adp_a[0].ord)
    phrase_ords.sort()

    self.write_node_info(content_verb,
        tense=tense,
        number=node.feats['Number'],
        person=node.feats['Person'],
        aspect=aspect,
        mood=node.feats['Mood'],
        form='Fin',
        voice=voice,
        expl=expl,
        polarity=polarity,
        ords=phrase_ords)
241307

242308
def process_simple_verb_forms(self, node, expl, polarity, phrase_ords, head_node):
243309
"""
@@ -251,6 +317,9 @@ def process_simple_verb_forms(self, node, expl, polarity, phrase_ords, head_node
251317
head_node (udapi.core.node.Node): The node that should receive the Phrase* attributes, i.e., the head of the phrase.
252318
"""
253319

320+
if node.misc['PhraseAux'] != '':
321+
return
322+
254323
# Portuguese
255324
# presente -> PhraseTense=Pres, PhraseAspect=''
256325
# Futuro do presente -> PhraseTense=Fut, PhraseAspect=''
@@ -266,7 +335,7 @@ def process_simple_verb_forms(self, node, expl, polarity, phrase_ords, head_node
266335
aspect = ''
267336
tense = node.feats['Tense']
268337
form = node.feats['VerbForm']
269-
338+
270339
if node.feats['Mood'] == 'Ind':
271340

272341
# Portuguese
@@ -358,7 +427,6 @@ def process_simple_verb_forms(self, node, expl, polarity, phrase_ords, head_node
358427
ords=phrase_ords
359428
)
360429

361-
362430
def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_ords, head_node):
363431
"""
364432
Annotate periphrastic verb forms with the Phrase* attributes.
@@ -372,6 +440,10 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
372440
head_node (udapi.core.node.Node): The node that should receive the Phrase* attributes, i.e., the head of the phrase.
373441
"""
374442

443+
# phrase already annotated
444+
if head_node.misc['Phrase'] != '':
445+
return
446+
375447
if len(auxes) == 1:
376448
# Cnd
377449
if auxes[0].feats['Mood'] == 'Cnd' and (node.feats['VerbForm'] == 'Part' or node.feats['VerbForm'] == 'Ger'):
@@ -572,7 +644,7 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
572644

573645
# auxiliary 'ir' followed by infinitive
574646
# TODO solve these verb forms for Spanish (VERB 'ir' + ADP 'a' + infinitive)
575-
if auxes[0].lemma == 'ir' and node.feats['VerbForm'] == 'Inf':
647+
if auxes[0].lemma in ['ir'] and node.feats['VerbForm'] == 'Inf':
576648

577649
tense=node.feats['Tense']
578650
aspect=''
@@ -595,7 +667,7 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
595667
# Futuro perifrástico passado perf -> PhraseTense=PastFut, PhraseAspect=Perf
596668
elif auxes[0].feats['Tense'] == 'Past':
597669
tense=Tense.PASTFUT.value
598-
aspect=Aspect.PERF.value
670+
aspect=Aspect.PERF.value
599671

600672
self.write_node_info(head_node,
601673
tense=tense,
@@ -732,6 +804,7 @@ def process_copulas(self, node, cop, expl, polarity, phrase_ords):
732804
"""
733805

734806
# classify the morphological features of the copula node and propagate them to the entire phrase (treating the copula as the content verb)
807+
self.process_phrases_with_ir_aller(cop[0], expl, polarity, phrase_ords, node)
735808
self.process_simple_verb_forms(cop[0], expl, polarity, phrase_ords, node)
736809

737810
# adjust PhraseAspect based on the lemma of the copula

0 commit comments

Comments
 (0)