Skip to content

Commit 0a6c328

Browse files
Extend Romance block to support Romanian
1 parent 6c30a5b commit 0a6c328

1 file changed

Lines changed: 136 additions & 30 deletions

File tree

udapi/block/msf/romance/romance.py

Lines changed: 136 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -265,34 +265,34 @@ def process_phrases_with_ir_aller(self, node, expl, polarity, phrase_ords, head_
265265
if node.lemma == 'estar':
266266

267267
if node.feats['Tense'] == 'Pres':
268-
tense = 'Pres'
269-
aspect = 'Prog'
268+
tense=Tense.PRES.value
269+
aspect =Aspect.PROG.value
270270

271271
elif node.feats['Tense'] == 'Imp':
272-
tense = 'Past'
273-
aspect = 'ImpProg'
272+
tense=Tense.PAST.value
273+
aspect=Aspect.IMPPROG.value
274274

275275
elif node.feats['Tense'] == 'Past':
276-
tense = 'Past'
277-
aspect = 'PerfProg'
276+
tense=Tense.PAST.value
277+
aspect=Aspect.PERFPROG.value
278278

279279
elif node.feats['Tense'] == 'Fut':
280-
tense = 'Fut'
281-
aspect = 'Prog'
280+
tense=Tense.FUT.value
281+
aspect=Aspect.PROG.value
282282

283283
elif node.feats['Tense'] == 'Pres':
284-
tense = 'Fut'
284+
tense=Tense.FUT.value
285285

286286
elif node.feats['Tense'] == 'Imp':
287-
tense = 'PastFut'
288-
aspect = 'Imp'
287+
tense=Tense.PASTFUT.value
288+
aspect=Aspect.IMP.value
289289

290290
elif node.feats['Tense'] == 'Fut':
291-
tense = 'FutFut'
291+
tense=Tense.FUTFUT.value
292292

293293
elif node.feats['Tense'] == 'Past':
294-
tense = 'PastFut'
295-
aspect = 'Perf'
294+
tense=Tense.PASTFUT.value
295+
aspect=Aspect.PERF.value
296296

297297

298298
if auxes_without_pass:
@@ -378,15 +378,30 @@ def process_simple_verb_forms(self, node, expl, polarity, phrase_ords, head_node
378378

379379
# Italian
380380
# passato remoto -> PhraseTense=Past, PhraseAspect=Perf
381-
if node.feats['Tense'] == 'Past':
381+
elif node.feats['Tense'] == 'Past':
382382
aspect=Aspect.PERF.value
383383

384384
# Portuguese
385385
# pretérito mais que perfeito simples -> PhraseTense=Past, PhraseAspect=Pqp
386-
if node.feats['Tense'] == 'Pqp':
386+
elif node.feats['Tense'] == 'Pqp':
387387
tense=Tense.PAST.value
388388
aspect=Aspect.PQP.value
389-
389+
390+
else:
391+
# viitorul popular/colocvial (intentional future) -> PhraseTense=Fut, PhraseAspect=''
392+
o = [x for x in node.children if x.lemma == 'o' and x.upos == 'PART']
393+
sa = [x for x in node.children if x.lemma == 'să' and x.upos == 'PART']
394+
395+
396+
if o and sa:
397+
tense = Tense.FUT.value
398+
phrase_ords.append(o[0].ord)
399+
phrase_ords.append(sa[0].ord)
400+
401+
phrase_ords.sort()
402+
403+
404+
390405
# Portuguese
391406
# subjunctive presente -> PhraseTense=Pres, PhraseAspect=''
392407
# subjunctive futuro -> PhraseTense=Fut, PhraseAspect=''
@@ -464,7 +479,7 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
464479
if head_node.misc['Phrase'] != '':
465480
return
466481

467-
if len(auxes) == 1:
482+
if len(auxes) == 1:
468483
# Cnd
469484
if auxes[0].feats['Mood'] == 'Cnd' and (node.feats['VerbForm'] == 'Part' or node.feats['VerbForm'] == 'Ger'):
470485

@@ -599,6 +614,7 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
599614
aspect=Aspect.PERF.value
600615
tense=auxes[0].feats['Tense']
601616
form='Fin'
617+
mood=auxes[0].feats['Mood']
602618

603619
adp_en = [x for x in node.children if x.lemma == 'en' and x.upos == 'ADP' and x.udeprel == 'mark']
604620
if auxes[0].feats['VerbForm'] == 'Part' and adp_en:
@@ -609,6 +625,13 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
609625
form='Ger'
610626

611627

628+
# Romanian
629+
# Perfect compus -> PhraseTense=Past, PhraseAspect=Perf
630+
elif auxes[0].lemma == 'avea':
631+
tense = Tense.PAST.value
632+
aspect = Aspect.PERF.value
633+
form = 'Fin'
634+
612635
# Spanish
613636
# Pretérito perfecto compuesto ante presente -> PhraseTense=Past, PhraseAspect=Perf
614637

@@ -619,8 +642,12 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
619642
# Portuguese
620643
# pretérito perfeito composto (aux ter) -> PhraseTense=PastPres, PhraseAspect=Perf
621644
# subjunctive pretérito perfeito composto (aux ter) -> PhraseTense=PastPres, PhraseAspect=Perf, PhraseMood=Sub
622-
if auxes[0].lemma == 'ter' or auxes[0].feats['Mood'] == 'Sub':
645+
if auxes[0].lemma in ['ter', 'fi'] or auxes[0].feats['Mood'] == 'Sub':
623646
tense = Tense.PASTPRES.value
647+
648+
# subjunctive mood not annotated in Romanian data
649+
if auxes[0].lemma == 'fi':
650+
mood='Sub'
624651
else:
625652
tense=Tense.PAST.value
626653

@@ -653,7 +680,7 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
653680
tense=tense,
654681
number=auxes[0].feats['Number'],
655682
person=auxes[0].feats['Person'],
656-
mood=auxes[0].feats['Mood'],
683+
mood=mood,
657684
aspect=aspect,
658685
form=form,
659686
voice=head_node.feats['Voice'],
@@ -662,9 +689,8 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
662689
ords=phrase_ords)
663690
return
664691

665-
# auxiliary 'ir' followed by infinitive
666-
# TODO solve these verb forms for Spanish (VERB 'ir' + ADP 'a' + infinitive)
667-
if auxes[0].lemma in ['ir'] and node.feats['VerbForm'] == 'Inf':
692+
# auxiliary 'ir' or 'vrea' followed by infinitive
693+
if auxes[0].lemma in ['ir', 'vrea'] and node.feats['VerbForm'] == 'Inf':
668694

669695
tense=node.feats['Tense']
670696
aspect=''
@@ -687,7 +713,12 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
687713
# Futuro perifrástico passado perf -> PhraseTense=PastFut, PhraseAspect=Perf
688714
elif auxes[0].feats['Tense'] == 'Past':
689715
tense=Tense.PASTFUT.value
690-
aspect=Aspect.PERF.value
716+
aspect=Aspect.PERF.value
717+
718+
# Viitorul standard/literar/simplu -> PhraseTense=Fut, PhraseAspect=''
719+
if auxes[0].lemma == 'vrea':
720+
tense = Tense.FUT.value
721+
aspect = ''
691722

692723
self.write_node_info(head_node,
693724
tense=tense,
@@ -701,13 +732,90 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
701732
polarity=polarity,
702733
ords=phrase_ords)
703734

735+
return
736+
737+
# condițional-optativ prezent -> PhraseTense=Pres, PhraseAspect=''
738+
if auxes[0].lemma == 'avea' and node.feats['VerbForm'] == 'Inf':
739+
tense=Tense.PRES.value
740+
aspect=''
741+
self.write_node_info(head_node,
742+
tense=tense,
743+
aspect=aspect,
744+
number=auxes[0].feats['Number'],
745+
person=auxes[0].feats['Person'],
746+
mood='Cnd',
747+
form='Fin',
748+
voice=head_node.feats['Voice'],
749+
expl=expl,
750+
polarity=polarity,
751+
ords=phrase_ords)
752+
753+
return
754+
755+
# viitor popular/colloquial (obligative future) -> PhraseTense=Fut, PhraseAspect=''
756+
# viitor popular (potential future - contracted form) -> PhraseTense=Fut, PhraseAspect=''
757+
if node.feats['VerbForm'] == 'Fin':
758+
sa = [x for x in node.children if x.lemma == 'să' and x.upos == 'PART']
759+
760+
if sa:
761+
phrase_ords.append(sa[0].ord)
762+
phrase_ords.sort()
763+
764+
tense=Tense.FUT.value
765+
aspect=''
766+
767+
self.write_node_info(head_node,
768+
tense=tense,
769+
aspect=aspect,
770+
number=head_node.feats['Number'],
771+
person=head_node.feats['Person'],
772+
mood=head_node.feats['Mood'],
773+
form='Fin',
774+
voice=head_node.feats['Voice'],
775+
expl=expl,
776+
polarity=polarity,
777+
ords=phrase_ords)
778+
779+
return
704780

705781
elif len(auxes) == 2:
782+
# Romanian
783+
# viitor anterior -> PhraseTense=Fut, PhraseAspect=Perf
784+
if auxes[0].lemma == 'vrea' and auxes[1].lemma == 'fi' and node.feats['VerbForm'] == 'Part':
785+
786+
self.write_node_info(head_node,
787+
tense=Tense.PAST.value,
788+
number=auxes[0].feats['Number'],
789+
person=auxes[0].feats['Person'],
790+
mood=auxes[0].feats['Mood'],
791+
form='Fin',
792+
aspect=Aspect.PERF.value,
793+
voice=head_node.feats['Voice'],
794+
expl=expl,
795+
polarity=polarity,
796+
ords=phrase_ords)
797+
798+
return
799+
800+
# condițional-optativ perfect -> PhraseTense=Past
801+
if auxes[0].lemma == 'avea' and auxes[1].lemma == 'fi' and node.feats['VerbForm'] == 'Part':
802+
803+
self.write_node_info(head_node,
804+
tense=Tense.PAST.value,
805+
number=auxes[0].feats['Number'],
806+
person=auxes[0].feats['Person'],
807+
mood='Cnd',
808+
form='Fin',
809+
aspect='',
810+
voice=head_node.feats['Voice'],
811+
expl=expl,
812+
polarity=polarity,
813+
ords=phrase_ords)
814+
815+
return
816+
706817
# Portuguese
707818
# auxiliary 'ir' followed by auxiliary 'estar' in infinitive and a gerund
708-
709-
# TODO Spanish
710-
# VERB 'ir' + ADP 'a' + AUX 'estar'.Inf + gerund
711819
if auxes[0].lemma == 'ir' and auxes[1].lemma == 'estar' and node.feats['VerbForm'] == 'Ger':
712820

713821
# Futuro perifrástico -> PhraseTense=Fut, PhraseAspect=Prog
@@ -765,9 +873,7 @@ def process_periphrastic_verb_forms(self, node, auxes, expl, polarity, phrase_or
765873
expl=expl,
766874
polarity=polarity,
767875
ords=phrase_ords)
768-
769-
770-
876+
771877
# Cnd (only ter/haber), Sub and Past,Pres,Fut tenses: 2 auxes - ter/haber + estar
772878
if auxes[0].lemma in AUXES_HAVE and auxes[1].lemma == 'estar' and node.feats['VerbForm'] == 'Ger':
773879

0 commit comments

Comments
 (0)