@@ -77,9 +77,79 @@ def is_mwt():
7777
@property
def no_space_after(self):
    """Tell whether the MISC attribute `SpaceAfter` of this multi-word token is "No".

    Boolean property as a shortcut for `mwt.misc["SpaceAfter"] == "No"`.
    """
    space_after = self.misc["SpaceAfter"]
    return space_after == "No"
8282
@staticmethod
def is_empty():
    """Report whether this is an empty node.

    Multi-word tokens cannot be empty nodes, so the answer is always False.
    """
    return False
90+
@staticmethod
def is_leaf():
    """Report whether this node/mwt has no children.

    Multi-word tokens cannot have children, so the answer is always True.
    """
    return True
98+
def _get_attr(self, name):  # pylint: disable=too-many-return-statements
    """Return the value of one attribute or pseudo-attribute of this MWT.

    Supported names:
      * ``form`` -- ``self.form``
      * ``ord`` -- ``self.ord_range``
      * ``edge``, ``children``, ``siblings``, ``depth`` -- always ``0``
      * ``feats_split``, ``misc_split`` -- ``str()`` of feats/misc split on ``|``
      * ``feats[key]``, ``misc[key]`` -- the item ``key`` of feats/misc
    Any other name yields the string ``'<mwt>'``.
    """
    if name in ('form', 'ord'):
        return self.form if name == 'form' else self.ord_range
    if name in ('edge', 'children', 'siblings', 'depth'):
        return 0
    if name in ('feats_split', 'misc_split'):
        container = self.feats if name == 'feats_split' else self.misc
        return str(container).split('|')
    if name.startswith('feats['):
        # Strip the leading "feats[" and the last character (the closing bracket).
        return self.feats[name[len('feats['):-1]]
    if name.startswith('misc['):
        # Strip the leading "misc[" and the last character (the closing bracket).
        return self.misc[name[len('misc['):-1]]
    return '<mwt>'
115+
def get_attrs(self, attrs, undefs=None, stringify=True):
    """Return multiple attributes or pseudo-attributes, possibly substituting empty ones.

    MWTs do not have children nor parents nor prev/next nodes,
    so the pseudo-attributes: p_xy, c_xy, l_xy and r_xy are irrelevant (and return nothing).
    Other pseudo-attributes (e.g. dir) return always the string "<mwt>".
    The only relevant pseudo-attributes are
    feats_split and misc_split: a list of name=value formatted strings.
    The `ord` attribute returns actually `mwt.ord_range`.

    Args:
    attrs: A list of attribute names, e.g. ``['form', 'ord', 'feats_split']``.
    undefs: A value to be used instead of None for empty (undefined) values.
    stringify: Apply `str()` on each value (except for None)

    Returns:
    A flat list of values in the order of `attrs`; `feats_split` and
    `misc_split` contribute one item per name=value string.
    """
    values = []
    for name in attrs:
        nodes = [self]
        # Slice instead of indexing: `name[1]` would raise IndexError
        # for an empty or one-character attribute name.
        if name[1:2] == '_':
            # Any x_ prefix (p_, c_, l_, r_) selects related nodes, which an MWT never has.
            nodes, name = [], name[2:]
        for node in (n for n in nodes if n is not None):
            if name in {'feats_split', 'misc_split'}:
                # These pseudo-attributes are lists; flatten them into the result.
                values.extend(node._get_attr(name))
            else:
                values.append(node._get_attr(name))

    if undefs is not None:
        values = [x if x is not None else undefs for x in values]
    if stringify:
        values = [str(x) if x is not None else None for x in values]
    return values
147+
@property
def _ord(self):
    """Return the `_ord` of the first word of this MWT.

    Note that `self.words` is sorted in place as a side effect.
    """
    members = self.words
    members.sort()  # in-place, so self.words itself ends up sorted
    first = members[0]
    return first._ord
152+
83153# TODO: node.remove() should check if the node is not part of any MWT
84154# TODO: Document that editing words by mwt.words.append(node), del or remove(node) is not supported
85155# TODO: Make mwt._words private and provide a setter
0 commit comments