Skip to content

Commit fc44de7

Browse files
committed
add mwt.get_attrs(), mwt.is_empty(), mwt.is_leaf()
These three methods make the MWT objects more similar to Node, which is useful when working with lists where both object types are mixed, e.g. the result of `node.descendants(add_mwt=True)`.
1 parent 381a2d1 commit fc44de7

1 file changed

Lines changed: 71 additions & 1 deletion

File tree

udapi/core/mwt.py

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,79 @@ def is_mwt():
7777

7878
@property
def no_space_after(self):
    """Tell whether MISC marks this multi-word token with ``SpaceAfter=No``.

    Shortcut for ``mwt.misc["SpaceAfter"] == "No"``.
    """
    space_after = self.misc["SpaceAfter"]
    return space_after == "No"
8282

83+
@staticmethod
def is_empty():
    """Report whether this object is an empty node.

    A multi-word token can never be an empty node, so the answer is
    always ``False``.
    """
    return False
90+
91+
@staticmethod
def is_leaf():
    """Report whether this node/mwt has no children.

    A multi-word token cannot have children, so the answer is
    always ``True``.
    """
    return True
98+
99+
def _get_attr(self, name):
    """Return the value of a single attribute or pseudo-attribute.

    Recognized names:

    * ``form`` -- ``self.form``
    * ``ord`` -- ``self.ord_range``
    * ``edge``, ``children``, ``siblings``, ``depth`` -- always ``0``
    * ``feats_split`` / ``misc_split`` -- ``str()`` of feats/misc split on ``|``
    * ``feats[KEY]`` / ``misc[KEY]`` -- ``self.feats[KEY]`` / ``self.misc[KEY]``

    Any other name yields the placeholder string ``'<mwt>'``.
    """
    if name == 'form':
        result = self.form
    elif name == 'ord':
        result = self.ord_range
    elif name in ('edge', 'children', 'siblings', 'depth'):
        result = 0
    elif name in ('feats_split', 'misc_split'):
        container = self.feats if name == 'feats_split' else self.misc
        result = str(container).split('|')
    elif name.startswith(('feats[', 'misc[')):
        # "feats[Case]" -> container "feats", key "Case" (the last character,
        # normally the closing bracket, is dropped).
        container_name, _, rest = name.partition('[')
        result = getattr(self, container_name)[rest[:-1]]
    else:
        result = '<mwt>'
    return result
115+
116+
def get_attrs(self, attrs, undefs=None, stringify=True):
    """Return multiple attributes or pseudo-attributes, possibly substituting empty ones.

    MWTs have neither children nor parents nor prev/next nodes, so the
    pseudo-attributes ``p_xy``, ``c_xy``, ``l_xy`` and ``r_xy`` are irrelevant
    and contribute nothing to the result (any name whose second character
    is ``_`` is skipped).
    Other pseudo-attributes (e.g. ``dir``) always return the string ``"<mwt>"``.
    The only relevant pseudo-attributes are ``feats_split`` and ``misc_split``:
    each contributes a list of ``name=value`` formatted strings, which is
    flattened into the result.
    The ``ord`` attribute actually returns ``mwt.ord_range``.

    Args:
        attrs: A list of attribute names, e.g. ``['form', 'ord', 'feats_split']``.
        undefs: A value to be used instead of None for empty (undefined) values.
        stringify: Apply `str()` on each value (except for None).

    Returns:
        A flat list of the collected values.
    """
    values = []
    for name in attrs:
        # Slice instead of indexing name[1]: one-character and empty names
        # must not raise IndexError, they are just ordinary attribute names.
        if name[1:2] == '_':
            continue
        value = self._get_attr(name)
        if name in ('feats_split', 'misc_split'):
            values.extend(value)
        else:
            values.append(value)

    if undefs is not None:
        values = [undefs if x is None else x for x in values]
    if stringify:
        values = [None if x is None else str(x) for x in values]
    return values
147+
148+
@property
def _ord(self):
    """Return the ``_ord`` of the first word of this multi-word token.

    Note that ``self.words`` is sorted in place as a side effect.
    """
    self.words.sort()
    earliest = self.words[0]
    return earliest._ord
152+
83153
# TODO: node.remove() should check if the node is not part of any MWT
84154
# TODO: Document that editing words by mwt.words.append(node), del or remove(node) is not supported
85155
# TODO: Make mwt._words private and provide a setter

0 commit comments

Comments
 (0)