@@ -45,7 +45,7 @@ def parse_comment_line(self, line, root):
4545 match = RE_BEGIN .match (line )
4646 if match :
4747 docname = match .group (1 )
48- # LitBank uses e.g.
48+ # LitBank and FantasyCoref use e.g.
4949 # #begin document (1023_bleak_house_brat); part 0
5050 if docname .startswith ('(' ) and docname .endswith (');' ):
5151 docname = docname [1 :- 2 ]
@@ -56,6 +56,9 @@ def parse_comment_line(self, line, root):
5656 # Corref-PT-SemEval uses e.g.
5757 # #begin document D1_C30_Folha_07-08-2007_09h19.txt.xml
5858 docname = docname .replace ('.txt' , '' ).replace ('.xml' , '' )
59+ # FantasyCoref may use parentheses within the document ID, e.g.
60+ # #begin document (051_Fundevogel_(Bird-foundling)); part 000
61+ docname = docname .replace ('(' , '' ).replace (')' , '' )
5962
6063 root .newdoc = docname
6164 self ._global_entity = 'eid-etype-head-other'
@@ -77,6 +80,8 @@ def parse_node_line(self, line, root, nodes):
7780 for (n_attribute , attribute_name ) in enumerate (self .node_attributes ):
7881 value = fields [n_attribute ]
7982 if attribute_name == 'docname' :
83+ # FantasyCoref may use parentheses within the document ID; strip them before comparing
84+ value = value .replace ('(' , '' ).replace (')' , '' )
8085 if value != self ._docname :
8186 logging .warning (f"Document name mismatch { value } != { self ._docname } " )
8287
0 commit comments