1313import logging
1414from bisect import bisect_left
1515
16+ def _m (range_s , range_e , offset ):
17+ return f"{ range_s } -{ offset } :{ range_e } -{ offset } " if offset else f"{ range_s } :{ range_e } "
1618
1719class AddBratAnn (Block ):
1820
@@ -47,7 +49,7 @@ def process_document(self, document):
4749 offset += 1
4850
4951 for line in ann_filehandle :
50- line = line .rstrip ()
52+ line = line .rstrip (' \n ' )
5153 if not "\t " in line :
5254 logging .warning (f"Unexpected line without tabs: { line } " )
5355 elif line .startswith ("T" ):
@@ -57,16 +59,18 @@ def process_document(self, document):
5759 # Usually a range is two numbers, but there can be more, e.g. type_and_range="Abstract 605 653;654 703"
5860 # Let's take the first and last number only.
5961 parts = type_and_range .split ()
60- ne_type , range_s , range_e = parts [0 ], parts [1 ], parts [- 1 ]
62+ ne_type , range_s , range_e = parts [0 ], int ( parts [1 ]), int ( parts [- 1 ])
6163
6264 # If form ends with spaces, remove them and adjust range_e
6365 stripped_form = form .rstrip (" " )
6466 if form != stripped_form :
6567 num_spaces = len (form ) - len (stripped_form )
68+ logging .debug (f"Stripping { num_spaces } space{ 's' if num_spaces > 1 else '' } from { mention_id } '{ form } ' ({ _m (range_s ,range_e ,offset )} ->{ range_e - num_spaces } )" )
6669 form = stripped_form
67- range_e = int ( range_e ) - num_spaces
70+ range_e = range_e - num_spaces
6871
69- mentions [mention_id ] = [ne_type , int (range_s ), int (range_e ), form ]
72+
73+ mentions [mention_id ] = [ne_type , range_s , range_e , form ]
7074 if self .keep_mention_id :
7175 attrs .append (["mention_id" , mention_id , mention_id ])
7276 except Exception as e :
@@ -155,7 +159,7 @@ def process_document(self, document):
155159 break
156160
157161 if not ok_s or not ok_e :
158- logging .warning (f"Mention { mention_id } range { range_s } - { offset } : { range_e } - { offset } ({ form } )"
162+ logging .warning (f"Mention { mention_id } range { _m ( range_s , range_e , offset ) } ({ form } )"
159163 f" crosses token boundaries: { token_s .misc } ({ token_s .form } ) "
160164 f".. { token_e .misc } ({ token_e .form } )" )
161165
0 commit comments