@@ -206,7 +206,80 @@ def _raise_if_error(self):
206206 """
207207 Raise an exception if the gzip process has exited with an error.
208208
209- Raise IOError if process is not running anymore and the
209+ Raise OSError if process is not running anymore and the
210+ exit code is nonzero.
211+ """
212+ return_code = self .process .poll ()
213+ if return_code is not None and return_code != 0 :
214+ message = self ._stderr .read ().strip ()
215+ raise OSError (message )
216+
def read(self, *args):
    """
    Read from the subprocess's stdout pipe and surface process failures.

    Positional arguments are forwarded unchanged to ``self._file.read``.
    When the call asks for everything up to EOF (no size, ``None`` or a
    negative size) the method waits for the process to exit before the
    exit code is checked, so the check sees the final return code.

    Returns whatever ``self._file.read`` returned.
    Raises whatever ``self._raise_if_error()`` raises.
    """
    data = self._file.read(*args)
    size = args[0] if args else None
    # ``None`` and negative sizes both mean "read until EOF". Comparing
    # ``None`` with an int would raise TypeError, and a size of 0 does not
    # drain the pipe, so waiting for the process in that case could block.
    if size is None or size < 0:
        self.process.wait()
    self._raise_if_error()
    return data
224+
225+
226+ class PipedBAllCReader (Closing ):
def __init__(self, path, cmeta_path=None, region=None, mode="r"):
    """
    Start a ``ballcools query`` subprocess and expose its output pipes.

    Parameters
    ----------
    path
        File handed to ``ballcools query``; also stored as ``self.name``.
    cmeta_path
        Path passed to ``ballcools`` through ``-c``. It is required and
        must exist; it is resolved to an absolute path before use.
    region
        Whitespace-separated query arguments inserted after ``path``.
        When None, the single argument ``"*"`` is used instead.
    mode
        Must be exactly ``"r"``.

    Raises
    ------
    ValueError
        If ``mode`` is not ``"r"``.
    NotImplementedError
        If ``cmeta_path`` is None.
    FileNotFoundError
        If ``cmeta_path`` does not exist.

    Anything raised by ``self._raise_if_error()`` also propagates.
    """
    # Compare for equality: ``mode not in ("r")`` is a substring test
    # against the string "r" and would accept the empty string.
    if mode != "r":
        raise ValueError("Mode can only be 'r' for ballc file")
    if cmeta_path is None:
        raise NotImplementedError("cmeta_path is required to query a ballc file")
    cmeta_path = pathlib.Path(cmeta_path).resolve()
    if not cmeta_path.exists():
        raise FileNotFoundError(f"{cmeta_path} does not exist.")
    cmeta_path = str(cmeta_path)

    if region is None:
        # NOTE(review): no shell is involved, so ballcools receives the
        # double quotes literally -- confirm that this is what it expects.
        query_args = ['"*"']
    else:
        # split() without a separator ignores repeated or surrounding
        # whitespace instead of producing empty arguments.
        query_args = region.split()

    # encoding=None keeps both pipes in binary mode.
    self.process = Popen(
        ["ballcools", "query", path, *query_args, "-c", cmeta_path],
        stdout=PIPE,
        stderr=PIPE,
        encoding=None,
    )

    self.name = path
    self._file = self.process.stdout
    self._stderr = self.process.stderr
    self.closed = False
    # Give ballcools a little bit of time to report any errors
    # (such as a non-existing file)
    time.sleep(0.01)
    self._raise_if_error()
262+
def close(self):
    """
    Stop the subprocess if needed and release both of its pipes.

    If the process already exited on its own, ``self._raise_if_error()``
    is consulted so a failure is reported to the caller. If the process
    was still running, it is terminated and reaped; the exit code is not
    checked in that case because it reflects our own termination request.
    Calling ``close`` again after the first call does nothing.
    """
    if self.closed:
        return
    self.closed = True
    stopped_by_us = self.process.poll() is None
    if stopped_by_us:
        self.process.terminate()
        # Reap the child so it does not linger as a zombie.
        self.process.wait()
    try:
        if not stopped_by_us:
            # Must run before the pipes are closed: the check may read
            # from the stderr pipe.
            self._raise_if_error()
    finally:
        self._file.close()
        self._stderr.close()
270+
def __iter__(self):
    """
    Yield every line of the subprocess's stdout decoded as UTF-8.

    After the pipe is exhausted, wait for the process to exit and let
    ``self._raise_if_error()`` report a failure, if any.
    """
    yield from (raw_line.decode("utf-8") for raw_line in self._file)
    self.process.wait()
    self._raise_if_error()
276+
def readline(self):
    """Return the next line from the subprocess's stdout, decoded as UTF-8."""
    raw_line = self._file.readline()
    return raw_line.decode("utf-8")
279+
280+ def _raise_if_error (self ):
281+ """
282+ Raise OSError if process is not running anymore and the
210283 exit code is nonzero.
211284 """
212285 return_code = self .process .poll ()
@@ -275,7 +348,7 @@ def readline(self):
275348 return self .file .readline ()
276349
277350 def _raise_if_error (self ):
278- """Raise IOError if process is not running anymore and the exit code is nonzero."""
351+ """Raise OSError if process is not running anymore and the exit code is nonzero."""
279352 return_code = self .process .poll ()
280353 if return_code is not None and return_code != 0 :
281354 message = self ._stderr .read ().strip ()
@@ -382,7 +455,11 @@ def open_gz(file_path, mode="r", compresslevel=3, threads=1, region=None):
382455 return gzip .open (file_path , mode , compresslevel = compresslevel )
383456
384457
385- def open_allc (file_path , mode = "r" , compresslevel = 3 , threads = 1 , region = None ):
def open_ballc(file_path, mode="r", compresslevel=None, threads=None, region=None, cmeta_path=None):
    """
    Return a PipedBAllCReader for ``file_path``.

    ``region`` and ``cmeta_path`` are forwarded to the reader. ``mode``,
    ``compresslevel`` and ``threads`` are accepted but not used here: the
    reader is always created with mode ``"r"``.
    """
    reader_options = {"cmeta_path": cmeta_path, "region": region, "mode": "r"}
    return PipedBAllCReader(file_path, **reader_options)
460+
461+
462+ def open_allc (file_path , mode = "r" , compresslevel = 3 , threads = 1 , region = None , cmeta_path = None ):
386463 """
387464 Open a .allc file.
388465
@@ -422,7 +499,7 @@ def open_allc(file_path, mode="r", compresslevel=3, threads=1, region=None):
422499 raise ValueError (f"mode '{ mode } ' not supported" )
423500 if compresslevel not in range (1 , 10 ):
424501 raise ValueError ("compresslevel must be between 1 and 9" )
425- if region is not None :
502+ if ( region is not None ) and ( not file_path . endswith ( ".ballc" )) :
426503 # unzipped file
427504 if not file_path .endswith ("gz" ):
428505 raise ValueError (f"File must be compressed by bgzip to use region query. File path { file_path } " )
@@ -435,8 +512,10 @@ def open_allc(file_path, mode="r", compresslevel=3, threads=1, region=None):
435512 if not os .path .exists (file_path + ".tbi" ):
436513 raise FileNotFoundError ("region query provided, but .tbi index not found" )
437514
438- if file_path .endswith ("gz" ):
515+ if file_path .endswith (". gz" ):
439516 return open_gz (file_path , mode , compresslevel , threads , region = region )
517+ elif file_path .endswith (".ballc" ):
518+ return open_ballc (file_path , region = region , cmeta_path = cmeta_path )
440519 else :
441520 return open (file_path , mode )
442521
0 commit comments