@@ -98,7 +98,7 @@ def _determine_datasets(regions, quantifiers, chrom_size_path):
9898 "do not have index in its fourth column, adding it automatically. "
9999 "If this is not desired, add a fourth column containing UNIQUE IDs to the BED file." ,
100100 )
101- region_bed_df [name ] = [ f"{ name } _{ i } " for i in range (region_bed_df .shape [0 ])]
101+ region_bed_df [name ] = ( f"{ name } _{ i } " for i in range (region_bed_df .shape [0 ]))
102102 # check if name is unique()
103103 if region_bed_df .iloc [:, 3 ].duplicated ().sum () > 0 :
104104 raise ValueError (f"Region IDs in { region_path } (fourth column) are not unique." )
@@ -219,7 +219,6 @@ def _count_single_zarr(
219219 count_ds = _count_single_region_set (
220220 allc_table = allc_table , region_config = region_config , obs_dim = obs_dim , region_dim = region_dim
221221 )
222-
223222 # deal with count quantifiers
224223 count_mc_types = []
225224 for quant in region_config ["quant" ]:
@@ -326,8 +325,7 @@ def generate_dataset(
326325 subprocess .run (["cp" , "-f" , chrom_size_path , f"{ output_path } /chrom_sizes.txt" ], check = True )
327326 rgs = {}
328327 for region_dim , region_config in datasets .items ():
329- regiongroup = root .create_group (region_dim )
330- rgs [region_dim ] = regiongroup
328+ rgs [region_dim ] = root .create_group (region_dim )
331329 # save region coords to the ds
332330 bed = pd .read_csv (f"{ tmpdir } /{ region_dim } .regions.csv" , index_col = 0 )
333331 bed .columns = [f"{ region_dim } _chrom" , f"{ region_dim } _start" , f"{ region_dim } _end" ]
@@ -343,7 +341,7 @@ def generate_dataset(
343341 if ds .coords [k ].dtype == "O" :
344342 ds .coords [k ] = ds .coords [k ].astype (str )
345343 ds .to_zarr (f"{ output_path } /{ region_dim } " , mode = "w" , consolidated = False )
346- dsobs = regiongroup .empty (
344+ dsobs = rgs [ region_dim ] .empty (
347345 name = obs_dim , shape = allc_table .index .size , chunks = (chunk_size ), dtype = f"<U{ max_length } "
348346 )
349347 dsobs .attrs ["_ARRAY_DIMENSIONS" ] = [obs_dim ]
@@ -353,22 +351,22 @@ def generate_dataset(
353351 count_mc_types += quant .mc_types
354352 count_mc_types = list (set (count_mc_types ))
355353 if len (count_mc_types ) > 0 :
356- DA = regiongroup .empty (
354+ DA = rgs [ region_dim ] .empty (
357355 name = f"{ region_dim } _da" ,
358356 shape = (n_sample , region_size , len (count_mc_types ), 2 ),
359357 chunks = (chunk_size , region_size , len (count_mc_types ), 2 ),
360358 dtype = "uint32" ,
361359 )
362360 DA .attrs ["_ARRAY_DIMENSIONS" ] = [obs_dim , region_dim , "mc_type" , "count_type" ]
363- count = regiongroup .array (name = "count_type" , data = (["mc" , "cov" ]), dtype = "<U3" )
361+ count = rgs [ region_dim ] .array (name = "count_type" , data = (["mc" , "cov" ]), dtype = "<U3" )
364362 count .attrs ["_ARRAY_DIMENSIONS" ] = ["count_type" ]
365- mc = regiongroup .array (name = "mc_type" , data = count_mc_types , dtype = "<U3" )
363+ mc = rgs [ region_dim ] .array (name = "mc_type" , data = count_mc_types , dtype = "<U3" )
366364 mc .attrs ["_ARRAY_DIMENSIONS" ] = ["mc_type" ]
367365 # deal with hypo-score, hyper-score quantifiers
368366 for quant in region_config ["quant" ]:
369367 if quant .quant_type == "hypo-score" :
370368 for mc_type in quant .mc_types :
371- hypo = regiongroup .empty (
369+ hypo = rgs [ region_dim ] .empty (
372370 name = f"{ region_dim } _da_{ mc_type } -hypo-score" ,
373371 shape = (allc_table .size , region_size ),
374372 chunks = (chunk_size , region_size ),
@@ -377,7 +375,7 @@ def generate_dataset(
377375 hypo .attrs ["_ARRAY_DIMENSIONS" ] = [obs_dim , region_dim ]
378376 elif quant .quant_type == "hyper-score" :
379377 for mc_type in quant .mc_types :
380- hyper = regiongroup .empty (
378+ hyper = rgs [ region_dim ] .empty (
381379 name = f"{ region_dim } _da_{ mc_type } -hyper-score" ,
382380 shape = (allc_table .size , region_size ),
383381 chunks = (chunk_size , region_size ),