@@ -211,7 +211,9 @@ def _calculate_pv(data, reverse_value, obs_dim, var_dim, cutoff=0.9):
211211 return pv
212212
213213
214- def _count_single_zarr (allc_table , region_config , obs_dim , obs_dim_dtype , region_dim , chunk_start , regiongroup , count_dtype = "uint32" ):
214+ def _count_single_zarr (
215+ allc_table , region_config , obs_dim , obs_dim_dtype , region_dim , chunk_start , regiongroup , count_dtype = "uint32"
216+ ):
215217 """Process single region set and its quantifiers."""
216218 # count all ALLC and mC types that are needed for the quantifiers of this region_dim
217219 count_ds = _count_single_region_set (
@@ -257,7 +259,9 @@ def _count_single_zarr(allc_table, region_config, obs_dim, obs_dim_dtype, region
257259 regiongroup [f"{ region_dim } _da_{ mc_type } -hyper-score" ][
258260 chunk_start : chunk_start + allc_table .index .size , :
259261 ] = data .data
260- regiongroup [obs_dim ] = count_ds .coords [obs_dim ].astype (obs_dim_dtype )
262+ regiongroup [obs_dim ][chunk_start : chunk_start + allc_table .index .size ] = (
263+ count_ds .coords [obs_dim ].astype (obs_dim_dtype ).data
264+ )
261265 return True
262266
263267
@@ -327,10 +331,6 @@ def generate_dataset(
327331 bed .columns = [f"{ region_dim } _chrom" , f"{ region_dim } _start" , f"{ region_dim } _end" ]
328332 bed .index .name = region_dim
329333 region_size = bed .index .size
330- dsobs = regiongroup .array (
331- name = obs_dim , data = allc_table .index .values , chunks = (chunk_size ), dtype = f"<U{ max_length } "
332- )
333- dsobs .attrs ["_ARRAY_DIMENSIONS" ] = [obs_dim ]
334334 # append region bed to the saved ds
335335 ds = xr .Dataset ()
336336 for col , data in bed .items ():
@@ -340,7 +340,11 @@ def generate_dataset(
340340 for k in ds .coords .keys ():
341341 if ds .coords [k ].dtype == "O" :
342342 ds .coords [k ] = ds .coords [k ].astype (str )
343- ds .to_zarr (f"{ output_path } /{ region_dim } " , mode = "w" )
343+ ds .to_zarr (f"{ output_path } /{ region_dim } " , mode = "w" , consolidated = False )
344+ dsobs = regiongroup .empty (
345+ name = obs_dim , shape = allc_table .index .size , chunks = (chunk_size ), dtype = f"<U{ max_length } "
346+ )
347+ dsobs .attrs ["_ARRAY_DIMENSIONS" ] = [obs_dim ]
344348 count_mc_types = []
345349 for quant in region_config ["quant" ]:
346350 if quant .quant_type == "count" :
@@ -390,7 +394,7 @@ def generate_dataset(
390394 allc_table = allc_chunk ,
391395 region_config = region_config ,
392396 obs_dim = obs_dim ,
393- obs_dim_dtype = obs_dim_dtype ,
397+ obs_dim_dtype = obs_dim_dtype ,
394398 region_dim = region_dim ,
395399 chunk_start = chunk_start ,
396400 regiongroup = regiongroup ,
0 commit comments