Skip to content

Commit 7c88363

Browse files
committed
Update cool_ds.py
1 parent 8d27d46 commit 7c88363

1 file changed

Lines changed: 48 additions & 49 deletions

File tree

ALLCools/mcds/cool_ds.py

Lines changed: 48 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -208,27 +208,27 @@ def get_cooler(
208208
cooler_kwargs :
209209
Additional arguments to pass to create_cooler
210210
"""
211-
import subprocess
212-
213-
from cooler import create_cooler
214-
from scipy.sparse import coo_matrix
215-
216-
def _chrom_iterator(
217-
_samples,
218-
_value_type,
219-
_chrom_offset,
220-
_da_name,
221-
add_trans=False,
222-
):
223-
"""Iterate through the raw matrices and chromosomes of cells."""
224-
chrom_sizes = self.chrom_sizes
225-
226-
def _iter_1d(_chrom1, _chrom2):
227-
print(f"Saving {_chrom1} x {_chrom2 if _chrom2 is not None else _chrom1}...")
228-
chrom_ds = self.fetch(chrom=_chrom1, chrom2=_chrom2)
229-
230-
# get chrom 2D np.array
231-
with dask.config.set(**{"array.slicing.split_large_chunks": False}):
211+
with dask.config.set(scheduler="sync"):
212+
import subprocess
213+
214+
from cooler import create_cooler
215+
from scipy.sparse import coo_matrix
216+
217+
def _chrom_iterator(
218+
_samples,
219+
_value_type,
220+
_chrom_offset,
221+
_da_name,
222+
add_trans=False,
223+
):
224+
"""Iterate through the raw matrices and chromosomes of cells."""
225+
chrom_sizes = self.chrom_sizes
226+
227+
def _iter_1d(_chrom1, _chrom2):
228+
print(f"Saving {_chrom1} x {_chrom2 if _chrom2 is not None else _chrom1}...")
229+
chrom_ds = self.fetch(chrom=_chrom1, chrom2=_chrom2)
230+
231+
# get chrom 2D np.array
232232
matrix = chrom_ds.matrix(
233233
samples=_samples,
234234
value_type=_value_type,
@@ -244,32 +244,31 @@ def _iter_1d(_chrom1, _chrom2):
244244
if len(matrix.shape) > 2:
245245
matrix = matrix.squeeze()
246246

247-
# to coo then to pixel
248-
matrix = coo_matrix(matrix)
249-
_pixel_df = pd.DataFrame({"bin1_id": matrix.row, "bin2_id": matrix.col, "count": matrix.data})
250-
251-
# add chrom offset
252-
if _chrom2 is None:
253-
# both row and col are chrom1
254-
_pixel_df.iloc[:, :2] += _chrom_offset[_chrom1]
247+
# to coo then to pixel
248+
matrix = coo_matrix(matrix)
249+
_pixel_df = pd.DataFrame({"bin1_id": matrix.row, "bin2_id": matrix.col, "count": matrix.data})
250+
251+
# add chrom offset
252+
if _chrom2 is None:
253+
# both row and col are chrom1
254+
_pixel_df.iloc[:, :2] += _chrom_offset[_chrom1]
255+
else:
256+
# row is chrom1, add chrom1 offset
257+
_pixel_df.iloc[:, 0] += _chrom_offset[_chrom1]
258+
# col is chrom2, add chrom2 offset
259+
_pixel_df.iloc[:, 1] += _chrom_offset[_chrom2]
260+
return _pixel_df
261+
262+
if add_trans:
263+
raise NotImplementedError
255264
else:
256-
# row is chrom1, add chrom1 offset
257-
_pixel_df.iloc[:, 0] += _chrom_offset[_chrom1]
258-
# col is chrom2, add chrom2 offset
259-
_pixel_df.iloc[:, 1] += _chrom_offset[_chrom2]
260-
return _pixel_df
265+
for chrom in chrom_sizes.keys():
266+
pixel_df = _iter_1d(chrom, None)
267+
yield pixel_df
261268

262-
if add_trans:
263-
raise NotImplementedError
264-
else:
265-
for chrom in chrom_sizes.keys():
266-
pixel_df = _iter_1d(chrom, None)
267-
yield pixel_df
269+
bins_df = binnify(self.chrom_sizes, binsize=self.bin_size)
270+
chrom_offset = _get_chrom_offsets(bins_df)
268271

269-
bins_df = binnify(self.chrom_sizes, binsize=self.bin_size)
270-
chrom_offset = _get_chrom_offsets(bins_df)
271-
272-
with dask.config.set(scheduler="sync"):
273272
create_cooler(
274273
cool_uri=f"{output_prefix}.cool",
275274
bins=bins_df,
@@ -285,10 +284,10 @@ def _iter_1d(_chrom1, _chrom2):
285284
**(cooler_kwargs or {}),
286285
)
287286

288-
if zoomify:
289-
subprocess.run(["cooler", "zoomify", f"{output_prefix}.cool", "-p", str(zoomify_cpu)], check=True)
290-
# delete the original cooler file
291-
subprocess.run(["rm", f"{output_prefix}.cool"], check=True)
287+
if zoomify:
288+
subprocess.run(["cooler", "zoomify", f"{output_prefix}.cool", "-p", str(zoomify_cpu)], check=True)
289+
# delete the original cooler file
290+
subprocess.run(["rm", f"{output_prefix}.cool"], check=True)
292291

293292
return
294293

@@ -366,7 +365,7 @@ def matrix(
366365
-------
367366
np.ndarray
368367
"""
369-
with dask.config.set(scheduler="sync"):
368+
with dask.config.set(scheduler="synchronous"):
370369
sel_dict = {}
371370
if samples is not None:
372371
sel_dict[self.sample_dim] = samples

0 commit comments

Comments
 (0)