Skip to content

Commit 9881072

Browse files
shoyerXarray-Beam authors
authored andcommitted
Port xbeam_rechunk to use the new Datasets API
PiperOrigin-RevId: 813350345
1 parent 7a27923 commit 9881072

1 file changed

Lines changed: 6 additions & 18 deletions

File tree

examples/xbeam_rechunk.py

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -61,31 +61,19 @@ def _parse_chunks_str(chunks_str: str) -> dict[str, int]:
6161

6262

6363
def main(argv):
64-
source_dataset, source_chunks = xbeam.open_zarr(INPUT_PATH.value)
65-
template = xbeam.make_template(source_dataset)
66-
67-
target_chunks = source_chunks | _parse_chunks_str(TARGET_CHUNKS.value)
64+
target_chunks = _parse_chunks_str(TARGET_CHUNKS.value)
6865

6966
if TARGET_SHARDS.value is not None:
70-
target_shards = source_chunks | _parse_chunks_str(TARGET_SHARDS.value)
67+
target_shards = _parse_chunks_str(TARGET_SHARDS.value)
7168
else:
7269
target_shards = None
7370

74-
itemsize = max(variable.dtype.itemsize for variable in template.values())
75-
7671
with beam.Pipeline(runner=RUNNER.value, argv=argv) as root:
77-
(
78-
root
79-
| xbeam.DatasetToChunks(source_dataset, source_chunks, split_vars=True)
80-
| xbeam.Rechunk( # pytype: disable=wrong-arg-types
81-
source_dataset.sizes,
82-
source_chunks,
83-
target_chunks if target_shards is None else target_shards,
84-
itemsize=itemsize,
85-
)
86-
| xbeam.ChunksToZarr(
72+
root |= (
73+
xbeam.Dataset.from_zarr(INPUT_PATH.value, split_vars=True)
74+
.rechunk(target_chunks if target_shards is None else target_shards)
75+
.to_zarr(
8776
OUTPUT_PATH.value,
88-
template,
8977
zarr_chunks=target_chunks,
9078
zarr_shards=target_shards,
9179
zarr_format=ZARR_FORMAT.value,

0 commit comments

Comments
 (0)