Skip to content

Commit 69f05a7

Browse files
shoyer authored and Xarray-Beam authors committed
Add xbeam.normalize_chunks() and update xbeam.Dataset docstrings
PiperOrigin-RevId: 813800457
1 parent b38006f commit 69f05a7

5 files changed

Lines changed: 448 additions & 109 deletions

File tree

docs/api.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,4 +103,5 @@ guarantees.
103103
Dataset.mean
104104
Dataset.head
105105
Dataset.pipe
106+
normalize_chunks
106107
```

docs/high-level.ipynb

Lines changed: 5 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -24,28 +24,6 @@
2424
"```"
2525
]
2626
},
27-
{
28-
"metadata": {
29-
"id": "Ht5100QIRpyd",
30-
"tags": [
31-
"hide-cell"
32-
]
33-
},
34-
"cell_type": "code",
35-
"source": [
36-
"# small formatting improvements\n",
37-
"import contextlib\n",
38-
"\n",
39-
"@contextlib.contextmanager\n",
40-
"def print_error():\n",
41-
" try:\n",
42-
" yield\n",
43-
" except Exception as e:\n",
44-
" print(f'{type(e).__name__}: {e}')"
45-
],
46-
"outputs": [],
47-
"execution_count": 2
48-
},
4927
{
5028
"metadata": {
5129
"id": "EkH9Na4ezuTi"
@@ -254,11 +232,13 @@
254232
},
255233
"cell_type": "code",
256234
"source": [
257-
"with print_error():\n",
235+
"try:\n",
258236
" (\n",
259237
" xbeam.Dataset.from_zarr('example_data.zarr')\n",
260238
" .map_blocks(lambda ds: ds.compute()) # load into memory\n",
261-
" )"
239+
" )\n",
240+
"except Exception as e:\n",
241+
" print(f'{type(e).__name__}: {e}')"
262242
],
263243
"outputs": [],
264244
"execution_count": 9
@@ -290,7 +270,7 @@
290270
},
291271
"cell_type": "markdown",
292272
"source": [
293-
"In other situations, you might want to perform an operation that returns something other than an `xarray.Dataset`, e.g., to write all chunks as individual files to disk. In these situations, you can switch to the lower-level Xarray-Beam [data model](data-model), and use raw Beam operations:"
273+
"Sometimes, your computation doesn't fit into the `map_blocks` paradigm because you don't want to create `xarray.Dataset` objects. For these cases, you can switch to the lower-level Xarray-Beam [data model](data-model), and use raw Beam operations:"
294274
]
295275
},
296276
{

xarray_beam/__init__.py

Lines changed: 32 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -13,43 +13,46 @@
1313
# limitations under the License.
1414
"""Public API for Xarray-Beam."""
1515

16-
# pylint: disable=g-multiple-import
16+
# Note: import <name> as <name> is required for names to be exported.
17+
# See PEP 484 & https://github.com/jax-ml/jax/issues/7570
18+
# pylint: disable=g-multiple-import,useless-import-alias,g-importing-member
1719
from xarray_beam._src.combiners import (
18-
Mean,
19-
MeanCombineFn,
20+
Mean as Mean,
21+
MeanCombineFn as MeanCombineFn,
2022
)
2123
from xarray_beam._src.core import (
22-
Key,
23-
DatasetToChunks,
24-
ValidateEachChunk,
25-
offsets_to_slices,
26-
validate_chunk
24+
Key as Key,
25+
DatasetToChunks as DatasetToChunks,
26+
ValidateEachChunk as ValidateEachChunk,
27+
offsets_to_slices as offsets_to_slices,
28+
validate_chunk as validate_chunk,
2729
)
2830
from xarray_beam._src.dataset import (
29-
Dataset,
31+
Dataset as Dataset,
32+
normalize_chunks as normalize_chunks,
3033
)
3134
from xarray_beam._src.rechunk import (
32-
ConsolidateChunks,
33-
ConsolidateVariables,
34-
SplitChunks,
35-
SplitVariables,
36-
Rechunk,
37-
consolidate_chunks,
38-
consolidate_variables,
39-
consolidate_fully,
40-
split_chunks,
41-
split_variables,
42-
in_memory_rechunk,
35+
ConsolidateChunks as ConsolidateChunks,
36+
ConsolidateVariables as ConsolidateVariables,
37+
SplitChunks as SplitChunks,
38+
SplitVariables as SplitVariables,
39+
Rechunk as Rechunk,
40+
consolidate_chunks as consolidate_chunks,
41+
consolidate_variables as consolidate_variables,
42+
consolidate_fully as consolidate_fully,
43+
split_chunks as split_chunks,
44+
split_variables as split_variables,
45+
in_memory_rechunk as in_memory_rechunk,
4346
)
4447
from xarray_beam._src.zarr import (
45-
open_zarr,
46-
make_template,
47-
replace_template_dims,
48-
setup_zarr,
49-
validate_zarr_chunk,
50-
write_chunk_to_zarr,
51-
ChunksToZarr,
52-
DatasetToZarr,
48+
open_zarr as open_zarr,
49+
make_template as make_template,
50+
replace_template_dims as replace_template_dims,
51+
setup_zarr as setup_zarr,
52+
validate_zarr_chunk as validate_zarr_chunk,
53+
write_chunk_to_zarr as write_chunk_to_zarr,
54+
ChunksToZarr as ChunksToZarr,
55+
DatasetToZarr as DatasetToZarr,
5356
)
5457

55-
__version__ = '0.10.1' # automatically synchronized to pyproject.toml
58+
__version__ = '0.10.2' # automatically synchronized to pyproject.toml

0 commit comments

Comments
 (0)