From c91df359691c94dd967e4cb2f423e92aeb55907e Mon Sep 17 00:00:00 2001 From: d-v-b-agent Date: Mon, 29 Jun 2026 16:21:03 +0000 Subject: [PATCH 1/4] perf: cache Array shape/ndim and reuse chunk grid in transform resolvers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `AsyncArray.shape` recomputed `self._transform.domain.shape` (a zip+genexpr) on every access — ~10x slower than returning the stored tuple — and `ndim` went through it as well. Cache `_shape` wherever the transform is set and read it directly. Also thread the array's already-built `_chunk_grid` into the transform read/write resolvers instead of rebuilding it via `ChunkGrid.from_metadata` on every call. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_011D1uGKtHP9s7E4WqiUSYG4 --- src/zarr/core/array.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index fe70434029..e39f46f366 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -383,6 +383,7 @@ def __init__( create_codec_pipeline(metadata=metadata_parsed, store=store_path.store), ) object.__setattr__(self, "_transform", IndexTransform.from_shape(metadata_parsed.shape)) + object.__setattr__(self, "_shape", self._transform.domain.shape) @classmethod async def _create( @@ -805,6 +806,7 @@ def _with_transform(self, transform: IndexTransform) -> AsyncArray[T_ArrayMetada object.__setattr__(new, "_chunk_grid", self._chunk_grid) object.__setattr__(new, "codec_pipeline", self.codec_pipeline) object.__setattr__(new, "_transform", transform) + object.__setattr__(new, "_shape", transform.domain.shape) return new @property @@ -825,7 +827,7 @@ def ndim(self) -> int: int The number of dimensions in the Array. """ - return len(self.shape) + return len(self._shape) @property def shape(self) -> tuple[int, ...]: @@ -836,7 +838,7 @@ def shape(self) -> tuple[int, ...]: tuple The shape of the Array. """ - return self._transform.domain.shape + return self._shape @property def storage_shape(self) -> tuple[int, ...]: @@ -1617,6 +1619,7 @@ async def _get_selection_t( self.codec_pipeline, prototype=prototype, out=out, + chunk_grid=self._chunk_grid, ) async def _set_selection_t( @@ -1634,6 +1637,7 @@ async def _set_selection_t( value, self.codec_pipeline, prototype=prototype, + chunk_grid=self._chunk_grid, ) async def setitem( @@ -5645,9 +5649,11 @@ async def _get_selection_via_transform( *, prototype: BufferPrototype, out: NDBuffer | None = None, + chunk_grid: ChunkGrid | None = None, ) -> NDArrayLikeOrScalar: """Read data using an IndexTransform.""" - chunk_grid = ChunkGrid.from_metadata(metadata) + if chunk_grid is None: + chunk_grid = ChunkGrid.from_metadata(metadata) # Get dtype (same logic as existing _get_selection) if metadata.zarr_format == 2: @@ -5743,9 +5749,11 @@ async def _set_selection_via_transform( codec_pipeline: CodecPipeline, *, prototype: BufferPrototype, + chunk_grid: ChunkGrid | None = None, ) -> None: """Write data using an IndexTransform.""" - chunk_grid = ChunkGrid.from_metadata(metadata) + if chunk_grid is None: + chunk_grid = ChunkGrid.from_metadata(metadata) # Get dtype from metadata if metadata.zarr_format == 2: @@ -6404,6 +6412,7 @@ async def _delete_key(key: str) -> None: object.__setattr__(array, "metadata", new_metadata) object.__setattr__(array, "_chunk_grid", new_chunk_grid) object.__setattr__(array, "_transform", IndexTransform.from_shape(new_shape)) + object.__setattr__(array, "_shape", array._transform.domain.shape) async def _append( From 580a956965621b04b7fee33b859c19732a60c602 Mon Sep 17 00:00:00 2001 From: d-v-b-agent Date: Mon, 29 Jun 2026 16:21:03 +0000 Subject: [PATCH 2/4] perf: skip the transform resolver for eager (identity-transform) indexing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A freshly-opened array carries the identity transform, so eager indexing yields exactly the coordinates the original indexers compute. Route such arrays straight to the legacy Basic/Orthogonal/Mask/Coordinate indexer path; only non-identity transforms (opt-in `.lazy[...]` views) go through the transform resolver. This removes the per-chunk transform-resolution overhead from the common eager path while preserving lazy semantics. Also restore the incompatible-shape ValueError in the coordinate-selection legacy branch — the transform path carried it but the fields-only branch had dropped it, and the eager fast-path now exercises that branch. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_011D1uGKtHP9s7E4WqiUSYG4 --- src/zarr/core/array.py | 72 +++++++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 12 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index e39f46f366..1b611bc34c 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -384,6 +384,12 @@ def __init__( ) object.__setattr__(self, "_transform", IndexTransform.from_shape(metadata_parsed.shape)) object.__setattr__(self, "_shape", self._transform.domain.shape) + # A freshly-opened array has the identity transform: input coord i maps to + # storage coord i over the full storage domain. Eager indexing on such an + # array can use the original (legacy) indexers directly, avoiding the + # transform-resolution overhead. Lazy views (created via _with_transform) + # carry a non-identity transform and must go through the transform path. + object.__setattr__(self, "_is_identity", True) @classmethod async def _create( @@ -807,6 +813,9 @@ def _with_transform(self, transform: IndexTransform) -> AsyncArray[T_ArrayMetada object.__setattr__(new, "codec_pipeline", self.codec_pipeline) object.__setattr__(new, "_transform", transform) object.__setattr__(new, "_shape", transform.domain.shape) + object.__setattr__( + new, "_is_identity", _transform_is_identity(transform, self.metadata.shape) + ) return new @property @@ -2904,8 +2913,9 @@ def get_basic_selection( if prototype is None: prototype = default_buffer_prototype() - if fields is not None: - # Fall back to legacy path for structured dtype field selection + if fields is not None or self._async_array._is_identity: + # Eager (identity-transform) arrays and structured-dtype field + # selection use the original indexer path directly. return sync( self.async_array._get_selection( BasicIndexer(selection, self.shape, self._chunk_grid), @@ -3018,8 +3028,9 @@ def set_basic_selection( """ if prototype is None: prototype = default_buffer_prototype() - if fields is not None: - # Fall back to legacy path for structured dtype field selection + if fields is not None or self._async_array._is_identity: + # Eager (identity-transform) arrays and structured-dtype field + # selection use the original indexer path directly. indexer = BasicIndexer(selection, self.shape, self._chunk_grid) sync( self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype) @@ -3154,8 +3165,9 @@ def get_orthogonal_selection( """ if prototype is None: prototype = default_buffer_prototype() - if fields is not None or not is_basic_selection(selection): - # Fall back to legacy path for structured dtypes or advanced selections + if fields is not None or self._async_array._is_identity or not is_basic_selection(selection): + # Eager (identity) arrays, structured dtypes, and advanced selections + # use the original indexer path directly. indexer = OrthogonalIndexer(selection, self.shape, self._chunk_grid) return sync( self.async_array._get_selection( @@ -3277,8 +3289,9 @@ def set_orthogonal_selection( """ if prototype is None: prototype = default_buffer_prototype() - if fields is not None or not is_basic_selection(selection): - # Fall back to legacy path for structured dtypes or advanced selections + if fields is not None or self._async_array._is_identity or not is_basic_selection(selection): + # Eager (identity) arrays, structured dtypes, and advanced selections + # use the original indexer path directly. indexer = OrthogonalIndexer(selection, self.shape, self._chunk_grid) sync( self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype) @@ -3371,7 +3384,7 @@ def get_mask_selection( if prototype is None: prototype = default_buffer_prototype() - if fields is not None: + if fields is not None or self._async_array._is_identity: indexer = MaskIndexer(mask, self.shape, self._chunk_grid) return sync( self.async_array._get_selection( @@ -3476,7 +3489,7 @@ def set_mask_selection( """ if prototype is None: prototype = default_buffer_prototype() - if fields is not None: + if fields is not None or self._async_array._is_identity: indexer = MaskIndexer(mask, self.shape, self._chunk_grid) sync( self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype) @@ -3581,7 +3594,7 @@ def get_coordinate_selection( """ if prototype is None: prototype = default_buffer_prototype() - if fields is not None: + if fields is not None or self._async_array._is_identity: indexer = CoordinateIndexer(selection, self.shape, self._chunk_grid) out_array = sync( self.async_array._get_selection( @@ -3704,7 +3717,7 @@ def set_coordinate_selection( # Normalize empty fields list to None if not fields: fields = None - if fields is not None: + if fields is not None or self._async_array._is_identity: indexer = CoordinateIndexer(selection, self.shape, self._chunk_grid) if not is_scalar(value, self.dtype): try: @@ -3715,6 +3728,13 @@ def set_coordinate_selection( value = np.array(value) if hasattr(value, "shape") and len(value.shape) > 1: value = np.array(value).reshape(-1) + if not is_scalar(value, self.dtype) and ( + isinstance(value, NDArrayLike) and indexer.shape != value.shape + ): + raise ValueError( + f"Attempting to set a selection of {indexer.sel_shape[0]} " + f"elements with an array of {value.shape[0]} elements." + ) sync( self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype) ) @@ -5606,6 +5626,33 @@ def _get_chunk_spec( ) +def _transform_is_identity(transform: IndexTransform, storage_shape: tuple[int, ...]) -> bool: + """Return True if ``transform`` is the identity over the full storage domain. + + An identity transform maps input coordinate ``i`` to storage coordinate ``i`` + across the array's whole storage shape (origin 0, unit stride, dimensions in + order). Such an array is an ordinary eager array — indexing it produces the + same coordinates the legacy indexers compute, so the legacy fast path is + safe. Any narrowing, striding, reordering, or fancy selection (i.e. a lazy + view) yields a non-identity transform that must go through the transform + resolver. Cheap: O(ndim), no array work. + """ + from zarr.core.transforms.output_map import DimensionMap + + domain = transform.domain + ndim = len(storage_shape) + if domain.ndim != ndim or len(transform.output) != ndim: + return False + if domain.inclusive_min != (0,) * ndim or domain.exclusive_max != storage_shape: + return False + for i, m in enumerate(transform.output): + if not ( + type(m) is DimensionMap and m.input_dimension == i and m.offset == 0 and m.stride == 1 + ): + return False + return True + + def _is_complete_chunk( sub_transform: IndexTransform, chunk_grid: ChunkGrid, chunk_coords: tuple[int, ...] ) -> bool: @@ -6413,6 +6460,7 @@ async def _delete_key(key: str) -> None: object.__setattr__(array, "_chunk_grid", new_chunk_grid) object.__setattr__(array, "_transform", IndexTransform.from_shape(new_shape)) object.__setattr__(array, "_shape", array._transform.domain.shape) + object.__setattr__(array, "_is_identity", True) async def _append( From 621c94fc7bb96d963791cba163e5d4eca521b53f Mon Sep 17 00:00:00 2001 From: d-v-b-agent Date: Mon, 29 Jun 2026 16:21:03 +0000 Subject: [PATCH 3/4] perf(lazy): resolve chunk_grid[coords] once per chunk in transform read/write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `_is_complete_chunk` and `_get_chunk_spec` each looked up `chunk_grid[chunk_coords]` — the single most expensive line in the per-chunk loop. Split the ArraySpec builder out (`_array_spec_from_chunk_spec`), look the ChunkSpec up once in the read/write loops, and pass it to both. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_011D1uGKtHP9s7E4WqiUSYG4 --- src/zarr/core/array.py | 64 +++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 1b611bc34c..745cbd48df 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -41,6 +41,7 @@ from zarr.core.chunk_grids import ( SHARDED_INNER_CHUNK_MAX_BYTES, ChunkGrid, + ChunkSpec, _is_rectilinear_chunks, as_regular_shape, guess_chunks, @@ -5606,17 +5607,18 @@ async def _nbytes_stored( return await store_path.store.getsize_prefix(store_path.path) -def _get_chunk_spec( +def _array_spec_from_chunk_spec( metadata: ArrayMetadata, - chunk_grid: ChunkGrid, - chunk_coords: tuple[int, ...], + spec: ChunkSpec, array_config: ArrayConfig, prototype: BufferPrototype, ) -> ArraySpec: - """Build an ArraySpec for a single chunk using the ChunkGrid.""" - spec = chunk_grid[chunk_coords] - if spec is None: - raise IndexError(f"Chunk coordinates {chunk_coords} are out of bounds.") + """Build an ArraySpec from an already-resolved ChunkSpec. + + Split out from :func:`_get_chunk_spec` so the transform read/write path can + resolve ``chunk_grid[chunk_coords]`` once per chunk and feed the same + ``spec`` to both this and :func:`_is_complete_chunk`. + """ return ArraySpec( shape=spec.codec_shape, dtype=metadata.dtype, @@ -5626,6 +5628,20 @@ def _get_chunk_spec( ) +def _get_chunk_spec( + metadata: ArrayMetadata, + chunk_grid: ChunkGrid, + chunk_coords: tuple[int, ...], + array_config: ArrayConfig, + prototype: BufferPrototype, +) -> ArraySpec: + """Build an ArraySpec for a single chunk using the ChunkGrid.""" + spec = chunk_grid[chunk_coords] + if spec is None: + raise IndexError(f"Chunk coordinates {chunk_coords} are out of bounds.") + return _array_spec_from_chunk_spec(metadata, spec, array_config, prototype) + + def _transform_is_identity(transform: IndexTransform, storage_shape: tuple[int, ...]) -> bool: """Return True if ``transform`` is the identity over the full storage domain. @@ -5653,25 +5669,25 @@ def _transform_is_identity(transform: IndexTransform, storage_shape: tuple[int, return True -def _is_complete_chunk( - sub_transform: IndexTransform, chunk_grid: ChunkGrid, chunk_coords: tuple[int, ...] -) -> bool: - """Check if a sub-transform covers an entire chunk.""" +def _is_complete_chunk(sub_transform: IndexTransform, spec: ChunkSpec) -> bool: + """Check if a sub-transform covers an entire chunk. + + ``spec`` is the chunk's already-resolved :class:`ChunkSpec` (the caller looks + it up once and shares it with :func:`_array_spec_from_chunk_spec`). + """ from zarr.core.transforms.output_map import ConstantMap, DimensionMap - spec = chunk_grid[chunk_coords] - if spec is None: - return False + shape = spec.shape for out_dim, m in enumerate(sub_transform.output): if isinstance(m, ConstantMap): # A ConstantMap means a single element is selected along this output dimension, # so the write does not cover the full chunk along this dimension. - chunk_dim_size = spec.shape[out_dim] + chunk_dim_size = shape[out_dim] if chunk_dim_size > 1: return False continue # chunk dim size is 1, so selecting the single element is complete if isinstance(m, DimensionMap): - chunk_dim_size = spec.shape[out_dim] + chunk_dim_size = shape[out_dim] # Compute actual storage range: storage = offset + stride * input_coord dim_lo = sub_transform.domain.inclusive_min[m.input_dimension] dim_hi = sub_transform.domain.exclusive_max[m.input_dimension] @@ -5746,16 +5762,18 @@ async def _get_selection_via_transform( for chunk_coords, sub_transform, out_indices in iter_chunk_transforms( transform, chunk_grid ): + chunk_spec = chunk_grid[chunk_coords] + if chunk_spec is None: + continue chunk_sel, out_sel, da = sub_transform_to_selections(sub_transform, out_indices) drop_axes = da # same for all chunks - is_complete = _is_complete_chunk(sub_transform, chunk_grid, chunk_coords) batch_info.append( ( store_path / metadata.encode_chunk_key(chunk_coords), - _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), + _array_spec_from_chunk_spec(metadata, chunk_spec, _config, prototype), chunk_sel, out_sel, - is_complete, + _is_complete_chunk(sub_transform, chunk_spec), ) ) @@ -5876,16 +5894,18 @@ async def _set_selection_via_transform( batch_info = [] drop_axes: tuple[int, ...] = () for chunk_coords, sub_transform, out_indices in iter_chunk_transforms(transform, chunk_grid): + chunk_spec = chunk_grid[chunk_coords] + if chunk_spec is None: + continue chunk_sel, out_sel, da = sub_transform_to_selections(sub_transform, out_indices) drop_axes = da # same for all chunks - is_complete = _is_complete_chunk(sub_transform, chunk_grid, chunk_coords) batch_info.append( ( store_path / metadata.encode_chunk_key(chunk_coords), - _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype), + _array_spec_from_chunk_spec(metadata, chunk_spec, _config, prototype), chunk_sel, out_sel, - is_complete, + _is_complete_chunk(sub_transform, chunk_spec), ) ) From 04d3dae708ba67812942e445cd811891a2ad5624 Mon Sep 17 00:00:00 2001 From: d-v-b-agent Date: Mon, 29 Jun 2026 16:21:03 +0000 Subject: [PATCH 4/4] perf(lazy): single-pass sub_transform_to_selections with hoisted domain bounds Build chunk_sel and out_sel in a single pass over the output maps instead of two, with the domain min/max tuples hoisted to locals. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_011D1uGKtHP9s7E4WqiUSYG4 --- src/zarr/core/transforms/chunk_resolution.py | 59 ++++++++------------ 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/src/zarr/core/transforms/chunk_resolution.py b/src/zarr/core/transforms/chunk_resolution.py index db066a2525..e3997a3071 100644 --- a/src/zarr/core/transforms/chunk_resolution.py +++ b/src/zarr/core/transforms/chunk_resolution.py @@ -157,51 +157,40 @@ def sub_transform_to_selections( ``(chunk_selection, out_selection, drop_axes)`` """ chunk_sel: list[int | slice | np.ndarray[tuple[int, ...], np.dtype[np.intp]]] = [] - drop_axes: list[int] = [] + out_sel: list[slice | np.ndarray[tuple[int, ...], np.dtype[np.intp]]] = [] + + # Hoist the per-dimension domain bounds out of the loop (attribute-chain + + # tuple indexing per output dim otherwise). Build chunk_sel and out_sel in a + # single pass; ConstantMap dims are dropped (no out_sel entry). + inclusive_min = sub_transform.domain.inclusive_min + exclusive_max = sub_transform.domain.exclusive_max + n_array_maps = 0 for m in sub_transform.output: - if isinstance(m, ConstantMap): + t = type(m) + if t is ConstantMap: chunk_sel.append(m.offset) - elif isinstance(m, DimensionMap): - dim_lo = sub_transform.domain.inclusive_min[m.input_dimension] - dim_hi = sub_transform.domain.exclusive_max[m.input_dimension] + elif t is DimensionMap: + d = m.input_dimension + dim_lo = inclusive_min[d] + dim_hi = exclusive_max[d] start = m.offset + m.stride * dim_lo stop = m.offset + m.stride * dim_hi if m.stride < 0: start, stop = stop + 1, start + 1 chunk_sel.append(slice(start, stop, m.stride)) - elif isinstance(m, ArrayMap): + out_sel.append(slice(dim_lo, dim_hi)) + else: # ArrayMap + n_array_maps += 1 if m.offset == 0 and m.stride == 1: chunk_sel.append(m.index_array) else: - storage_coords = m.offset + m.stride * m.index_array - chunk_sel.append(storage_coords.astype(np.intp)) + chunk_sel.append((m.offset + m.stride * m.index_array).astype(np.intp)) + # Orthogonal ArrayMap: out_indices holds the surviving positions. + out_sel.append(out_indices if out_indices is not None else slice(0, len(m.index_array))) - # Build out_sel: one entry per non-dropped output dim. - out_sel: list[slice | np.ndarray[tuple[int, ...], np.dtype[np.intp]]] = [] + # Vectorized: ≥2 correlated ArrayMaps scatter through a single shared index. + if out_indices is not None and n_array_maps >= 2: + out_sel = [out_indices] - # Vectorized: multiple correlated ArrayMaps share one scatter index - is_vectorized = ( - out_indices is not None - and sum(1 for m in sub_transform.output if isinstance(m, ArrayMap)) >= 2 - ) - - if is_vectorized: - assert out_indices is not None - out_sel.append(out_indices) - else: - for m in sub_transform.output: - if isinstance(m, ConstantMap): - continue - if isinstance(m, DimensionMap): - lo = sub_transform.domain.inclusive_min[m.input_dimension] - hi = sub_transform.domain.exclusive_max[m.input_dimension] - out_sel.append(slice(lo, hi)) - elif isinstance(m, ArrayMap): - if out_indices is not None: - # Orthogonal ArrayMap: out_indices has the surviving positions - out_sel.append(out_indices) - else: - out_sel.append(slice(0, len(m.index_array))) - - return tuple(chunk_sel), tuple(out_sel), tuple(drop_axes) + return tuple(chunk_sel), tuple(out_sel), ()