From d5405627d21e5d5c3e9290ee2883f2cdf80f1c4d Mon Sep 17 00:00:00 2001 From: anon Date: Sat, 20 Jun 2026 11:58:01 +0200 Subject: [PATCH 1/9] Migrate supported Python to 3.12, 3.13, 3.14 Drop Python 3.11 (anndata>=0.12 already requires >=3.12, so 3.11 was effectively broken) and add 3.14. - pyproject.toml: requires-python ">=3.12", ruff target-version py312 - .mypy.ini: python_version 3.12 - test.yaml: matrix 3.12/3.13/3.14; repoint bleeding-edge deps job to 3.14 and drop the obsolete requires-python sed hack Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/test.yaml | 8 ++++---- .mypy.ini | 2 +- pyproject.toml | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 6d1027b29..838e81691 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -19,11 +19,12 @@ jobs: fail-fast: false matrix: include: - - {os: windows-latest, python: "3.11", dask-version: "2026.3.0", name: "min dask"} + - {os: windows-latest, python: "3.12", dask-version: "2026.3.0", name: "min dask"} - {os: windows-latest, python: "3.14", dask-version: "latest"} - - {os: ubuntu-latest, python: "3.11", dask-version: "latest"} + - {os: ubuntu-latest, python: "3.12", dask-version: "latest"} + - {os: ubuntu-latest, python: "3.13", dask-version: "latest"} - {os: ubuntu-latest, python: "3.14", dask-version: "latest"} - - {os: macos-latest, python: "3.11", dask-version: "latest"} + - {os: macos-latest, python: "3.12", dask-version: "latest"} - {os: macos-latest, python: "3.14", prerelease: "allow", name: "prerelease"} env: OS: ${{ matrix.os }} @@ -41,7 +42,6 @@ jobs: - name: Install dependencies run: | if [[ "${PRERELEASE}" == "allow" ]]; then - sed -i '' 's/requires-python.*//' pyproject.toml # otherwise uv complains that anndata requires python>=3.12 and we only do >=3.11 😱 uv add git+https://github.com/scverse/anndata.git uv add pandas>=3.dev0 fi diff --git a/.mypy.ini b/.mypy.ini index 78edd09ca..64f98e8ba 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -1,5 +1,5 @@ [mypy] -python_version = 3.11 +python_version = 3.12 ignore_errors = False warn_redundant_casts = True diff --git a/pyproject.toml b/pyproject.toml index 353811c98..03181eadb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ maintainers = [ urls.Documentation = "https://spatialdata.scverse.org/en/latest" urls.Source = "https://github.com/scverse/spatialdata.git" urls.Home-page = "https://github.com/scverse/spatialdata.git" -requires-python = ">=3.11" +requires-python = ">=3.12" dynamic= [ "version" # allow version to be set by git tags ] @@ -145,7 +145,7 @@ exclude = [ ] line-length = 120 -target-version = "py311" +target-version = "py312" [tool.ruff.lint] ignore = [ From 2ee65063fb419196269e4043708765526d2ce745 Mon Sep 17 00:00:00 2001 From: anon Date: Sat, 20 Jun 2026 12:04:53 +0200 Subject: [PATCH 2/9] Adopt PEP 695 type aliases for py312 (ruff UP040) The target-version bump to py312 enables ruff UP040. Rewrite the four explicit TypeAlias declarations to the `type` keyword and drop the now unused TypeAlias imports. Annotation-only aliases (the repo uses `from __future__ import annotations`), so no runtime behavior change. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/spatialdata/_types.py | 4 ++-- src/spatialdata/models/_utils.py | 4 ++-- src/spatialdata/models/chunks_utils.py | 4 ++-- src/spatialdata/models/models.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/spatialdata/_types.py b/src/spatialdata/_types.py index c5bd76de9..da4443afc 100644 --- a/src/spatialdata/_types.py +++ b/src/spatialdata/_types.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, TypeAlias +from typing import Any import numpy as np from xarray import DataArray, DataTree @@ -12,5 +12,5 @@ ArrayLike = NDArray[np.floating[Any]] IntArrayLike = NDArray[np.integer[Any]] -Raster_T: TypeAlias = DataArray | DataTree +type Raster_T = DataArray | DataTree ColorLike = tuple[float, ...] | str diff --git a/src/spatialdata/models/_utils.py b/src/spatialdata/models/_utils.py index 4c71b063a..4a1d122f1 100644 --- a/src/spatialdata/models/_utils.py +++ b/src/spatialdata/models/_utils.py @@ -2,7 +2,7 @@ import warnings from functools import singledispatch -from typing import TYPE_CHECKING, Any, TypeAlias +from typing import TYPE_CHECKING, Any import dask.dataframe as dd import geopandas @@ -17,7 +17,7 @@ from spatialdata._utils import _check_match_length_channels_c_dim from spatialdata.transformations.transformations import BaseTransformation -SpatialElement: TypeAlias = DataArray | DataTree | GeoDataFrame | DaskDataFrame +type SpatialElement = DataArray | DataTree | GeoDataFrame | DaskDataFrame TRANSFORM_KEY = "transform" DEFAULT_COORDINATE_SYSTEM = "global" ValidAxis_t = str diff --git a/src/spatialdata/models/chunks_utils.py b/src/spatialdata/models/chunks_utils.py index 474067011..03d6e6d99 100644 --- a/src/spatialdata/models/chunks_utils.py +++ b/src/spatialdata/models/chunks_utils.py @@ -1,9 +1,9 @@ from __future__ import annotations from collections.abc import Mapping, Sequence -from typing import Any, TypeAlias +from typing import Any -Chunks_t: TypeAlias = int | tuple[int, ...] | tuple[tuple[int, ...], ...] | Mapping[Any, None | int | tuple[int, ...]] +type Chunks_t = int | tuple[int, ...] | tuple[tuple[int, ...], ...] | Mapping[Any, None | int | tuple[int, ...]] def normalize_chunks( diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index ef903c3df..c458b6ab8 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -6,7 +6,7 @@ from collections.abc import Mapping, Sequence from functools import singledispatchmethod from pathlib import Path -from typing import Any, Literal, TypeAlias +from typing import Any, Literal import dask.dataframe as dd import numpy as np @@ -1252,7 +1252,7 @@ def parse( return adata -Schema_t: TypeAlias = ( +type Schema_t = ( type[Image2DModel] | type[Image3DModel] | type[Labels2DModel] From 5b4b01fb7a7498eb99db64e091976c745054bb67 Mon Sep 17 00:00:00 2001 From: anon Date: Sat, 20 Jun 2026 12:26:26 +0200 Subject: [PATCH 3/9] Fix infinite recursion in dataloader.__getattr__ The module __getattr__ fell through to `getattr(spatialdata.dataloader, attr_name)` for any unknown name, re-entering itself indefinitely (RecursionError) instead of raising AttributeError per PEP 562. This was latent until the docs build hit it: the PEP 695 `type` aliases live in private modules, so sphinx-autodoc-typehints probes every `spatialdata.*` submodule with getattr() looking for a public re-export, tripping the recursion and failing the RTD build. Raise AttributeError for unknown names; drop the now-unused `import spatialdata` and tighten the return type to type[ImageTilesDataset]. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/spatialdata/dataloader/__init__.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/spatialdata/dataloader/__init__.py b/src/spatialdata/dataloader/__init__.py index a0196f92f..a07af2702 100644 --- a/src/spatialdata/dataloader/__init__.py +++ b/src/spatialdata/dataloader/__init__.py @@ -1,8 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any - -import spatialdata +from typing import TYPE_CHECKING if TYPE_CHECKING: from spatialdata.dataloader.datasets import ImageTilesDataset @@ -12,10 +10,10 @@ ] -def __getattr__(attr_name: str) -> ImageTilesDataset | Any: +def __getattr__(attr_name: str) -> type[ImageTilesDataset]: if attr_name == "ImageTilesDataset": from spatialdata.dataloader.datasets import ImageTilesDataset return ImageTilesDataset - return getattr(spatialdata.dataloader, attr_name) + raise AttributeError(f"module {__name__!r} has no attribute {attr_name!r}") From cf7e1c5885418f45f96dd45307a4c038e3306de9 Mon Sep 17 00:00:00 2001 From: anon Date: Sat, 20 Jun 2026 12:40:26 +0200 Subject: [PATCH 4/9] Pin ome_zarr<0.18.0 ome_zarr 0.18 switched to the NGFF 0.5 layout: channel metadata moved out of the `omero` block, so overwrite_channel_names() in _io/_utils.py gets None and crashes (~48 IO test failures). This breaks main independently of the Python bump. Pin as a stopgap until NGFF 0.5 is supported. Co-Authored-By: Claude Opus 4.8 (1M context) --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 03181eadb..f15037d74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,9 @@ dependencies = [ "networkx", "numba>=0.55.0", "numpy", - "ome_zarr>=0.16.0", + # ome_zarr 0.18 switched to the NGFF 0.5 layout (channel metadata moved out of the `omero` block), + # which spatialdata's IO does not yet support. TODO: support NGFF 0.5 and drop this upper bound. + "ome_zarr>=0.16.0,<0.18.0", "pandas", "pooch", "pyarrow", From 25443bdae60194ee845bff8b40eb98db6803fca9 Mon Sep 17 00:00:00 2001 From: anon Date: Sat, 20 Jun 2026 12:58:46 +0200 Subject: [PATCH 5/9] Support ome_zarr 0.18 by writing omero channel metadata ourselves ome_zarr 0.18 refactored the functional write_image/write_multiscale entrypoints (ome/ome-zarr-py#515) to read omero from the top-level metadata dict; spatialdata passes it nested under metadata["metadata"], so 0.18 silently dropped it. Effects: write_channel_names() crashed (omero block absent) and plain write->read lost channel names entirely (["r","g","b"] came back as [0,1,2]). Instead of depending on ome-zarr-py to emit omero, write it ourselves: - _write_raster() now calls overwrite_channel_names() after every image write, so the omero block is always present (idempotent on 0.17). - overwrite_channel_names() defaults to an empty omero block when none exists yet. Pin ome_zarr>=0.18 so CI resolves the same version a fresh install gets (uv otherwise lands on 0.17, hiding 0.18 regressions). Verified: full tests/io suite (227) passes on 0.18. Co-Authored-By: Claude Opus 4.8 (1M context) --- pyproject.toml | 6 +++--- src/spatialdata/_io/_utils.py | 5 +++-- src/spatialdata/_io/io_raster.py | 5 +++++ 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f15037d74..716f6271b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,9 +35,9 @@ dependencies = [ "networkx", "numba>=0.55.0", "numpy", - # ome_zarr 0.18 switched to the NGFF 0.5 layout (channel metadata moved out of the `omero` block), - # which spatialdata's IO does not yet support. TODO: support NGFF 0.5 and drop this upper bound. - "ome_zarr>=0.16.0,<0.18.0", + # >=0.18: 0.18 stopped emitting the omero channel block via the functional writer (we write it + # ourselves in _io now); pin so CI tests the same version a fresh install resolves to. + "ome_zarr>=0.18.0", "pandas", "pooch", "pyarrow", diff --git a/src/spatialdata/_io/_utils.py b/src/spatialdata/_io/_utils.py index 3be56d67f..c38a41016 100644 --- a/src/spatialdata/_io/_utils.py +++ b/src/spatialdata/_io/_utils.py @@ -154,8 +154,9 @@ def overwrite_channel_names(group: zarr.Group, element: DataArray | DataTree) -> channel_names = element["scale0"]["image"].coords["c"].data.tolist() channel_metadata = [{"label": name} for name in channel_names] - # This is required here as we do not use the load node API of ome-zarr - omero_meta = group.attrs.get("omero", None) or group.attrs.get("ome", {}).get("omero") + # This is required here as we do not use the load node API of ome-zarr. + # ome-zarr-py >= 0.18 no longer emits an `omero` block, so default to an empty one. + omero_meta = group.attrs.get("omero", None) or group.attrs.get("ome", {}).get("omero") or {} omero_meta["channels"] = channel_metadata if ome_meta := group.attrs.get("ome", None): ome_meta["omero"] = omero_meta diff --git a/src/spatialdata/_io/io_raster.py b/src/spatialdata/_io/io_raster.py index 2feb7a779..dcf33496f 100644 --- a/src/spatialdata/_io/io_raster.py +++ b/src/spatialdata/_io/io_raster.py @@ -20,6 +20,7 @@ from spatialdata._io._utils import ( _get_transformations_from_ngff_dict, + overwrite_channel_names, overwrite_coordinate_transformations_raster, ) from spatialdata._io.format import ( @@ -334,6 +335,10 @@ def _write_raster( raise ValueError("Not a valid labels object") group = group["labels"][name] if raster_type == "labels" else group + if raster_type == "image": + # ome-zarr-py >= 0.18 no longer writes the omero channel metadata from the nested `metadata` dict we + # pass above, so we always (re)write it ourselves to keep channel names round-tripping across versions. + overwrite_channel_names(group, raster_data) if ATTRS_KEY not in group.attrs: group.attrs[ATTRS_KEY] = {} attrs = group.attrs[ATTRS_KEY] From 86b771b17c0ad95ce6fc8058e095503956a757d6 Mon Sep 17 00:00:00 2001 From: anon Date: Sat, 20 Jun 2026 13:08:52 +0200 Subject: [PATCH 6/9] Clean up: drop redundant omero metadata build, lean comments overwrite_channel_names() now writes the omero channel block on every image write, so building the same metadata to pass into the ome-zarr-py writer (which 0.18 ignores anyway) was dead duplication. Remove it along with the now-unused get_channel_names import, and tighten comments. Co-Authored-By: Claude Opus 4.8 (1M context) --- pyproject.toml | 3 +-- src/spatialdata/_io/_utils.py | 5 ++--- src/spatialdata/_io/io_raster.py | 11 +---------- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 716f6271b..9d63855d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,8 +35,7 @@ dependencies = [ "networkx", "numba>=0.55.0", "numpy", - # >=0.18: 0.18 stopped emitting the omero channel block via the functional writer (we write it - # ourselves in _io now); pin so CI tests the same version a fresh install resolves to. + # >=0.18 dropped omero from the functional writer (we write it ourselves); pin so CI matches a fresh install. "ome_zarr>=0.18.0", "pandas", "pooch", diff --git a/src/spatialdata/_io/_utils.py b/src/spatialdata/_io/_utils.py index c38a41016..fa5af1dd7 100644 --- a/src/spatialdata/_io/_utils.py +++ b/src/spatialdata/_io/_utils.py @@ -154,9 +154,8 @@ def overwrite_channel_names(group: zarr.Group, element: DataArray | DataTree) -> channel_names = element["scale0"]["image"].coords["c"].data.tolist() channel_metadata = [{"label": name} for name in channel_names] - # This is required here as we do not use the load node API of ome-zarr. - # ome-zarr-py >= 0.18 no longer emits an `omero` block, so default to an empty one. - omero_meta = group.attrs.get("omero", None) or group.attrs.get("ome", {}).get("omero") or {} + # We don't use the ome-zarr load node API, and ome-zarr-py >= 0.18 emits no `omero` block, so default to empty. + omero_meta = group.attrs.get("omero") or group.attrs.get("ome", {}).get("omero") or {} omero_meta["channels"] = channel_metadata if ome_meta := group.attrs.get("ome", None): ome_meta["omero"] = omero_meta diff --git a/src/spatialdata/_io/io_raster.py b/src/spatialdata/_io/io_raster.py index dcf33496f..276f016bd 100644 --- a/src/spatialdata/_io/io_raster.py +++ b/src/spatialdata/_io/io_raster.py @@ -29,7 +29,6 @@ get_ome_zarr_format, ) from spatialdata._utils import get_pyramid_levels -from spatialdata.models._utils import get_channel_names from spatialdata.models.models import ATTRS_KEY from spatialdata.models.pyramids_utils import dask_arrays_to_datatree from spatialdata.transformations._utils import ( @@ -302,13 +301,6 @@ def _write_raster( metadata["name"] = name metadata["label_metadata"] = label_metadata - # convert channel names to channel metadata in omero - if raster_type == "image": - metadata["metadata"] = {"omero": {"channels": []}} - channels = get_channel_names(raster_data) - for c in channels: - metadata["metadata"]["omero"]["channels"].append({"label": c}) # type: ignore[union-attr, index, call-overload] - if isinstance(raster_data, DataArray): _write_raster_dataarray( raster_type, @@ -336,8 +328,7 @@ def _write_raster( group = group["labels"][name] if raster_type == "labels" else group if raster_type == "image": - # ome-zarr-py >= 0.18 no longer writes the omero channel metadata from the nested `metadata` dict we - # pass above, so we always (re)write it ourselves to keep channel names round-tripping across versions. + # ome-zarr-py >= 0.18 no longer writes the omero channel metadata, so we write it ourselves. overwrite_channel_names(group, raster_data) if ATTRS_KEY not in group.attrs: group.attrs[ATTRS_KEY] = {} From b468f7285f0bd379268012bf83b9d1b0379c9af7 Mon Sep 17 00:00:00 2001 From: anon Date: Sat, 20 Jun 2026 13:39:50 +0200 Subject: [PATCH 7/9] Require integer (or bool) dtype for labels ome_zarr 0.18's label writer auto-parses unique label values and validates each `label-value` as an integer (via ome-zarr-models), which rejected spatialdata's float-dtype labels. Float labels are meaningless for segmentation masks and inconsistent with the rest of the codebase (fixtures, rasterize, relabel_sequential all assume integers), so the correct fix is to enforce it: Labels{2,3}DModel.parse now rejects non-integer/bool data with a clear error. Tests that fed float data to label models (reusing image-style generators) now use integers; test_rasterize_bins_invalid casts a parsed integer label to float to still exercise rasterize_bins' own guard. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/spatialdata/models/models.py | 14 ++++++++++++-- tests/core/operations/test_rasterize_bins.py | 4 ++-- tests/core/query/test_spatial_query.py | 2 +- tests/models/test_models.py | 16 ++++++++-------- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index c458b6ab8..c863c06f2 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -399,6 +399,12 @@ def _check_chunk_size_not_too_large(cls, data: DataArray | DataTree) -> None: cls._check_chunk_size_not_too_large(data[d][name]) +def _validate_labels_dtype(data: DataArray | DataTree) -> None: + dtype = data.dtype if isinstance(data, DataArray) else data["scale0"]["image"].dtype + if not (np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, np.bool_)): + raise ValueError(f"Labels must have an integer dtype, found {dtype}. Cast the data, e.g. `.astype(np.uint16)`.") + + class Labels2DModel(RasterSchema): dims = (Y, X) @@ -410,7 +416,9 @@ def parse( # noqa: D102 ) -> DataArray | DataTree: if kwargs.get("c_coords") is not None: raise ValueError("`c_coords` is not supported for labels") - return super().parse(*args, **kwargs) + parsed = super().parse(*args, **kwargs) + _validate_labels_dtype(parsed) + return parsed class Labels3DModel(RasterSchema): @@ -420,7 +428,9 @@ class Labels3DModel(RasterSchema): def parse(self, *args: Any, **kwargs: Any) -> DataArray | DataTree: # noqa: D102 if kwargs.get("c_coords") is not None: raise ValueError("`c_coords` is not supported for labels") - return super().parse(*args, **kwargs) + parsed = super().parse(*args, **kwargs) + _validate_labels_dtype(parsed) + return parsed class Image2DModel(RasterSchema): diff --git a/tests/core/operations/test_rasterize_bins.py b/tests/core/operations/test_rasterize_bins.py index 2918855db..fa72c1d03 100644 --- a/tests/core/operations/test_rasterize_bins.py +++ b/tests/core/operations/test_rasterize_bins.py @@ -276,8 +276,8 @@ def _get_sdata(n: int): value_key="instance_id", ) - # if bins is a DataArray, it should hold integers - image = Labels2DModel.parse(RNG.normal(size=(3, 3)), dims=("y", "x")) + # if bins is a DataArray, it should hold integers (cast after parsing, which itself rejects floats) + image = Labels2DModel.parse(RNG.integers(0, 10, size=(3, 3)), dims=("y", "x")).astype(float) del sdata["points"] sdata["points"] = image with pytest.raises( diff --git a/tests/core/query/test_spatial_query.py b/tests/core/query/test_spatial_query.py index b35ceae12..dd402b045 100644 --- a/tests/core/query/test_spatial_query.py +++ b/tests/core/query/test_spatial_query.py @@ -265,7 +265,7 @@ def test_query_raster( shape = (10,) + shape shape = (n_channels,) + shape if not is_labels else (1,) + shape - image = np.zeros(shape) + image = np.zeros(shape, dtype=int if is_labels else float) axes = ["y", "x"] if is_3d: image[:, 2:7, 5::, 0:5] = 1 diff --git a/tests/models/test_models.py b/tests/models/test_models.py index 041a222f2..3b173a15b 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -163,12 +163,12 @@ def test_raster_schema( converter = partial(converter, dims=dims) elif converter is to_spatial_image: converter = partial(converter, dims=model.dims) - if n_dims == 2: - image: ArrayLike = RNG.uniform(size=(10, 10)) - elif n_dims == 3: - image: ArrayLike = RNG.uniform(size=(3, 10, 10)) - elif n_dims == 4: - image: ArrayLike = RNG.uniform(size=(2, 3, 10, 10)) + # labels must be integer-valued, images can be float + shape = {2: (10, 10), 3: (3, 10, 10), 4: (2, 3, 10, 10)}[n_dims] + if model in [Labels2DModel, Labels3DModel]: + image: ArrayLike = RNG.integers(0, 100, size=shape) + else: + image = RNG.uniform(size=shape) image = converter(image) self._parse_transformation_from_multiple_places(model, image) spatial_image = model.parse(image) @@ -891,8 +891,8 @@ def test_label_no_c_coords(model: Labels2DModel | Labels3DModel): def test_warning_on_large_chunks(): - data_small = DataArray(dask.array.zeros((100, 100), chunks=(50, 50)), dims=["x", "y"]) - data_large = DataArray(dask.array.zeros((50000, 50000), chunks=(50000, 50000)), dims=["x", "y"]) + data_small = DataArray(dask.array.zeros((100, 100), chunks=(50, 50), dtype=np.int64), dims=["x", "y"]) + data_large = DataArray(dask.array.zeros((50000, 50000), chunks=(50000, 50000), dtype=np.int64), dims=["x", "y"]) assert np.array(data_large.shape).prod().item() > LARGE_CHUNK_THRESHOLD_BYTES # single and multiscale, small chunk size From f84be2688f88d48532797401ca4d4e2b03ad3d5d Mon Sep 17 00:00:00 2001 From: anon Date: Sat, 20 Jun 2026 14:27:38 +0200 Subject: [PATCH 8/9] Relax ome_zarr pin to >=0.16 Our omero self-write and integer-label fixes work across 0.16/0.17/0.18 (verified), so keep the wider lower bound for ecosystem co-installability rather than forcing >=0.18. Co-Authored-By: Claude Opus 4.8 (1M context) --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9d63855d1..03181eadb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,8 +35,7 @@ dependencies = [ "networkx", "numba>=0.55.0", "numpy", - # >=0.18 dropped omero from the functional writer (we write it ourselves); pin so CI matches a fresh install. - "ome_zarr>=0.18.0", + "ome_zarr>=0.16.0", "pandas", "pooch", "pyarrow", From a22181c38228b97d4e8a029c669fa4a4f38a0e7c Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 22 Jun 2026 10:32:36 +0200 Subject: [PATCH 9/9] improve labels validation logic --- .../_core/operations/rasterize_bins.py | 12 +++----- src/spatialdata/_core/query/spatial_query.py | 2 +- src/spatialdata/models/models.py | 29 ++++++++++++------- tests/core/operations/test_rasterize_bins.py | 18 ------------ 4 files changed, 23 insertions(+), 38 deletions(-) diff --git a/src/spatialdata/_core/operations/rasterize_bins.py b/src/spatialdata/_core/operations/rasterize_bins.py index 87af14de7..7c4914ce7 100644 --- a/src/spatialdata/_core/operations/rasterize_bins.py +++ b/src/spatialdata/_core/operations/rasterize_bins.py @@ -87,14 +87,10 @@ def rasterize_bins( table = sdata.tables[table_name] if not isinstance(element, GeoDataFrame | DaskDataFrame | DataArray): raise ValueError("The bins should be a GeoDataFrame, a DaskDataFrame or a DataArray.") - if isinstance(element, DataArray): - if "c" in element.dims: - raise ValueError( - "If bins is a DataArray, it should hold labels; found a image element instead, with" - f" 'c': {element.dims}." - ) - if not np.issubdtype(element.dtype, np.integer): - raise ValueError(f"If bins is a DataArray, it should hold integers. Found dtype {element.dtype}.") + if isinstance(element, DataArray) and "c" in element.dims: + raise ValueError( + f"If bins is a DataArray, it should hold labels; found a image element instead, with 'c': {element.dims}." + ) _, region_key, instance_key = get_table_keys(table) if not table.obs[region_key].dtype == "category": diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 475c36f4f..07c33468e 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -391,7 +391,7 @@ def _bounding_box_mask_points( axes: tuple[str, ...], min_coordinate: list[Number] | ArrayLike, max_coordinate: list[Number] | ArrayLike, -) -> list[ArrayLike]: +) -> list[np.ndarray]: """Compute a mask that is true for the points inside axis-aligned bounding boxes. Parameters diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index c863c06f2..a818bdadc 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -398,11 +398,12 @@ def _check_chunk_size_not_too_large(cls, data: DataArray | DataTree) -> None: for d in data: cls._check_chunk_size_not_too_large(data[d][name]) - -def _validate_labels_dtype(data: DataArray | DataTree) -> None: - dtype = data.dtype if isinstance(data, DataArray) else data["scale0"]["image"].dtype - if not (np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, np.bool_)): - raise ValueError(f"Labels must have an integer dtype, found {dtype}. Cast the data, e.g. `.astype(np.uint16)`.") + def _validate_labels_dtype(data: DataArray | DataTree) -> None: + dtype = data.dtype if isinstance(data, DataArray) else data["scale0"]["image"].dtype + if not (np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, np.bool_)): + raise ValueError( + f"Labels must have an integer dtype, found {dtype}. Cast the data, e.g. `.astype(np.uint16)`." + ) class Labels2DModel(RasterSchema): @@ -416,9 +417,12 @@ def parse( # noqa: D102 ) -> DataArray | DataTree: if kwargs.get("c_coords") is not None: raise ValueError("`c_coords` is not supported for labels") - parsed = super().parse(*args, **kwargs) - _validate_labels_dtype(parsed) - return parsed + return super().parse(*args, **kwargs) + + @classmethod + def validate(cls, data: Any) -> None: + super().validate(data) + cls._validate_labels_dtype(data) class Labels3DModel(RasterSchema): @@ -428,9 +432,12 @@ class Labels3DModel(RasterSchema): def parse(self, *args: Any, **kwargs: Any) -> DataArray | DataTree: # noqa: D102 if kwargs.get("c_coords") is not None: raise ValueError("`c_coords` is not supported for labels") - parsed = super().parse(*args, **kwargs) - _validate_labels_dtype(parsed) - return parsed + return super().parse(*args, **kwargs) + + @classmethod + def validate(cls, data: Any) -> None: + super().validate(data) + cls._validate_labels_dtype(data) class Image2DModel(RasterSchema): diff --git a/tests/core/operations/test_rasterize_bins.py b/tests/core/operations/test_rasterize_bins.py index fa72c1d03..afd807657 100644 --- a/tests/core/operations/test_rasterize_bins.py +++ b/tests/core/operations/test_rasterize_bins.py @@ -24,7 +24,6 @@ from spatialdata._types import ArrayLike from spatialdata.models.models import ( Image2DModel, - Labels2DModel, PointsModel, ShapesModel, TableModel, @@ -276,23 +275,6 @@ def _get_sdata(n: int): value_key="instance_id", ) - # if bins is a DataArray, it should hold integers (cast after parsing, which itself rejects floats) - image = Labels2DModel.parse(RNG.integers(0, 10, size=(3, 3)), dims=("y", "x")).astype(float) - del sdata["points"] - sdata["points"] = image - with pytest.raises( - ValueError, - match=f"If bins is a DataArray, it should hold integers. Found dtype {image.dtype}.", - ): - _ = rasterize_bins( - sdata=sdata, - bins="points", - table_name="table", - col_key="col_index", - row_key="row_index", - value_key="instance_id", - ) - def test_relabel_labels(caplog): obs = DataFrame(