diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 6d1027b29..be2ce5f73 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -19,11 +19,11 @@ jobs: fail-fast: false matrix: include: - - {os: windows-latest, python: "3.11", dask-version: "2026.3.0", name: "min dask"} + - {os: windows-latest, python: "3.12", dask-version: "2026.3.0", name: "min dask"} - {os: windows-latest, python: "3.14", dask-version: "latest"} - - {os: ubuntu-latest, python: "3.11", dask-version: "latest"} + - {os: ubuntu-latest, python: "3.12", dask-version: "latest"} - {os: ubuntu-latest, python: "3.14", dask-version: "latest"} - - {os: macos-latest, python: "3.11", dask-version: "latest"} + - {os: macos-latest, python: "3.12", dask-version: "latest"} - {os: macos-latest, python: "3.14", prerelease: "allow", name: "prerelease"} env: OS: ${{ matrix.os }} @@ -41,7 +41,7 @@ jobs: - name: Install dependencies run: | if [[ "${PRERELEASE}" == "allow" ]]; then - sed -i '' 's/requires-python.*//' pyproject.toml # otherwise uv complains that anndata requires python>=3.12 and we only do >=3.11 😱 + sed -i '' 's/requires-python.*//' pyproject.toml # drop requires-python (a lower bound, not a cap): otherwise uv refuses to resolve when a prerelease dep such as anndata needs a newer Python than our minimum uv add git+https://github.com/scverse/anndata.git uv add pandas>=3.dev0 fi diff --git a/docs/api/datasets.md b/docs/api/datasets.md index 7bf6d5a61..d0c43b56c 100644 --- a/docs/api/datasets.md +++ b/docs/api/datasets.md @@ -7,5 +7,6 @@ Convenience small datasets .. autofunction:: blobs .. autofunction:: blobs_annotating_element +.. autofunction:: cells .. 
autofunction:: raccoon ``` diff --git a/pyproject.toml b/pyproject.toml index 353811c98..34592a8e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ maintainers = [ urls.Documentation = "https://spatialdata.scverse.org/en/latest" urls.Source = "https://github.com/scverse/spatialdata.git" urls.Home-page = "https://github.com/scverse/spatialdata.git" -requires-python = ">=3.11" +requires-python = ">=3.12" dynamic= [ "version" # allow version to be set by git tags ] @@ -45,6 +45,7 @@ dependencies = [ "spatial_image>=1.2.3", "scikit-image", "scipy!=1.17.0", + "scverse-misc[datasets]>=0.0.10", "typing_extensions>=4.8.0", "universal_pathlib>=0.2.6", "xarray>=2024.10.0", @@ -145,7 +146,7 @@ exclude = [ ] line-length = 120 -target-version = "py311" +target-version = "py312" [tool.ruff.lint] ignore = [ diff --git a/src/spatialdata/datasets.py b/src/spatialdata/datasets.py index 37f529c72..be27bf988 100644 --- a/src/spatialdata/datasets.py +++ b/src/spatialdata/datasets.py @@ -31,7 +31,7 @@ ) from spatialdata.transformations import Identity -__all__ = ["blobs", "raccoon"] +__all__ = ["blobs", "cells", "raccoon"] def blobs( @@ -79,6 +79,37 @@ def raccoon() -> SpatialData: return RaccoonDataset().raccoon() +def cells(path: str | None = None) -> SpatialData: + """ + Cells dataset. + + Download the ``cells`` example dataset and load it as a :class:`~spatialdata.SpatialData` + object. The download is hash-verified and cached, so repeated calls reuse the local copy + instead of downloading again. + + Parameters + ---------- + path + Directory in which to cache the downloaded data. If `None`, the default OS cache + location is used (:func:`pooch.os_cache` for ``"spatialdata"``). + + Returns + ------- + SpatialData object with the cells dataset. 
+ """ + import importlib.resources + from pathlib import Path + + import pooch + from scverse_misc.datasets import fetch, parse_registry + + cache_dir = Path(path) if path is not None else Path(pooch.os_cache("spatialdata")) + registry = importlib.resources.files("spatialdata").joinpath("datasets.yaml") + with importlib.resources.as_file(registry) as registry_path: + base_url, datasets = parse_registry(registry_path) + return fetch(datasets["cells"], cache_dir, base_url=base_url) + + class RaccoonDataset: """Raccoon dataset.""" diff --git a/src/spatialdata/datasets.yaml b/src/spatialdata/datasets.yaml new file mode 100644 index 000000000..9f5d235a1 --- /dev/null +++ b/src/spatialdata/datasets.yaml @@ -0,0 +1,15 @@ +# Registry of downloadable example datasets for ``spatialdata.datasets``. +# +# Parsed by ``scverse_misc.datasets.parse_registry`` and fetched (downloaded, +# hash-verified, cached and loaded) via ``scverse_misc.datasets.fetch``. +# +# type: spatialdata -> a .zip that extracts to a single .zarr store +base_url: https://exampledata.scverse.org/spatialdata/ +datasets: + cells: + type: spatialdata + doc_header: Cells dataset as a SpatialData object. 
+ files: + - name: cells.zip + s3_key: cells.zip + sha256: dc9613cb9e16fd2cd8d83f3a9586eeda4af5ba8ba366f1066efb51305820c5fb diff --git a/tests/datasets/test_datasets.py b/tests/datasets/test_datasets.py index 2237e253c..10337915f 100644 --- a/tests/datasets/test_datasets.py +++ b/tests/datasets/test_datasets.py @@ -1,6 +1,11 @@ from __future__ import annotations -from spatialdata.datasets import blobs, raccoon +import importlib.resources + +import pytest + +from spatialdata import SpatialData +from spatialdata.datasets import blobs, cells, raccoon def test_datasets() -> None: @@ -26,3 +31,26 @@ def test_datasets() -> None: assert sdata_raccoon.images["raccoon"].shape == (3, 768, 1024) assert sdata_raccoon.labels["segmentation"].shape == (768, 1024) _ = str(sdata_raccoon) + + +def test_cells_registry() -> None: + # Network-free: the shipped registry parses and exposes the cells dataset. + from scverse_misc.datasets import parse_registry + + registry = importlib.resources.files("spatialdata").joinpath("datasets.yaml") + with importlib.resources.as_file(registry) as registry_path: + base_url, datasets = parse_registry(registry_path) + + assert base_url == "https://exampledata.scverse.org/spatialdata/" + entry = datasets["cells"] + assert entry.type == "spatialdata" + file = entry.file(name="cells.zip") + assert file.sha256 == "dc9613cb9e16fd2cd8d83f3a9586eeda4af5ba8ba366f1066efb51305820c5fb" + assert file.resolve_url(base_url) == "https://exampledata.scverse.org/spatialdata/cells.zip" + + +@pytest.mark.slow +def test_cells_download(tmp_path) -> None: + # Downloads ~3 MB from the scverse example data bucket; opt out with `-m "not slow"`. + sdata = cells(path=str(tmp_path)) + assert isinstance(sdata, SpatialData)