From 8f62c61e58830f136089ea867a3ab3c9af2e32f1 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Wed, 17 Jun 2026 15:23:44 -0400 Subject: [PATCH 1/3] Implement modeler with grid domain support only Fixes #17 --- docs/01-design-overview.md | 4 +- docs/04-modeler-converter-design.md | 100 ++++++--- docs/05-implementation-roadmap.md | 6 +- docs/06-existing-libraries-analysis.md | 4 +- src/titiler_covjson/helpers.py | 3 + src/titiler_covjson/input.py | 57 +++++- src/titiler_covjson/modeler.py | 224 +++++++++++++++++++- tests/test_input.py | 43 +++- tests/test_modeler.py | 269 +++++++++++++++++++++++++ 9 files changed, 667 insertions(+), 43 deletions(-) create mode 100644 tests/test_modeler.py diff --git a/docs/01-design-overview.md b/docs/01-design-overview.md index 9de8b68..66a2eaf 100644 --- a/docs/01-design-overview.md +++ b/docs/01-design-overview.md @@ -48,7 +48,7 @@ numpy arrays + metadata (bands, CRS, bounds, timestamps) CoverageInput (intermediate representation) | v -RasterCovJSONModeler (conversion logic) +Modeler functions (to_coverage; conversion logic) | v covjson-pydantic Models (Coverage, Domain, Range, Parameter...) @@ -60,7 +60,7 @@ JSON Response (application/prs.coverage+json) ### Key Design Decisions - **TiTiler extension**: Implemented as a FastAPI router that plugs into TiTiler, not a standalone service -- **Data-agnostic modeler**: A `RasterCovJSONModeler` converts an intermediate `CoverageInput` to CovJSON, decoupled from specific readers +- **Data-agnostic modeler**: Stateless module-level functions (`to_coverage`) convert an intermediate `CoverageInput` to CovJSON, decoupled from specific readers - **covjson-pydantic**: Uses the [KNMI covjson-pydantic](https://github.com/KNMI/covjson-pydantic) library (Pydantic v2) for spec-compliant model serialization - **Domain type auto-detection**: Geometry type determines CovJSON domain type (Grid for raster, Point/PointSeries for point queries, Trajectory for transects, etc.) - **Grid-native**: Raster data naturally maps to CovJSON Grid domains, the most common case diff --git a/docs/04-modeler-converter-design.md b/docs/04-modeler-converter-design.md index b8fd394..0e6c9a8 100644 --- a/docs/04-modeler-converter-design.md +++ b/docs/04-modeler-converter-design.md @@ -5,7 +5,7 @@ The **Modeler** is the layer that converts raster data (rio-tiler ImageData, numpy arrays, STAC metadata) into CovJSON model objects (via covjson-pydantic). It follows a clean separation of concerns: ```plain -rio-tiler data --> CoverageInput (intermediate) --> RasterCovJSONModeler --> covjson-pydantic Coverage +rio-tiler data --> CoverageInput (intermediate) --> modeler (to_coverage) --> covjson-pydantic Coverage ``` ## 2. Conversion Flow @@ -72,7 +72,7 @@ class CoverageInput: crs: rasterio.CRS geometry: BaseGeometry | None = None # For non-grid domains - # Band/variable metadata (may be empty; modeler synthesizes identities) + # Band/variable metadata; resolved to one entry per band at construction bands: tuple[BandInfo, ...] = () # Temporal info (optional) @@ -89,6 +89,26 @@ Domain-dependent consistency (geometry vs. timestamps vs. array shape) is deferred to the modeler -- see Section 7 for the planned evolution that removes this split. +`__post_init__` also **resolves `bands` once, at construction**: when `bands` is +empty it synthesizes one `BandInfo` per leading-axis band (`b1, b2, ...`, +matching rio-tiler's default band naming), assigning through +`object.__setattr__` because the dataclass is frozen. So `bands` is never empty +afterwards and every consumer reads a populated tuple -- the default-band naming +convention lives here, in one place, rather than being duplicated in the modeler. + +This intentionally erases the distinction between "the caller supplied no band +metadata" and "the caller supplied bands". That is consistent with +`CoverageInput`'s role as the *post-resolution* representation: all precedence +and enrichment (explicit `bands` > per-attribute kwargs > the reader's own +`band_names`) is resolved upstream in the converters (Section 5), which still +hold the raw reader info. The realistic consumers of "were bands supplied?" -- +e.g., a strict mode that rejects placeholder parameters, or metadata enrichment +from a STAC item's `eo:bands` -- all live at that converter/endpoint layer, +where the signal is still available; none need it on `CoverageInput`. If such a +need ever does reach this layer, the clean fix is an explicit converter flag or +a `bands_supplied` field, not reconstructing intent from a resolved +`CoverageInput`. + ### 3.1 Single-array, data-cube constraint `CoverageInput.data` is a single masked array with a leading band axis: @@ -117,33 +137,63 @@ its own array and dtype) rather than a single `(bands, ...)` array. Defer this until a concrete endpoint requires it; the Section 7 union refactor is independent and addresses domain shape, not band heterogeneity. -## 4. RasterCovJSONModeler +## 4. Modeler + +The modeler is a set of **stateless module-level functions** in `modeler.py`, +not a class. The conversion holds no state and depends only on the neutral +`CoverageInput`, so a class would add ceremony without benefit. (If +configuration ever needs to be threaded through -- e.g., a `TiledNdArray` size +threshold in Story 12 -- introduce a function argument or a small frozen config +object rather than reviving a stateful class.) + +The public entry point is `to_coverage`: ```python -class RasterCovJSONModeler: - """Converts raster data to CovJSON Coverage objects.""" - - def to_coverage(self, input: CoverageInput) -> Coverage: - domain = self._create_domain(input) - parameters = self._create_parameters(input) - ranges = self._create_ranges(input, domain) - return Coverage(domain=domain, parameters=parameters, ranges=ranges) - - def to_coverage_collection(self, inputs: list[CoverageInput]) -> CoverageCollection: - parameters = self._create_parameters(inputs[0]) - references = self._get_references(inputs[0]) - coverages = [] - for inp in inputs: - cov = self.to_coverage(inp) - cov.parameters = {} # Hoisted to collection level - coverages.append(cov) - return CoverageCollection( - coverages=coverages, parameters=parameters, referencing=references - ) +def to_coverage(coverage_input: CoverageInput) -> Coverage: + domain = _create_grid_domain(coverage_input) + parameters = _create_parameters(coverage_input) + ranges = _create_grid_ranges(coverage_input) + return Coverage(domain=domain, parameters=parameters, ranges=ranges) +``` + +**Current status (Grid only).** `to_coverage` implements the Grid domain +(gridded rasters, `geometry is None`). It guards the cases it does not yet handle +with `NotImplementedError`: a non-`None` `geometry` (a non-grid domain), and data +that is not 3-D `(bands, height, width)` (`CoverageInput` also permits 2-D +point/profile data, which must not reach the grid path). Multi-domain support +arrives via the per-domain input union and `match` dispatch in Section 7 -- +chosen at the second domain type rather than building out the `_get_domain_type` +inference sketched in Section 4.1. + +`to_coverage_collection` is its planned sibling for multi-result responses (**not +yet implemented**): build one coverage per input, then hoist the shared +`parameters` and `referencing` to the collection level and clear them on the +member coverages. + +```python +def to_coverage_collection(inputs: list[CoverageInput]) -> CoverageCollection: + parameters = _create_parameters(inputs[0]) + references = _get_references(inputs[0]) + coverages = [] + for coverage_input in inputs: + cov = to_coverage(coverage_input) + cov.parameters = {} # Hoisted to collection level + coverages.append(cov) + return CoverageCollection( + coverages=coverages, parameters=parameters, referencing=references + ) ``` ### 4.1 Domain Type Detection +> **Status**: not implemented as written. The Grid-only modeler (Section 4) +> guards non-grid inputs with `NotImplementedError` instead of detecting a +> domain type. When the second domain type lands, the per-domain input union +> (Section 7) supersedes this inference entirely -- the variant is selected +> *explicitly* by the endpoint, not detected from `geometry` + `shape`. The +> sketch below is retained for the design rationale (and the Polygon discussion +> that follows). + ```python def _get_domain_type(self, input: CoverageInput) -> DomainType: has_time = input.timestamps is not None and len(input.timestamps) > 0 @@ -399,8 +449,8 @@ exhaustiveness checking: ```python from typing import assert_never # typing_extensions on Python < 3.11 -def to_coverage(self, input: CoverageInput) -> Coverage: - match input: +def to_coverage(coverage_input: CoverageInput) -> Coverage: + match coverage_input: case GridInput(): ... case GridSeriesInput(): diff --git a/docs/05-implementation-roadmap.md b/docs/05-implementation-roadmap.md index 2c4d68f..18b2ea4 100644 --- a/docs/05-implementation-roadmap.md +++ b/docs/05-implementation-roadmap.md @@ -60,7 +60,7 @@ Add CoverageJSON (CovJSON) as a new output format to TiTiler via the `titiler-co **Estimated effort**: S (1-2 days) -### Story 3: RasterCovJSONModeler - Core Conversion Logic +### Story 3: Modeler - Core Conversion Logic **Priority**: P0 (Foundation) @@ -68,8 +68,8 @@ Add CoverageJSON (CovJSON) as a new output format to TiTiler via the `titiler-co **Tasks**: -- [ ] Create `titiler_covjson/modeler.py` with RasterCovJSONModeler class -- [ ] Implement domain type detection (`_get_domain_type`) +- [x] Create `titiler_covjson/modeler.py` with the modeler conversion functions (`to_coverage`); stateless module functions, not a class +- [ ] Implement domain dispatch (per-domain input union + `match`, per doc 04 Section 7) -- the Grid-only path currently guards non-grid inputs rather than detecting a domain type - [ ] Implement axis creation for all domain types (`_create_axes`) - Grid (start/stop/num) - Point/PointSeries (x, y, optional z, optional t) diff --git a/docs/06-existing-libraries-analysis.md b/docs/06-existing-libraries-analysis.md index edd16f7..44c5b86 100644 --- a/docs/06-existing-libraries-analysis.md +++ b/docs/06-existing-libraries-analysis.md @@ -125,7 +125,7 @@ A JSON Schema-based validator with a Python CLI tool and runtime validation logi **No custom Pydantic models needed.** Add `covjson-pydantic` as a dependency and use its models directly. Custom code is limited to: 1. **`CoverageInput` dataclass** (bridge between rio-tiler and covjson-pydantic) -2. **`RasterCovJSONModeler`** (conversion logic: numpy arrays -> covjson-pydantic objects) +2. **Modeler functions** (`to_coverage`; conversion logic: numpy arrays -> covjson-pydantic objects) 3. **FastAPI routes** (endpoint definitions, parameter handling) 4. **TiTiler integration** (router extension, content type registration) 5. **Unit/CRS mapping helpers** (UCUM symbols, EPSG->OGC URI conversion) @@ -170,5 +170,5 @@ httpx # FastAPI test client | 2 | **Use `covjson-validator` in integration tests** | Catches spec violations that Pydantic alone might miss (axis/shape consistency, monotonicity) | | 3 | **Vendor validator schemas, don't depend on the repo** | The repo isn't pip-installable and has deprecated deps; extract the JSON Schema files | | 4 | **Contribute Polygon domain type upstream** | Benefits the community; reduces local maintenance burden | -| 5 | **Keep `CoverageInput` + `RasterCovJSONModeler` as custom code** | Neither library provides the raster->CovJSON conversion bridge | +| 5 | **Keep `CoverageInput` + the modeler functions as custom code** | Neither library provides the raster->CovJSON conversion bridge | | 6 | **Pin `covjson-pydantic` minor version** | Pre-1.0 library; avoid surprise breaking changes | diff --git a/src/titiler_covjson/helpers.py b/src/titiler_covjson/helpers.py index 9e4dd36..3b94c74 100644 --- a/src/titiler_covjson/helpers.py +++ b/src/titiler_covjson/helpers.py @@ -244,9 +244,12 @@ def numpy_dtype_to_ndarray( >>> nd = numpy_dtype_to_ndarray(arr, np.float32, ["y", "x"]) >>> nd.shape [2, 2] + >>> nd.dataType + 'float' >>> nd.values [1.5, 2.5, 3.5, 4.5] """ + covjson_dtype = numpy_to_covjson_dtype(dtype) shape = list(data.shape) diff --git a/src/titiler_covjson/input.py b/src/titiler_covjson/input.py index a71d5a1..2f9e539 100644 --- a/src/titiler_covjson/input.py +++ b/src/titiler_covjson/input.py @@ -94,8 +94,9 @@ class CoverageInput: geometry: Source geometry for non-grid domains -- e.g., the queried point, the transect line, or the aggregation polygon; ``None`` for gridded rasters. - bands: Per-band metadata. May be empty, in which case the modeler - synthesizes generic band identities. + bands: Per-band metadata, one entry per band. Resolved at construction: + when not supplied, generic ``b1, b2, ...`` identities are synthesized + (see ``__post_init__``), so this is always populated afterwards. timestamps: ISO 8601 / RFC 3339 timestamps for temporal data (e.g., one per STAC item in a time series); ``None`` for purely spatial data. @@ -132,7 +133,7 @@ class CoverageInput: item_ids: tuple[str, ...] | None = None def __post_init__(self) -> None: - """Validate array dimensionality, band count, and 2-D timestamps. + """Validate the data/band/timestamp invariants, then resolve ``bands``. Mostly domain-independent invariants. The one domain-shaped exception is timestamps against 2-D point/profile data: there the sample axis is @@ -143,11 +144,18 @@ def __post_init__(self) -> None: eventually, the per-domain input variants -- see ``docs/04-modeler-converter-design.md``, Section 7). + As a final step ``bands`` is resolved: when empty it is populated with + synthesized ``b1, b2, ...`` identities (assigned via + ``object.__setattr__``, as the dataclass is frozen), so it is never empty + after construction. + Raises: ValueError: If ``data`` is not 2-D or 3-D with at least 1 band; if - ``bands`` is non-empty and its length does not match - ``data.shape[0]``; or if ``data`` is 2-D and ``len(timestamps)`` - does not match the sample axis ``data.shape[-1]``. + any ``data`` axis is empty (size 0); if ``bands`` is non-empty and + its length does not match ``data.shape[0]``; if two ``bands`` share + a name (names become CoverageJSON keys, so must be unique); or if + ``data`` is 2-D and ``len(timestamps)`` does not match the sample + axis ``data.shape[-1]``. """ if self.data.ndim not in {2, 3} or self.data.shape[0] == 0: @@ -157,12 +165,31 @@ def __post_init__(self) -> None: f"with {self.data.shape[0]} band(s)" ) raise ValueError(msg) + + # No data axis may be empty: a zero-size height/width/sample axis is a + # degenerate coverage and would otherwise surface as an opaque CompactAxis + # error (num must be a positive cell count) deep in the modeler. + if 0 in self.data.shape: + msg = ( + "CoverageInput data axes must all be non-empty; " + f"got shape {self.data.shape}" + ) + raise ValueError(msg) + if self.bands and len(self.bands) != self.data.shape[0]: msg = ( f"Number of bands ({len(self.bands)}) does not match " f"data.shape[0] ({self.data.shape[0]})" ) raise ValueError(msg) + + # Band names become CoverageJSON range/parameter keys, so they must be + # unique; duplicates would silently collapse entries in the modeler. + if self.bands and len({band.name for band in self.bands}) != len(self.bands): + names = [band.name for band in self.bands] + msg = f"CoverageInput band names must be unique; got {names}" + raise ValueError(msg) + if ( self.timestamps is not None and self.data.ndim == 2 @@ -174,6 +201,24 @@ def __post_init__(self) -> None: ) raise ValueError(msg) + # Resolve bands once, at construction, so every consumer can read a + # populated `bands` tuple without re-deriving defaults. It arrives + # populated from converters (e.g., imagedata_to_coverage_input, via the + # image's band_names); it is empty only on direct array construction + # without metadata -- the modeler's array-only test path. Synthesize + # b1, b2, ... then, matching rio-tiler's default band naming so + # synthesized and converter-supplied identities are indistinguishable. + # (frozen dataclass: assign through object.__setattr__.) + if not self.bands: + object.__setattr__( + self, + "bands", + tuple( + BandInfo(name=f"b{i + 1}", dtype=self.data.dtype) + for i in range(self.data.shape[0]) + ), + ) + def band_info_from_reader_info(info: Info) -> list[BandInfo]: """Build per-band metadata from a rio-tiler reader ``info()`` result. diff --git a/src/titiler_covjson/modeler.py b/src/titiler_covjson/modeler.py index 4635092..5e61b63 100644 --- a/src/titiler_covjson/modeler.py +++ b/src/titiler_covjson/modeler.py @@ -1,6 +1,222 @@ -"""RasterCovJSONModeler: converts raster data to CovJSON Coverage objects. +"""Modeler: converts raster data to CovJSON Coverage objects. -Constructs covjson-pydantic model instances from CoverageInput data, -handling domain type detection, axis creation, parameter mapping, -and range serialization. +Constructs covjson-pydantic model instances from :class:`CoverageInput` data, +handling domain and axis construction, parameter mapping, and range +serialization. The conversion is stateless, so it is exposed as plain module +functions (:func:`to_coverage`) rather than a class; the logic depends only on +the neutral :class:`CoverageInput`, never on rio-tiler types, so every path can +be tested from plain numpy arrays. """ + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from covjson_pydantic.coverage import Coverage +from covjson_pydantic.domain import Axes, CompactAxis, Domain, DomainType +from covjson_pydantic.observed_property import ObservedProperty +from covjson_pydantic.parameter import Parameter, Parameters + +from titiler_covjson.helpers import ( + create_spatial_2d_reference, + create_unit, + numpy_dtype_to_ndarray, +) + +if TYPE_CHECKING: + from covjson_pydantic.ndarray import ( + NdArrayFloat, + NdArrayInt, + NdArrayStr, + TiledNdArray, + ) + from pydantic import AnyUrl + + from titiler_covjson.input import BandInfo, CoverageInput + + # The value type of ``Coverage.ranges``. A dict is invariant in its value + # type, so ``_create_grid_ranges`` must be annotated with the full union + # covjson-pydantic accepts -- not just the NdArray subtypes it produces. + RangeValue = NdArrayFloat | NdArrayInt | NdArrayStr | TiledNdArray | AnyUrl + +# Axis labels for a gridded range, in row-major order: rows (y) then columns (x). +_GRID_AXIS_NAMES = ("y", "x") + + +def to_coverage(coverage_input: CoverageInput) -> Coverage: + """Convert a :class:`CoverageInput` to a CovJSON ``Coverage``. + + Currently handles the Grid domain (gridded rasters, ``geometry is None``). + Other domain types are added in later stories. + + Args: + coverage_input: The intermediate representation to convert. + + Returns: + Coverage: A covjson-pydantic ``Coverage`` model. + + Raises: + NotImplementedError: If the input describes a non-grid domain, or holds + data that is not 3-D ``(bands, height, width)``. + + Examples: + >>> import numpy as np + >>> import rasterio + >>> from titiler_covjson.input import BandInfo, CoverageInput + >>> cov = to_coverage( + ... CoverageInput( + ... data=np.ma.MaskedArray( + ... np.array([[[1.0, 2.0], [3.0, 4.0]]], dtype="float32") + ... ), + ... bounds=(-10.0, -5.0, 10.0, 5.0), + ... crs=rasterio.CRS.from_epsg(4326), + ... bands=(BandInfo("temp", unit="Cel"),), + ... ) + ... ) + >>> cov.domain.domainType.value + 'Grid' + + ``x`` runs west->east and ``y`` north->south (raster row 0 is north): + + >>> cov.domain.axes.x.start, cov.domain.axes.x.stop, cov.domain.axes.x.num + (-10.0, 10.0, 2) + >>> cov.domain.axes.y.start, cov.domain.axes.y.stop, cov.domain.axes.y.num + (5.0, -5.0, 2) + + Each band becomes a parameter (here with its UCUM unit resolved) and a + matching range whose axes line up with the grid: + + >>> list(cov.parameters.root) + ['temp'] + >>> cov.parameters.root["temp"].unit.symbol.value + 'Cel' + >>> cov.ranges["temp"].axisNames, cov.ranges["temp"].shape + (['y', 'x'], [2, 2]) + >>> cov.ranges["temp"].values + [1.0, 2.0, 3.0, 4.0] + """ + if coverage_input.geometry is not None: + msg = ( + "Only gridded inputs (geometry=None) are supported so far; " + f"got geometry {coverage_input.geometry.geom_type!r}" + ) + raise NotImplementedError(msg) + + # CoverageInput also permits 2-D data (the future point/profile path); the + # grid conversion below assumes 3-D (bands, height, width), so reject 2-D + # here rather than emit a domain/range shape mismatch. + if coverage_input.data.ndim != 3: + msg = ( + "Grid coverages require 3-D data with shape (bands, height, width); " + f"got {coverage_input.data.ndim} dimension(s)" + ) + raise NotImplementedError(msg) + + return Coverage( + domain=_create_grid_domain(coverage_input), + parameters=_create_parameters(coverage_input), + ranges=_create_grid_ranges(coverage_input), + ) + + +def _compact_axis(first: float, last: float, num: int) -> CompactAxis: + """Build a CompactAxis spanning ``first``..``last`` over ``num`` cells. + + Args: + first: Coordinate of the first cell (e.g., the west or north edge). + last: Coordinate of the last cell (e.g., the east or south edge). + num: Number of cells along the axis. + + Returns: + CompactAxis: The axis. When ``num == 1`` the endpoints collapse to the + midpoint, since CompactAxis requires ``start == stop`` for a single cell. + """ + if num == 1: + midpoint = (first + last) / 2 + return CompactAxis(start=midpoint, stop=midpoint, num=1) + + return CompactAxis(start=first, stop=last, num=num) + + +def _create_grid_domain(coverage_input: CoverageInput) -> Domain: + """Build the Grid ``Domain`` (x/y CompactAxes plus spatial referencing). + + Args: + coverage_input: The gridded input being converted. + + Returns: + Domain: A Grid domain with ``x``/``y`` compact axes and referencing. + """ + west, south, east, north = coverage_input.bounds + height, width = coverage_input.data.shape[-2:] + + # CompactAxis describes a regular axis by its endpoints and cell count. + # x runs west->east; y runs north->south to match raster row order + # (row 0 is the north edge). Endpoints use the bounds edges; pixel-center + # offsets are a possible later refinement. + + return Domain( + domainType=DomainType.grid, + axes=Axes( + x=_compact_axis(west, east, width), + y=_compact_axis(north, south, height), + ), + referencing=[create_spatial_2d_reference(coverage_input.crs)], + ) + + +def _create_parameters(coverage_input: CoverageInput) -> Parameters: + """Build one CovJSON ``Parameter`` per band, keyed by band name. + + Args: + coverage_input: The input whose bands become parameters. + + Returns: + Parameters: Parameters mapping, one entry per band. + """ + return Parameters( + root={band.name: _create_parameter(band) for band in coverage_input.bands} + ) + + +def _create_parameter(band: BandInfo) -> Parameter: + """Build a single ``Parameter`` from band metadata. + + Args: + band: Band metadata (name, description, UCUM unit code). + + Returns: + Parameter: A parameter whose observed-property label is the band + description (falling back to its name) and whose unit is + resolved from the UCUM code when one is present. + """ + label = {"en": band.description or band.name} + + # An empty unit (the common "no unit" case) skips the UCUM parser; an + # unresolvable code makes create_unit return None. Either way unit is None + # and is dropped on serialization via model_dump_json(exclude_none=True). + return Parameter( + observedProperty=ObservedProperty(label=label), + unit=create_unit(band.unit) if band.unit else None, + ) + + +def _create_grid_ranges(coverage_input: CoverageInput) -> dict[str, RangeValue]: + """Build one range ``NdArray`` per band, keyed to match the parameters. + + Args: + coverage_input: The gridded input whose data becomes ranges. + + Returns: + dict[str, RangeValue]: Range arrays keyed by band name, each shaped + ``[height, width]``. + """ + # The i-th band describes data[i]: band order matches the data's leading + # (band) axis. CoverageInput resolves `bands` at construction and guarantees + # the counts match; this ordering is the contract the input converters build on. + return { + band.name: numpy_dtype_to_ndarray( + coverage_input.data[i], band.dtype, _GRID_AXIS_NAMES + ) + for i, band in enumerate(coverage_input.bands) + } diff --git a/tests/test_input.py b/tests/test_input.py index 2854243..26daff3 100644 --- a/tests/test_input.py +++ b/tests/test_input.py @@ -83,7 +83,8 @@ def test_minimal_construction(self) -> None: """A 3-D masked array with bounds and CRS is sufficient.""" cov = masked_input(np.ma.MaskedArray(np.zeros((1, 2, 2)))) - assert cov.bands == () + # bands is resolved at construction, so it is never empty afterwards. + assert [band.name for band in cov.bands] == ["b1"] assert cov.geometry is None assert cov.timestamps is None assert cov.collection_id is None @@ -164,6 +165,22 @@ def test_3d_timestamps_not_checked(self) -> None: assert len(cov.timestamps or ()) == 5 + def test_duplicate_band_names_raises(self) -> None: + """Band names become CovJSON keys, so they must be unique.""" + with pytest.raises(ValueError, match="unique"): + CoverageInput( + data=np.ma.MaskedArray(np.zeros((2, 2, 2))), + bounds=BOUNDS, + crs=CRS, + bands=(BandInfo("x"), BandInfo("x")), + ) + + @pytest.mark.parametrize("shape", [(1, 0, 2), (1, 2, 0), (2, 0)]) + def test_empty_data_axis_raises(self, shape: tuple[int, ...]) -> None: + """A zero-size data axis (e.g., an empty raster) is rejected early.""" + with pytest.raises(ValueError, match="non-empty"): + masked_input(np.ma.MaskedArray(np.zeros(shape))) + def test_frozen(self) -> None: """CoverageInput is immutable.""" cov = masked_input(np.ma.MaskedArray(np.zeros((1, 2, 2)))) @@ -172,6 +189,30 @@ def test_frozen(self) -> None: cov.data = np.ma.MaskedArray(np.zeros((1, 2, 2))) # type: ignore[misc] +class TestBandSynthesis: + """Test construction-time resolution of CoverageInput.bands.""" + + def test_supplied_bands_kept_unchanged(self) -> None: + """When bands are supplied, they are stored as-is (no synthesis).""" + bands = (BandInfo("red"), BandInfo("nir")) + cov = CoverageInput( + data=np.ma.MaskedArray(np.zeros((2, 2, 2))), + bounds=BOUNDS, + crs=CRS, + bands=bands, + ) + + assert cov.bands == bands + + def test_absent_bands_synthesized_at_construction(self) -> None: + """With no bands, one b1, b2, ... entry per band is synthesized.""" + cov = masked_input(np.ma.MaskedArray(np.zeros((3, 2, 2), dtype="int16"))) + + assert [band.name for band in cov.bands] == ["b1", "b2", "b3"] + # Synthesized bands share the data's dtype, so range typing is correct. + assert all(band.dtype == np.dtype("int16") for band in cov.bands) + + class TestImagedataToCoverageInput: """Test conversion from rio-tiler ImageData.""" diff --git a/tests/test_modeler.py b/tests/test_modeler.py new file mode 100644 index 0000000..8b5a75e --- /dev/null +++ b/tests/test_modeler.py @@ -0,0 +1,269 @@ +"""Tests for the modeler conversion functions.""" + +from __future__ import annotations + +import json +from typing import Any + +import numpy as np +import pytest +import rasterio +from conftest import assert_schema_valid +from covjson_pydantic.coverage import Coverage +from covjson_pydantic.domain import CompactAxis, DomainType +from covjson_pydantic.ndarray import NdArrayFloat, NdArrayInt, NdArrayStr +from covjson_pydantic.unit import Symbol +from shapely.geometry import Point + +from titiler_covjson.input import BandInfo, CoverageInput +from titiler_covjson.modeler import to_coverage + + +def _masked( + values: Any, + *, + mask: Any = False, + dtype: Any = None, +) -> np.ma.MaskedArray[Any, np.dtype[Any]]: + """Build a masked array for tests. + + Centralizes a numpy-version workaround in one place. ``np.ma.array`` is typed + only for some argument forms in the oldest supported numpy (2.2.6, the floor + on Python 3.10); the dtype / ndarray / list-mask forms these tests need fall + through to an untyped overload, which strict mypy rejects as + ``no-untyped-call`` on 3.10. The newer numpy resolved on Python 3.11+ types + the call, so there the ignore would itself be unused -- the paired + ``unused-ignore`` code keeps the comment valid on both. + + NOTE: when Python 3.10 support is dropped (and the numpy floor rises to a + version that types ``np.ma.array``), remove the ``# type: ignore`` below and, + if desired, inline this helper again. + + Args: + values: Array values as a nested sequence or an ndarray. + mask: Boolean mask, or ``False`` for no masked entries. + dtype: Optional dtype for the data. + + Returns: + np.ma.MaskedArray[Any, np.dtype[Any]]: The masked array. + """ + # Drop this ignore when Python 3.10 support is dropped (see the NOTE above). + return np.ma.array(values, mask=mask, dtype=dtype) # type: ignore[no-untyped-call, no-any-return, unused-ignore] + + +def _grid_input( + data: np.ma.MaskedArray[Any, np.dtype[Any]], + *, + bands: tuple[BandInfo, ...] = (), + crs: rasterio.CRS | None = None, +) -> CoverageInput: + """Build a gridded CoverageInput (geometry=None) for modeler tests. + + Args: + data: The masked data array, shaped ``(bands, height, width)``. + bands: Per-band metadata; empty to let CoverageInput synthesize it. + crs: CRS for the input; defaults to EPSG:4326 (WGS84). + + Returns: + CoverageInput: A gridded input over a fixed WGS84 bbox. + """ + return CoverageInput( + data=data, + bounds=(-10.0, -5.0, 10.0, 5.0), + crs=crs or rasterio.CRS.from_epsg(4326), + bands=bands, + ) + + +class TestGridCoverage: + """Conversion of gridded (raster) inputs to a Grid Coverage.""" + + def test_single_band_float_grid_is_schema_valid(self) -> None: + """A single-band float grid converts to a schema-valid Grid Coverage.""" + data = _masked([[[1.0, 2.0], [3.0, 4.0]]], dtype="float32") + cov = to_coverage(_grid_input(data, bands=(BandInfo("b1", unit="mm"),))) + + assert isinstance(cov, Coverage) + assert cov.domain.domainType == DomainType.grid + + # x spans west..east over `width` columns; y spans north..south over + # `height` rows (raster row 0 is the north edge). + assert isinstance(cov.domain.axes.x, CompactAxis) + assert cov.domain.axes.x.start == -10.0 + assert cov.domain.axes.x.stop == 10.0 + assert cov.domain.axes.x.num == 2 + assert isinstance(cov.domain.axes.y, CompactAxis) + assert cov.domain.axes.y.start == 5.0 + assert cov.domain.axes.y.stop == -5.0 + assert cov.domain.axes.y.num == 2 + + assert set(cov.ranges) == {"b1"} + nd = cov.ranges["b1"] + assert isinstance(nd, NdArrayFloat) + assert nd.axisNames == ["y", "x"] + assert nd.shape == [2, 2] + assert nd.values == [1.0, 2.0, 3.0, 4.0] + + assert_schema_valid(cov) + + @pytest.mark.parametrize( + ("shape", "axis", "expected"), + [ + # single column -> x collapses to the west/east midpoint + ((1, 2, 1), "x", (0.0, 0.0, 1)), + # single row -> y collapses to the north/south midpoint + ((1, 1, 2), "y", (0.0, 0.0, 1)), + ], + ids=("single-column", "single-row"), + ) + def test_single_cell_axis_collapses_to_midpoint( + self, + shape: tuple[int, ...], + axis: str, + expected: tuple[float, float, int], + ) -> None: + """A 1-cell axis collapses to its bounds midpoint. + + CompactAxis requires ``start == stop`` when ``num == 1``, so a single + row or column cannot use the edge endpoints; it uses their midpoint + (here 0.0, the centre of the symmetric -10..10 / -5..5 bounds). + """ + data = _masked(np.zeros(shape, dtype="float32")) + cov = to_coverage(_grid_input(data)) + + compact = getattr(cov.domain.axes, axis) + assert isinstance(compact, CompactAxis) + assert (compact.start, compact.stop, compact.num) == expected + + def test_multiple_bands_keep_order_and_names(self) -> None: + """Each band yields a parameter and range keyed by its name, in order.""" + data = _masked(np.arange(2 * 2 * 3, dtype="float32").reshape(2, 2, 3)) + cov = to_coverage(_grid_input(data, bands=(BandInfo("red"), BandInfo("nir")))) + + assert cov.parameters is not None + assert list(cov.parameters.root) == ["red", "nir"] + assert list(cov.ranges) == ["red", "nir"] + red = cov.ranges["red"] + assert isinstance(red, NdArrayFloat) + assert red.shape == [2, 3] + assert_schema_valid(cov) + + def test_integer_nodata_serializes_as_null(self) -> None: + """Masked integer entries serialize as JSON null in the range values.""" + data = _masked( + [[[10, 20], [30, 40]]], + mask=[[[False, True], [False, False]]], + dtype="int16", + ) + cov = to_coverage(_grid_input(data, bands=(BandInfo("b1", dtype="int16"),))) + + nd = cov.ranges["b1"] + assert isinstance(nd, NdArrayInt) + assert nd.values == [10, None, 30, 40] + dumped = json.loads(cov.model_dump_json(exclude_none=True)) + assert dumped["ranges"]["b1"]["values"] == [10, None, 30, 40] + assert_schema_valid(cov) + + def test_float_nodata_serializes_as_null(self) -> None: + """Masked float entries serialize as JSON null (NaN in the model).""" + data = _masked( + [[[1.0, 2.0], [3.0, 4.0]]], + mask=[[[False, False], [True, False]]], + dtype="float32", + ) + cov = to_coverage(_grid_input(data, bands=(BandInfo("b1"),))) + + dumped = json.loads(cov.model_dump_json(exclude_none=True)) + assert dumped["ranges"]["b1"]["values"] == [1.0, 2.0, None, 4.0] + assert_schema_valid(cov) + + def test_string_dtype_produces_string_range(self) -> None: + """A string-dtype band produces an NdArrayStr range.""" + data = _masked([[["a", "b"], ["c", "d"]]], dtype=np.dtype("U1")) + cov = to_coverage( + _grid_input(data, bands=(BandInfo("b1", dtype=np.dtype("U1")),)) + ) + + nd = cov.ranges["b1"] + assert isinstance(nd, NdArrayStr) + assert nd.values == ["a", "b", "c", "d"] + assert_schema_valid(cov) + + def test_resolved_unit_is_attached(self) -> None: + """A resolvable UCUM code becomes the parameter's unit.""" + data = _masked([[[1.0]]], dtype="float32") + cov = to_coverage( + _grid_input(data, bands=(BandInfo("b1", description="precip", unit="mm"),)) + ) + + assert cov.parameters is not None + param = cov.parameters.root["b1"] + assert param.observedProperty.label == {"en": "precip"} + assert param.unit is not None + assert isinstance(param.unit.symbol, Symbol) + assert param.unit.symbol.value == "mm" + assert_schema_valid(cov) + + def test_empty_and_unresolvable_units_omit_unit(self) -> None: + """No unit code, or an invalid one, leaves the parameter without a unit.""" + data = _masked(np.zeros((2, 1, 1), dtype="float32")) + cov = to_coverage( + _grid_input( + data, + bands=(BandInfo("plain"), BandInfo("bogus", unit="furlongs")), + ) + ) + + assert cov.parameters is not None + assert cov.parameters.root["plain"].unit is None + assert cov.parameters.root["bogus"].unit is None + assert_schema_valid(cov) + + def test_empty_bands_synthesizes_identities(self) -> None: + """With no band metadata, generic b1, b2, ... identities are synthesized.""" + data = _masked(np.zeros((3, 2, 2), dtype="float32")) + cov = to_coverage(_grid_input(data)) + + assert cov.parameters is not None + assert list(cov.parameters.root) == ["b1", "b2", "b3"] + assert list(cov.ranges) == ["b1", "b2", "b3"] + assert_schema_valid(cov) + + def test_projected_crs_referencing(self) -> None: + """A projected CRS yields ProjectedCRS referencing in the domain.""" + data = _masked([[[1.0, 2.0]]], dtype="float32") + cov = to_coverage(_grid_input(data, crs=rasterio.CRS.from_epsg(32637))) + + assert cov.domain.referencing is not None + system = cov.domain.referencing[0].system + assert system.type == "ProjectedCRS" + assert system.id == "http://www.opengis.net/def/crs/EPSG/0/32637" + assert_schema_valid(cov) + + def test_non_grid_geometry_not_yet_supported(self) -> None: + """A non-None geometry raises NotImplementedError (grid-only so far).""" + data = _masked([[1.0]], dtype="float32") + coverage_input = CoverageInput( + data=data, + bounds=(0.0, 0.0, 0.0, 0.0), + crs=rasterio.CRS.from_epsg(4326), + geometry=Point(0.0, 0.0), + ) + with pytest.raises(NotImplementedError, match="gridded inputs"): + to_coverage(coverage_input) + + def test_two_dimensional_grid_input_not_supported(self) -> None: + """2-D data with no geometry raises rather than emitting a malformed grid. + + The Grid path requires 3-D ``(bands, height, width)`` data; + :class:`CoverageInput` permits 2-D data (for the future point/profile + path), so guard against it reaching the grid conversion. + """ + data = _masked([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype="float32") + coverage_input = CoverageInput( + data=data, + bounds=(-10.0, -5.0, 10.0, 5.0), + crs=rasterio.CRS.from_epsg(4326), + ) + with pytest.raises(NotImplementedError, match="3-D"): + to_coverage(coverage_input) From c79e670b54f5b89fa6d744abacc3423b7947f738 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Tue, 23 Jun 2026 10:50:17 -0400 Subject: [PATCH 2/3] Correct "celcius" to "celsius" --- tests/test_playground_roundtrip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_playground_roundtrip.py b/tests/test_playground_roundtrip.py index 52b28df..cc8327e 100644 --- a/tests/test_playground_roundtrip.py +++ b/tests/test_playground_roundtrip.py @@ -486,7 +486,7 @@ class TestPlaygroundPoint: "POTM": { "type": "Parameter", "description": { - "en": "The potential temperature, in degrees celcius," + "en": "The potential temperature, in degrees celsius," " of the sea water" }, "unit": { @@ -622,7 +622,7 @@ class TestPlaygroundPointSeries: "POTM": { "type": "Parameter", "description": { - "en": "The potential temperature, in degrees celcius," + "en": "The potential temperature, in degrees celsius," " of the sea water" }, "unit": { From 4cd3171481925fbd5a449fe792ac6b4d45b22a1c Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Tue, 23 Jun 2026 11:01:03 -0400 Subject: [PATCH 3/3] Emit cell centers, not edges, for Grid CompactAxis A CovJSON CompactAxis describes the coordinates at which cells are defined -- the cell centers -- but the Grid modeler passed the raster bounds edges straight through, leaving x/y coordinates off by half a cell. It was also inconsistent with the num == 1 path, which already collapsed to a true center. _compact_axis now insets the bounds edges by half a cell so the axis runs first + dx/2 .. last - dx/2. The unified formula naturally yields start == stop for a single cell, subsuming the former special case. Addresses reviewer feedback on PR #21. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/04-modeler-converter-design.md | 2 +- src/titiler_covjson/modeler.py | 36 ++++++++++++++++------------- tests/test_modeler.py | 23 ++++++++++-------- 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/docs/04-modeler-converter-design.md b/docs/04-modeler-converter-design.md index 0e6c9a8..af9c823 100644 --- a/docs/04-modeler-converter-design.md +++ b/docs/04-modeler-converter-design.md @@ -229,7 +229,7 @@ def _get_domain_type(self, input: CoverageInput) -> DomainType: | Domain Type | Axes Produced | | --- | --- | -| Grid | `x: CompactAxis(start=west, stop=east, num=w)`, `y: CompactAxis(start=north, stop=south, num=h)` | +| Grid | `x`/`y` `CompactAxis` of cell *centers*, inset half a cell from the bounds edges: `x` runs `west + dx/2 .. east - dx/2` (`dx = (east-west)/w`), `y` runs `north + dy/2 .. south - dy/2` (`dy = (south-north)/h`) | | Point / PointSeries | `x: ValuesAxis[float]`, `y: ValuesAxis[float]`, optionally `z`, optionally `t` | | MultiPoint | `composite: ValuesAxis[Tuple]` | | Polygon / PolygonSeries | `composite: ValuesAxis` with polygon rings, optionally `t` | diff --git a/src/titiler_covjson/modeler.py b/src/titiler_covjson/modeler.py index 5e61b63..b261fd8 100644 --- a/src/titiler_covjson/modeler.py +++ b/src/titiler_covjson/modeler.py @@ -76,12 +76,13 @@ def to_coverage(coverage_input: CoverageInput) -> Coverage: >>> cov.domain.domainType.value 'Grid' + Axes carry cell *centers* (inset half a cell from the bounds edges): ``x`` runs west->east and ``y`` north->south (raster row 0 is north): >>> cov.domain.axes.x.start, cov.domain.axes.x.stop, cov.domain.axes.x.num - (-10.0, 10.0, 2) + (-5.0, 5.0, 2) >>> cov.domain.axes.y.start, cov.domain.axes.y.stop, cov.domain.axes.y.num - (5.0, -5.0, 2) + (2.5, -2.5, 2) Each band becomes a parameter (here with its UCUM unit resolved) and a matching range whose axes line up with the grid: @@ -120,22 +121,25 @@ def to_coverage(coverage_input: CoverageInput) -> Coverage: def _compact_axis(first: float, last: float, num: int) -> CompactAxis: - """Build a CompactAxis spanning ``first``..``last`` over ``num`` cells. + """Build a CompactAxis of cell centers spanning the ``first``..``last`` edges. + + A CompactAxis describes the coordinates at which cells are defined -- the cell + *centers* -- whereas ``first``/``last`` are the outer bounds *edges*. The + centers are inset from the edges by half a cell, so for ``num`` cells spanning + ``first``..``last`` the axis runs ``first + dx/2`` .. ``last - dx/2`` where + ``dx = (last - first) / num``. Args: - first: Coordinate of the first cell (e.g., the west or north edge). - last: Coordinate of the last cell (e.g., the east or south edge). + first: Outer edge of the first cell (e.g., the west or north bound). + last: Outer edge of the last cell (e.g., the east or south bound). num: Number of cells along the axis. Returns: - CompactAxis: The axis. When ``num == 1`` the endpoints collapse to the - midpoint, since CompactAxis requires ``start == stop`` for a single cell. + CompactAxis: The axis of cell centers. When ``num == 1`` both centers + coincide at the bounds midpoint, satisfying ``start == stop``. """ - if num == 1: - midpoint = (first + last) / 2 - return CompactAxis(start=midpoint, stop=midpoint, num=1) - - return CompactAxis(start=first, stop=last, num=num) + half_cell = (last - first) / (2 * num) + return CompactAxis(start=first + half_cell, stop=last - half_cell, num=num) def _create_grid_domain(coverage_input: CoverageInput) -> Domain: @@ -150,10 +154,10 @@ def _create_grid_domain(coverage_input: CoverageInput) -> Domain: west, south, east, north = coverage_input.bounds height, width = coverage_input.data.shape[-2:] - # CompactAxis describes a regular axis by its endpoints and cell count. - # x runs west->east; y runs north->south to match raster row order - # (row 0 is the north edge). Endpoints use the bounds edges; pixel-center - # offsets are a possible later refinement. + # CompactAxis describes a regular axis by its cell-center endpoints and cell + # count. x runs west->east; y runs north->south to match raster row order + # (row 0 is the north edge). _compact_axis insets the bounds edges by half a + # cell to yield the centers. return Domain( domainType=DomainType.grid, diff --git a/tests/test_modeler.py b/tests/test_modeler.py index 8b5a75e..402ff82 100644 --- a/tests/test_modeler.py +++ b/tests/test_modeler.py @@ -86,15 +86,18 @@ def test_single_band_float_grid_is_schema_valid(self) -> None: assert isinstance(cov, Coverage) assert cov.domain.domainType == DomainType.grid - # x spans west..east over `width` columns; y spans north..south over - # `height` rows (raster row 0 is the north edge). + # Axes carry cell *centers*, inset half a cell from the bounds edges: + # x runs west..east over `width` columns, y runs north..south over + # `height` rows (raster row 0 is the north edge). Bounds are + # -10..10 (x) over 2 cols -> centers -5, 5; 5..-5 (y) over 2 rows -> + # centers 2.5, -2.5. assert isinstance(cov.domain.axes.x, CompactAxis) - assert cov.domain.axes.x.start == -10.0 - assert cov.domain.axes.x.stop == 10.0 + assert cov.domain.axes.x.start == -5.0 + assert cov.domain.axes.x.stop == 5.0 assert cov.domain.axes.x.num == 2 assert isinstance(cov.domain.axes.y, CompactAxis) - assert cov.domain.axes.y.start == 5.0 - assert cov.domain.axes.y.stop == -5.0 + assert cov.domain.axes.y.start == 2.5 + assert cov.domain.axes.y.stop == -2.5 assert cov.domain.axes.y.num == 2 assert set(cov.ranges) == {"b1"} @@ -122,11 +125,11 @@ def test_single_cell_axis_collapses_to_midpoint( axis: str, expected: tuple[float, float, int], ) -> None: - """A 1-cell axis collapses to its bounds midpoint. + """A 1-cell axis's single center is the bounds midpoint. - CompactAxis requires ``start == stop`` when ``num == 1``, so a single - row or column cannot use the edge endpoints; it uses their midpoint - (here 0.0, the centre of the symmetric -10..10 / -5..5 bounds). + With one cell the center sits half a cell in from each edge, i.e., at + the bounds midpoint, so ``start == stop`` (here 0.0, the centre of the + symmetric -10..10 / -5..5 bounds). """ data = _masked(np.zeros(shape, dtype="float32")) cov = to_coverage(_grid_input(data))