From 961f0137cbf6e222951fb4c886220886c510531e Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sat, 27 Jun 2026 08:02:14 -0700 Subject: [PATCH 1/2] from_template: lazy dask grids skip the cell cap when chunks given (#3554) Supplying chunks now promotes an eager backend to its dask variant (numpy->dask+numpy, cupy->dask+cupy) and returns a lazy grid. The 500M-cell cap applies only to the eager backends, which materialize the whole array; a dask grid is a lazy chunk graph, so the cap is skipped. The eager cap is unchanged. --- xrspatial/templates.py | 34 +++++++++++++++++------ xrspatial/tests/test_templates.py | 46 +++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 9 deletions(-) diff --git a/xrspatial/templates.py b/xrspatial/templates.py index 18d818038..153833c0a 100644 --- a/xrspatial/templates.py +++ b/xrspatial/templates.py @@ -22,8 +22,10 @@ _PRESERVE_OPTIONS = ("area", "shape") # Guard against a stray fine resolution allocating an enormous array. Applied to -# every backend, including dask: a lazy grid this large is almost always a typo, -# and the uniform cap keeps the error identical across backends. +# the eager backends (numpy, cupy) only: those materialize the whole grid up +# front, so a huge shape is an immediate out-of-memory risk. A dask grid is +# lazy (da.full builds a chunk graph, never the array), so the cap is skipped +# there -- a large chunked study area is a legitimate workflow. _MAX_CELLS = 500_000_000 @@ -254,7 +256,7 @@ def from_template(name: str, resolution: Optional[Union[float, Tuple[float, float]]] = None, *, preserve: Optional[str] = None, backend: str = "numpy", fill: float = np.nan, - chunks: Union[int, str, Tuple] = "auto") -> xr.DataArray: + chunks: Optional[Union[int, str, Tuple]] = None) -> xr.DataArray: """Create an empty DataArray for a common study area. The returned raster is NaN-filled and obeys the xarray-spatial array @@ -301,11 +303,17 @@ def from_template(name: str, the chosen EPSG int. Requires pyproj. backend : str, default='numpy' Array backend: ``'numpy'``, ``'dask+numpy'`` (alias ``'dask'``), - ``'cupy'``, or ``'dask+cupy'``. + ``'cupy'``, or ``'dask+cupy'``. The eager backends (``'numpy'``, + ``'cupy'``) materialize the whole grid, so they are subject to a + 500-million-cell cap; the dask backends build a lazy chunk graph and + are not. fill : float, default=numpy.nan Value the grid is filled with. The dtype is always ``float32``. - chunks : int, str, or tuple, default='auto' - Dask chunk specification; only used by the dask backends. + chunks : int, str, or tuple, optional + Dask chunk specification. Supplying it returns a lazy, chunked grid: + an eager backend is promoted to its dask variant (``'numpy'`` to + ``'dask+numpy'``, ``'cupy'`` to ``'dask+cupy'``), and the cell cap no + longer applies. When omitted, the dask backends use ``'auto'``. Returns ------- @@ -379,12 +387,19 @@ def from_template(name: str, width = max(1, int(round((right - left) / res_x))) height = max(1, int(round((top - bottom) / res_y))) + # Supplying `chunks` signals intent for a lazy, chunked grid: promote an + # eager backend to its dask variant so the result never materializes. + backend = backend.lower() + if chunks is not None and backend in ("numpy", "cupy"): + backend = "dask+numpy" if backend == "numpy" else "dask+cupy" + is_dask = backend in ("dask", "dask+numpy", "dask+cupy") + n_cells = width * height - if n_cells > _MAX_CELLS: + if not is_dask and n_cells > _MAX_CELLS: raise ValueError( f"resolution {(res_x, res_y)} produces a {height} x {width} grid " f"({n_cells:,} cells), exceeding the {_MAX_CELLS:,}-cell limit. " - f"Use a coarser resolution." + f"Use a coarser resolution, or pass chunks=... for a lazy dask grid." ) # Honor the requested resolution exactly: anchor the lower-left corner and @@ -396,7 +411,8 @@ def from_template(name: str, top = bottom + height * res_y ys, xs = _make_output_coords((left, bottom, right, top), (height, width)) - data = _make_data((height, width), fill, backend, chunks) + data = _make_data((height, width), fill, backend, + "auto" if chunks is None else chunks) attrs = {"res": (res_x, res_y), "crs": crs} attrs.update(_cf_crs_attrs(crs)) diff --git a/xrspatial/tests/test_templates.py b/xrspatial/tests/test_templates.py index c787390ef..1c72be3d4 100644 --- a/xrspatial/tests/test_templates.py +++ b/xrspatial/tests/test_templates.py @@ -367,6 +367,52 @@ def test_over_fine_resolution_raises(): from_template("conus", resolution=1) +# new_england at 10 m is ~6.9e9 cells, well past the eager cap, but only ~26k +# chunks at 512 -- a lazy grid that builds and indexes instantly. This is the +# repro from the issue. +_OVER_CAP = dict(name="new_england", resolution=10) + + +@dask_array_available +def test_chunks_promotes_eager_to_lazy_dask(): + import dask.array as da + from xrspatial.templates import _MAX_CELLS + # Supplying chunks promotes the default numpy backend to dask and skips the + # cap, returning a lazy array that never materializes the full shape. + agg = from_template(_OVER_CAP["name"], resolution=_OVER_CAP["resolution"], + chunks=512) + assert isinstance(agg.data, da.Array) + assert agg.size > _MAX_CELLS + assert agg.data.chunksize == (512, 512) + assert agg.attrs["res"] == (10.0, 10.0) + # computing a single cell stays cheap and yields the NaN fill + assert np.isnan(float(agg.data[0, 0].compute())) + + +@dask_array_available +def test_dask_backend_skips_cell_cap_without_chunks(): + import dask.array as da + from xrspatial.templates import _MAX_CELLS + # An explicit dask backend is lazy too, so the cap is skipped even when no + # chunks are passed (the dask path falls back to 'auto'). + agg = from_template(_OVER_CAP["name"], resolution=_OVER_CAP["resolution"], + backend="dask+numpy") + assert isinstance(agg.data, da.Array) + assert agg.size > _MAX_CELLS + + +@cuda_and_cupy_available +@dask_array_available +def test_chunks_promotes_cupy_to_dask_cupy(): + import cupy + import dask.array as da + agg = from_template(_OVER_CAP["name"], resolution=_OVER_CAP["resolution"], + backend="cupy", chunks=512) + assert isinstance(agg.data, da.Array) + block = agg.data.blocks[0, 0].compute() + assert isinstance(block, cupy.ndarray) + + def test_single_pixel_grid(): # a resolution coarser than the whole study-area box clamps width and height # to the max(1, ...) floor, giving a 1x1 grid that still obeys the contract. From 587dfe2589b733b986d8f8d589a07a22531be3fa Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sat, 27 Jun 2026 08:05:41 -0700 Subject: [PATCH 2/2] Address review: document dask task-graph footgun, add chunks example (#3554) Docstring caution that a very fine resolution on a dask backend builds a large task graph, plus an Examples entry for the lazy chunked path. The actual chunk-count guard is deferred to #3557. --- xrspatial/templates.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/xrspatial/templates.py b/xrspatial/templates.py index 153833c0a..a380351c2 100644 --- a/xrspatial/templates.py +++ b/xrspatial/templates.py @@ -313,7 +313,11 @@ def from_template(name: str, Dask chunk specification. Supplying it returns a lazy, chunked grid: an eager backend is promoted to its dask variant (``'numpy'`` to ``'dask+numpy'``, ``'cupy'`` to ``'dask+cupy'``), and the cell cap no - longer applies. When omitted, the dask backends use ``'auto'``. + longer applies. When omitted, the dask backends use ``'auto'``. The + data stays lazy, but a very fine resolution still builds one task per + chunk, so an extreme shape with small chunks can make a task graph + large enough to bog down the client; coarsen the resolution or use + larger chunks if that happens. Returns ------- @@ -359,6 +363,10 @@ def from_template(name: str, 32630 >>> from_template("FRA", preserve="area").attrs["crs"] # Equal Earth 8857 + >>> # passing chunks returns a lazy dask grid, exempt from the cell cap + >>> agg = from_template("new_england", resolution=10, chunks=512) + >>> type(agg.data).__name__ + 'Array' """ spec = _resolve(name) key = spec["key"]