From 2584f0e0996b28b8972582f9dbe1744019bff73e Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 2 Jul 2026 00:30:37 -0400 Subject: [PATCH 1/2] Add asv benchmark coverage for geotiff read/write paths geotiff had no asv benchmark, so open_geotiff and to_geotiff had no regression coverage even though the module has dask and GPU backends. This adds benchmarks/benchmarks/geotiff.py. What it covers: - WriteGeoTIFF: to_geotiff on numpy, dask (the tile-row streaming writer), and cupy (the GPU writer) - WriteCOG: the cog=True overview-pyramid path on numpy and cupy - ReadGeoTIFF: open_geotiff decode on numpy and cupy (GPU decoder) - ReadGeoTIFFChunked: open_geotiff(chunks=) dask-backed read, materialized Each class writes a small temp GeoTIFF in setup and deletes it in teardown, so nothing depends on network fixtures. Two sizes, 512 and 2048, keep CI time down while still showing how each path scales. Benchmark-only, no source changes. I ran every class directly on a GPU host to confirm it works; the cupy paths ran for real, not skipped. --- .claude/sweep-benchmarks-state.csv | 2 + benchmarks/benchmarks/geotiff.py | 157 +++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 .claude/sweep-benchmarks-state.csv create mode 100644 benchmarks/benchmarks/geotiff.py diff --git a/.claude/sweep-benchmarks-state.csv b/.claude/sweep-benchmarks-state.csv new file mode 100644 index 000000000..a54e35fa6 --- /dev/null +++ b/.claude/sweep-benchmarks-state.csv @@ -0,0 +1,2 @@ +module,last_inspected,issue,severity_max,categories_found,notes +geotiff,2026-07-02,,HIGH,1;2,"No benchmark existed for geotiff; open_geotiff/to_geotiff had zero asv coverage across numpy/dask/cupy. Added benchmarks/benchmarks/geotiff.py: WriteGeoTIFF (numpy/dask/cupy streaming), WriteCOG (numpy/cupy overview pyramid), ReadGeoTIFF (numpy/cupy decode), ReadGeoTIFFChunked (dask). All classes executed locally via direct call; cupy paths run on this GPU host. 
import os
import shutil
import tempfile

import numpy as np
import xarray as xr

from xrspatial.geotiff import open_geotiff, to_geotiff

# Optional backends: remember whether each one imported so benchmarks can
# be skipped per parameter combination instead of failing at import time.
try:
    import cupy
    _has_cupy = True
except ImportError:
    _has_cupy = False

try:
    import dask.array as _da
    _has_dask = True
except ImportError:
    _has_dask = False


def _make_dataarray(ny, nx, backend):
    """Return a noisy float32 ``(ny, nx)`` DataArray on ``backend``.

    The grid is tagged with ``attrs["crs"] = 4326`` so the writer emits a
    georeferenced file.  Values come from a fixed-seed generator, so two
    calls with the same shape produce the same data.

    Parameters
    ----------
    ny, nx : int
        Number of rows and columns.
    backend : {"numpy", "dask", "cupy"}
        Array type backing the result.  The dask variant is chunked into
        full-width row bands of ``max(1, ny // 4)`` rows.

    Returns
    -------
    xarray.DataArray
        Dims ``("y", "x")`` with ``x`` running -180..180 and ``y``
        running 90..-90.

    Raises
    ------
    NotImplementedError
        If the dask or CuPy backend is requested but could not be
        imported, so asv skips the parameter combination instead of
        failing.
    RuntimeError
        If ``backend`` is not one of the recognised names.
    """
    # Reject unusable combinations before generating any data, so a
    # skipped benchmark does not pay for the RNG and the allocation.
    if backend not in ("numpy", "dask", "cupy"):
        raise RuntimeError(f"Unrecognised backend {backend}")
    if backend == "cupy" and not _has_cupy:
        raise NotImplementedError("CuPy not available")
    if backend == "dask" and not _has_dask:
        raise NotImplementedError("dask not available")

    rng = np.random.default_rng(31607)
    z = rng.normal(0.0, 25.0, (ny, nx)).astype(np.float32)
    if backend == "cupy":
        z = cupy.asarray(z)
    elif backend == "dask":
        z = _da.from_array(z, chunks=(max(1, ny // 4), nx))

    x = np.linspace(-180.0, 180.0, nx)
    y = np.linspace(90.0, -90.0, ny)
    da = xr.DataArray(z, coords=dict(y=y, x=x), dims=["y", "x"])
    da.attrs["crs"] = 4326
    return da


# -------------------------------------------------------------------------
# Write path (to_geotiff)
#
# numpy is the eager path; dask exercises the tile-row streaming writer;
# cupy exercises the GPU writer. All three are real, distinct code paths.
# -------------------------------------------------------------------------

class _TempDirWriteBenchmark:
    """Fixture lifecycle shared by the write benchmarks in this section.

    Subclasses supply ``params``/``param_names``, the two naming
    attributes below, and the ``time_*`` methods.  This class defines no
    ``time_*`` method of its own.
    """

    # Prefix passed to ``tempfile.mkdtemp`` for the scratch directory.
    _dir_prefix = "asv_geotiff_"
    # Leading component of the output filename inside that directory.
    _file_stem = "out"

    def setup(self, nx, backend):
        """Build the input array and a scratch output path.

        The array is created first: ``_make_dataarray`` raises
        ``NotImplementedError`` for unavailable backends, and doing so
        before ``mkdtemp`` means a skipped combination creates nothing
        on disk.
        """
        ny = nx // 2
        self.da = _make_dataarray(ny, nx, backend)
        self.dir = tempfile.mkdtemp(prefix=self._dir_prefix)
        self.path = os.path.join(
            self.dir, f"{self._file_stem}_{nx}_{backend}.tif")
        # Only the cupy-backed input is routed through the GPU writer.
        self.gpu = backend == "cupy"

    def teardown(self, nx, backend):
        """Remove the scratch directory; a no-op if setup never made one."""
        scratch = getattr(self, "dir", None)
        if scratch:
            shutil.rmtree(scratch, ignore_errors=True)
        # Forget the path so a repeated teardown cannot touch it again.
        self.dir = None


class WriteGeoTIFF(_TempDirWriteBenchmark):
    """Time ``to_geotiff`` with zstd compression on each array backend."""

    params = ([512, 2048], ["numpy", "dask", "cupy"])
    param_names = ("nx", "backend")

    _dir_prefix = "asv_geotiff_write_"
    _file_stem = "w"

    def time_write_zstd(self, nx, backend):
        to_geotiff(self.da, self.path, compression="zstd", gpu=self.gpu)


# -------------------------------------------------------------------------
# COG write path (overview pyramid generation)
#
# cog=True materialises the array and builds an internal overview pyramid,
# a separate code path from the plain tiled writer above. dask is omitted
# because COG output materialises anyway.
# -------------------------------------------------------------------------

class WriteCOG(_TempDirWriteBenchmark):
    """Time ``to_geotiff(cog=True)`` with three mean-resampled overviews."""

    params = ([512, 2048], ["numpy", "cupy"])
    param_names = ("nx", "backend")

    _dir_prefix = "asv_geotiff_cog_"
    _file_stem = "cog"

    def time_write_cog(self, nx, backend):
        to_geotiff(self.da, self.path, cog=True,
                   overview_levels=[2, 4, 8], overview_resampling="mean",
                   compression="zstd", gpu=self.gpu)


# -------------------------------------------------------------------------
# Eager read path (open_geotiff)
#
# The file is written once in setup; the benchmark measures decode +
# array assembly. gpu=True routes through the GPU decoder.
# -------------------------------------------------------------------------

class ReadGeoTIFF:
    """Time an eager ``open_geotiff`` read on the CPU and GPU paths.

    ``setup`` writes a zstd-compressed, 256-pixel-tiled fixture from a
    numpy-backed array; the timed call only reads it back, with
    ``gpu=True`` for the ``"cupy"`` parameter.
    """

    params = ([512, 2048], ["numpy", "cupy"])
    param_names = ("nx", "backend")

    def setup(self, nx, backend):
        """Write the fixture file, skipping when CuPy is unavailable.

        Raises
        ------
        NotImplementedError
            For ``backend == "cupy"`` when CuPy could not be imported.
        """
        # Skip before touching the filesystem.
        if backend == "cupy" and not _has_cupy:
            raise NotImplementedError("CuPy not available")
        ny = nx // 2
        self.gpu = backend == "cupy"
        self.dir = tempfile.mkdtemp(prefix="asv_geotiff_read_")
        self.path = os.path.join(self.dir, f"r_{nx}.tif")
        try:
            to_geotiff(_make_dataarray(ny, nx, "numpy"), self.path,
                       compression="zstd", tiled=True, tile_size=256)
        except BaseException:
            # The runner is not guaranteed to call teardown when setup
            # fails, so remove the scratch directory here before
            # propagating the error.
            self.teardown(nx, backend)
            raise

    def teardown(self, nx, backend):
        """Remove the scratch directory; a no-op if setup never made one."""
        scratch = getattr(self, "dir", None)
        if scratch:
            shutil.rmtree(scratch, ignore_errors=True)
        # Forget the path so a repeated teardown cannot touch it again.
        self.dir = None

    def time_read(self, nx, backend):
        open_geotiff(self.path, gpu=self.gpu)


# -------------------------------------------------------------------------
# Chunked (dask-backed) read path
#
# open_geotiff(chunks=) builds a lazy dask-backed DataArray; the benchmark
# materialises it to measure the chunked-read backend, not just graph build.
# -------------------------------------------------------------------------

class ReadGeoTIFFChunked:
    """Time a chunked ``open_geotiff`` read, including materialisation.

    ``setup`` writes a zstd-compressed, 256-pixel-tiled fixture; the
    timed call opens it with ``chunks=256`` and calls ``.data.compute()``
    so the read itself is measured, not just graph construction.
    """

    params = ([512, 2048],)
    param_names = ("nx",)

    def setup(self, nx):
        """Write the fixture file, skipping when dask is unavailable.

        Raises
        ------
        NotImplementedError
            When dask could not be imported.
        """
        # Skip before touching the filesystem.
        if not _has_dask:
            raise NotImplementedError("dask not available")
        ny = nx // 2
        self.chunks = 256
        self.dir = tempfile.mkdtemp(prefix="asv_geotiff_readchunk_")
        self.path = os.path.join(self.dir, f"rc_{nx}.tif")
        try:
            to_geotiff(_make_dataarray(ny, nx, "numpy"), self.path,
                       compression="zstd", tiled=True, tile_size=256)
        except BaseException:
            # The runner is not guaranteed to call teardown when setup
            # fails, so remove the scratch directory here before
            # propagating the error.
            self.teardown(nx)
            raise

    def teardown(self, nx):
        """Remove the scratch directory; a no-op if setup never made one."""
        scratch = getattr(self, "dir", None)
        if scratch:
            shutil.rmtree(scratch, ignore_errors=True)
        # Forget the path so a repeated teardown cannot touch it again.
        self.dir = None

    def time_read_chunked(self, nx):
        open_geotiff(self.path, chunks=self.chunks).data.compute()
Added benchmarks/benchmarks/geotiff.py: WriteGeoTIFF (numpy/dask/cupy streaming), WriteCOG (numpy/cupy overview pyramid), ReadGeoTIFF (numpy/cupy decode), ReadGeoTIFFChunked (dask). All classes executed locally via direct call; cupy paths run on this GPU host. asv check discover fails suite-wide from an asv_runner + py3.14 metadata bug, unrelated to this file."