Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .claude/sweep-benchmarks-state.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-07-02,3603,HIGH,1;2,"No benchmark existed for geotiff; open_geotiff/to_geotiff had zero asv coverage across numpy/dask/cupy. Added benchmarks/benchmarks/geotiff.py: WriteGeoTIFF (numpy/dask/cupy streaming), WriteCOG (numpy/cupy overview pyramid), ReadGeoTIFF (numpy/cupy decode), ReadGeoTIFFChunked (dask). All classes executed locally via direct call; cupy paths run on this GPU host. asv check discover fails suite-wide from an asv_runner + py3.14 metadata bug, unrelated to this file."
157 changes: 157 additions & 0 deletions benchmarks/benchmarks/geotiff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import os
import shutil
import tempfile

import numpy as np
import xarray as xr

from xrspatial.geotiff import open_geotiff, to_geotiff

try:
import cupy
_has_cupy = True
except ImportError:
_has_cupy = False

try:
import dask.array as _da
_has_dask = True
except ImportError:
_has_dask = False


def _make_dataarray(ny, nx, backend):
    # Build a (ny, nx) float32 raster of seeded Gaussian noise with x/y
    # coordinates spanning -180..180 and 90..-90, and set attrs["crs"] to
    # 4326 so the writer emits a georeferenced file.
    #
    # backend is one of "numpy", "dask" or "cupy". When cupy or dask is
    # requested but not importable, NotImplementedError is raised so asv
    # skips that parameter combination instead of reporting a failure.
    # Any other backend string raises RuntimeError.
    noise = np.random.default_rng(31607).normal(0.0, 25.0, (ny, nx))
    values = noise.astype(np.float32)
    lon = np.linspace(-180.0, 180.0, nx)
    lat = np.linspace(90.0, -90.0, ny)

    if backend == "cupy":
        if not _has_cupy:
            raise NotImplementedError("CuPy not available")
        values = cupy.asarray(values)
    elif backend == "dask":
        if not _has_dask:
            raise NotImplementedError("dask not available")
        # Four row bands (at least one row each), full width per chunk.
        row_chunk = max(1, ny // 4)
        values = _da.from_array(values, chunks=(row_chunk, nx))
    elif backend != "numpy":
        raise RuntimeError(f"Unrecognised backend {backend}")

    raster = xr.DataArray(values, coords={"y": lat, "x": lon},
                          dims=["y", "x"])
    raster.attrs["crs"] = 4326
    return raster


# -------------------------------------------------------------------------
# Write path (to_geotiff)
#
# numpy is the eager path; dask exercises the tile-row streaming writer;
# cupy exercises the GPU writer. All three are real, distinct code paths.
# -------------------------------------------------------------------------

class WriteGeoTIFF:
    # asv parameter grid: raster width crossed with array backend.
    params = ([512, 2048], ["numpy", "dask", "cupy"])
    param_names = ("nx", "backend")

    def setup(self, nx, backend):
        # The raster is built first (height is half the width); if that
        # raises, no temporary directory has been created yet.
        self.da = _make_dataarray(nx // 2, nx, backend)
        self.gpu = (backend == "cupy")
        self.dir = tempfile.mkdtemp(prefix="asv_geotiff_write_")
        self.path = os.path.join(self.dir, f"w_{nx}_{backend}.tif")

    def teardown(self, nx, backend):
        # self.dir may be unset when setup bailed out early, hence the
        # getattr default; removal errors are ignored.
        scratch = getattr(self, "dir", "")
        shutil.rmtree(scratch, ignore_errors=True)

    def time_write_zstd(self, nx, backend):
        # Timed region: one zstd-compressed write to the scratch path.
        to_geotiff(self.da, self.path, gpu=self.gpu, compression="zstd")


# -------------------------------------------------------------------------
# COG write path (overview pyramid generation)
#
# cog=True materialises the array and builds an internal overview pyramid,
# a separate code path from the plain tiled writer above. dask is omitted
# because COG output materialises anyway.
# -------------------------------------------------------------------------

class WriteCOG:
    # asv parameter grid: raster width crossed with array backend.
    params = ([512, 2048], ["numpy", "cupy"])
    param_names = ("nx", "backend")

    def setup(self, nx, backend):
        # The raster is built first (height is half the width); if that
        # raises, no temporary directory has been created yet.
        self.da = _make_dataarray(nx // 2, nx, backend)
        self.gpu = (backend == "cupy")
        self.dir = tempfile.mkdtemp(prefix="asv_geotiff_cog_")
        self.path = os.path.join(self.dir, f"cog_{nx}_{backend}.tif")

    def teardown(self, nx, backend):
        # self.dir may be unset when setup bailed out early, hence the
        # getattr default; removal errors are ignored.
        scratch = getattr(self, "dir", "")
        shutil.rmtree(scratch, ignore_errors=True)

    def time_write_cog(self, nx, backend):
        # Timed region: one COG write with 2x/4x/8x mean-resampled
        # overviews and zstd compression.
        to_geotiff(
            self.da,
            self.path,
            cog=True,
            overview_levels=[2, 4, 8],
            overview_resampling="mean",
            compression="zstd",
            gpu=self.gpu,
        )


# -------------------------------------------------------------------------
# Eager read path (open_geotiff)
#
# The file is written once in setup; the benchmark measures decode +
# array assembly. gpu=True routes through the GPU decoder.
# -------------------------------------------------------------------------

class ReadGeoTIFF:
    # asv parameter grid: raster width crossed with decode backend.
    params = ([512, 2048], ["numpy", "cupy"])
    param_names = ("nx", "backend")

    def setup(self, nx, backend):
        use_gpu = (backend == "cupy")
        # Skip (rather than fail) the GPU combination when CuPy is absent.
        if use_gpu and not _has_cupy:
            raise NotImplementedError("CuPy not available")

        self.dir = tempfile.mkdtemp(prefix="asv_geotiff_read_")
        self.path = os.path.join(self.dir, f"r_{nx}.tif")
        # The fixture file is always written from a numpy raster; only
        # the read in time_read varies with the backend.
        fixture = _make_dataarray(nx // 2, nx, "numpy")
        to_geotiff(fixture, self.path, compression="zstd",
                   tiled=True, tile_size=256)
        self.gpu = use_gpu

    def teardown(self, nx, backend):
        # self.dir may be unset when setup bailed out early, hence the
        # getattr default; removal errors are ignored.
        scratch = getattr(self, "dir", "")
        shutil.rmtree(scratch, ignore_errors=True)

    def time_read(self, nx, backend):
        # Timed region: one eager read of the fixture file.
        open_geotiff(self.path, gpu=self.gpu)


# -------------------------------------------------------------------------
# Chunked (dask-backed) read path
#
# open_geotiff(chunks=) builds a lazy dask-backed DataArray; the benchmark
# materialises it to measure the chunked-read backend, not just graph build.
# -------------------------------------------------------------------------

class ReadGeoTIFFChunked:
    # asv parameter grid: raster width only.
    params = ([512, 2048],)
    param_names = ("nx",)

    def setup(self, nx):
        # Skip (rather than fail) when dask cannot be imported.
        if not _has_dask:
            raise NotImplementedError("dask not available")

        self.dir = tempfile.mkdtemp(prefix="asv_geotiff_readchunk_")
        self.path = os.path.join(self.dir, f"rc_{nx}.tif")
        fixture = _make_dataarray(nx // 2, nx, "numpy")
        to_geotiff(fixture, self.path, compression="zstd",
                   tiled=True, tile_size=256)
        # Read chunk size equals the tile size used for the fixture.
        self.chunks = 256

    def teardown(self, nx):
        # self.dir may be unset when setup bailed out early, hence the
        # getattr default; removal errors are ignored.
        scratch = getattr(self, "dir", "")
        shutil.rmtree(scratch, ignore_errors=True)

    def time_read_chunked(self, nx):
        # Timed region: open with chunks= and compute the result, so the
        # chunk reads are measured and not only the open call.
        lazy = open_geotiff(self.path, chunks=self.chunks)
        lazy.data.compute()
Loading