From 40bcce4f181b0c5f0e94191e0e1386442a113ad6 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 1 Jul 2026 22:57:13 -0400 Subject: [PATCH 1/4] Remove the previous file's PAM sidecar when to_geotiff overwrites a path (#3595) --- xrspatial/geotiff/_writers/eager.py | 24 ++- .../test_stale_sidecar_overwrite_3595.py | 193 ++++++++++++++++++ 2 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 xrspatial/geotiff/tests/write/test_stale_sidecar_overwrite_3595.py diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index dfa148cbe..da5bc506b 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -418,7 +418,12 @@ def to_geotiff(data: xr.DataArray | np.ndarray, data; for a dask source that means reading the graph once more (see ``color_ramp_range`` to skip it). Ignored when ``pack=True``, whose on-disk packed values would not match a ramp built from the logical - values. + values. Every string-path write refreshes the PAM ``.aux.xml``: + a sidecar left by a previous write at the same path is removed + and re-created only when this write carries its own categories + or statistics (#3595). A pre-existing ``.qml`` is kept unless + ``color_ramp`` replaces it -- QGIS treats it as user styling + that persists across data updates. color_ramp_range : tuple of (float, float) or None, default None [advanced] Explicit ``(min, max)`` for the ``color_ramp`` stretch. Skips the statistics reduction -- useful for large dask graphs -- so @@ -507,6 +512,23 @@ def to_geotiff(data: xr.DataArray | np.ndarray, else _resolve_nodata_attr(data.attrs)) def _write_sidecars(): + if isinstance(path, str): + # A pre-existing PAM sidecar describes whatever file this write + # just replaced, and ``open_geotiff`` merges it back onto attrs, + # so leaving it behind hands the old file's categories / + # statistics to the new pixels (#3595). 
GDAL's GTiff driver + # removes the PAM sidecar when creating a dataset over an + # existing path (``GDALDriver::QuietDelete``); match that. The + # ``write_*_sidecar`` calls below re-create it when this write + # carries its own categories or statistics. The ``.qml`` style + # sidecar is deliberately left alone: QGIS treats it as user + # styling that persists across data updates, so only a new + # ``color_ramp=`` write replaces it. + from .._pam import sidecar_path + try: + os.remove(sidecar_path(path)) + except OSError: + pass if _cat_names: from .._pam import write_pam_sidecar write_pam_sidecar(path, _cat_names, _cat_colors) diff --git a/xrspatial/geotiff/tests/write/test_stale_sidecar_overwrite_3595.py b/xrspatial/geotiff/tests/write/test_stale_sidecar_overwrite_3595.py new file mode 100644 index 000000000..73e463e04 --- /dev/null +++ b/xrspatial/geotiff/tests/write/test_stale_sidecar_overwrite_3595.py @@ -0,0 +1,193 @@ +"""Overwriting a GeoTIFF refreshes its PAM sidecar (#3595). + +``to_geotiff`` writes a PAM ``.aux.xml`` sidecar for categorical +rasters (#3483) and for ``color_ramp=`` statistics (#3537), and +``open_geotiff`` merges that sidecar back onto attrs. Before #3595 a write +that produced no sidecar of its own left a previous write's ``.aux.xml`` +on disk, so re-reads attached the overwritten file's ``category_names`` / +``category_colors`` to the new pixels and GDAL/QGIS stretched the new data +with the old statistics. The writer now removes a pre-existing PAM sidecar +on every successful string-path write (matching GDAL's +``GDALDriver::QuietDelete`` behaviour) and re-creates it only when the new +write carries its own categories or statistics. + +The QGIS ``.qml`` style sidecar is deliberately NOT removed: QGIS treats it +as user styling that persists across data updates, so only a new +``color_ramp=`` write replaces it. 
+""" +import os + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff + +from .._helpers.markers import requires_gpu + +pytest.importorskip("tifffile") + +_NAMES = ["water", "forest", "urban"] + + +def _plain_da(dtype="float32"): + """A continuous 2D DataArray with georef attrs and no categories.""" + data = np.arange(64, dtype=dtype).reshape(8, 8) + return xr.DataArray( + data, + dims=("y", "x"), + coords={"y": 8.0 - np.arange(8) - 0.5, "x": np.arange(8) + 0.5}, + attrs={"transform": (1.0, 0.0, 0.0, 0.0, -1.0, 8.0), "crs": 4326}, + ) + + +def _categorical_da(): + """A uint8 categorical DataArray carrying category attrs.""" + da = _plain_da(dtype="uint8") + da.data[:] = da.data % 3 + da.attrs["category_names"] = list(_NAMES) + return da + + +def test_plain_overwrite_removes_categorical_sidecar(tmp_path): + """A plain overwrite drops the previous write's RAT sidecar.""" + path = str(tmp_path / "overwrite_cat_3595.tif") + to_geotiff(_categorical_da(), path) + assert os.path.exists(path + ".aux.xml") + + to_geotiff(_plain_da(), path) + + assert not os.path.exists(path + ".aux.xml") + back = open_geotiff(path) + assert "category_names" not in back.attrs + assert "category_colors" not in back.attrs + + +def test_plain_overwrite_removes_stats_sidecar_keeps_qml(tmp_path): + """A plain overwrite drops stale statistics but keeps the .qml style.""" + path = str(tmp_path / "overwrite_ramp_3595.tif") + qml = os.path.splitext(path)[0] + ".qml" + to_geotiff(_plain_da(), path, color_ramp="viridis") + assert os.path.exists(path + ".aux.xml") + assert os.path.exists(qml) + + to_geotiff(_plain_da() * 1000.0, path) + + assert not os.path.exists(path + ".aux.xml") + # QGIS user styling persists across data updates by design. 
+ assert os.path.exists(qml) + + +def test_categorical_overwrite_replaces_stats_sidecar(tmp_path): + """A categorical overwrite replaces old statistics with the new RAT.""" + path = str(tmp_path / "ramp_then_cat_3595.tif") + to_geotiff(_plain_da(), path, color_ramp="viridis") + + to_geotiff(_categorical_da(), path) + + back = open_geotiff(path) + assert back.attrs["category_names"] == _NAMES + with open(path + ".aux.xml") as fh: + assert "STATISTICS_MINIMUM" not in fh.read() + + +def test_color_ramp_overwrite_replaces_categorical_sidecar(tmp_path): + """A color_ramp overwrite replaces the old RAT with fresh statistics.""" + path = str(tmp_path / "cat_then_ramp_3595.tif") + to_geotiff(_categorical_da(), path) + + to_geotiff(_plain_da(), path, color_ramp="viridis") + + back = open_geotiff(path) + assert "category_names" not in back.attrs + with open(path + ".aux.xml") as fh: + text = fh.read() + assert "STATISTICS_MINIMUM" in text + assert "CategoryNames" not in text + + +def test_multiband_color_ramp_overwrite_removes_sidecar(tmp_path): + """Symbology no-ops on multiband, but the stale sidecar still goes away.""" + path = str(tmp_path / "cat_then_multiband_3595.tif") + to_geotiff(_categorical_da(), path) + + band = _plain_da() + multi = xr.concat([band, band + 1.0], dim="band") + multi = multi.assign_coords(band=[1, 2]) + multi.attrs = dict(band.attrs) + to_geotiff(multi, path, color_ramp="viridis") + + assert not os.path.exists(path + ".aux.xml") + assert "category_names" not in open_geotiff(path).attrs + + +def test_foreign_sidecar_removed_on_fresh_write(tmp_path): + """A pre-existing sidecar at a fresh output path is removed too.""" + path = str(tmp_path / "fresh_3595.tif") + with open(path + ".aux.xml", "w") as fh: + fh.write("" + "stale" + "\n") + + to_geotiff(_plain_da(), path) + + assert not os.path.exists(path + ".aux.xml") + assert "category_names" not in open_geotiff(path).attrs + + +def test_bare_ndarray_overwrite_removes_sidecar(tmp_path): + """An 
ndarray (non-DataArray) overwrite also refreshes the sidecar.""" + path = str(tmp_path / "ndarray_3595.tif") + to_geotiff(_categorical_da(), path) + + to_geotiff(np.arange(64, dtype="float32").reshape(8, 8), path) + + assert not os.path.exists(path + ".aux.xml") + + +def test_dask_overwrite_removes_sidecar(tmp_path): + """The dask streaming write path refreshes the sidecar.""" + import dask.array as dsa + + path = str(tmp_path / "dask_3595.tif") + to_geotiff(_categorical_da(), path) + + plain = _plain_da() + plain = plain.copy(data=dsa.from_array(plain.data, chunks=(4, 4))) + to_geotiff(plain, path) + + assert not os.path.exists(path + ".aux.xml") + + +def test_vrt_write_removes_stale_sidecar(tmp_path): + """The VRT write path refreshes the sidecar next to the .vrt index. + + The VRT writer refuses to overwrite an existing tiles directory, so the + stale sidecar here comes from a foreign/previous file at the same path + rather than a same-path overwrite. + """ + path = str(tmp_path / "mosaic_3595.vrt") + with open(path + ".aux.xml", "w") as fh: + fh.write("" + "stale" + "\n") + + to_geotiff(_plain_da(), path) + + assert not os.path.exists(path + ".aux.xml") + assert "category_names" not in open_geotiff(path).attrs + + +@requires_gpu +def test_gpu_overwrite_removes_sidecar(tmp_path): + """The GPU (nvCOMP) write path refreshes the sidecar.""" + import cupy + + path = str(tmp_path / "gpu_3595.tif") + to_geotiff(_categorical_da(), path) + + plain = _plain_da() + plain = plain.copy(data=cupy.asarray(plain.data)) + to_geotiff(plain, path, gpu=True) + + assert not os.path.exists(path + ".aux.xml") From 6c6325f582a8f5a85767a70bbfff487c371d2a0e Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 1 Jul 2026 22:57:13 -0400 Subject: [PATCH 2/4] Update metadata-sweep state for geotiff (#3595) --- .claude/sweep-metadata-state.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/sweep-metadata-state.csv b/.claude/sweep-metadata-state.csv index 
c38760851..de2b9f7b0 100644 --- a/.claude/sweep-metadata-state.csv +++ b/.claude/sweep-metadata-state.csv @@ -5,7 +5,7 @@ contour,2026-05-29,2700,HIGH,1;5,"Audited 2026-05-29 (agent-ab7fff484a8f57de2 wo corridor,2026-06-22,3446,HIGH,1;5,"Audited 2026-06-22 (agent-a8b2674b815bdfa3f worktree, branch deep-sweep-metadata-corridor-2026-06-22). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live end-to-end for least_cost_corridor across single/threshold/relative/unreachable/pairwise paths. Cat 2 coords (x/y values + float64 dtype) and Cat 3 dims (y,x) preserved on every backend: they flow through cost_distance (coords=raster.coords, dims=raster.dims) and survive xarray's binary intersection. NEW HIGH finding #3446 (Cat 1 + Cat 5): the corridor is cd_a + cd_b where each cost-distance surface carries its SOURCE raster's attrs (cost_distance copies attrs from the source, not friction). xarray's default keep_attrs on binary + keeps only attrs present-and-equal in both operands, so when the source masks are plain marker rasters with no geo-attrs (the common case) the corridor came back with attrs=={} even though the friction surface that defines the grid had res/crs/transform/nodatavals; a downstream slope/clip on the corridor silently lost cellsize/CRS. Secondary Cat 5: .name was None whenever the two sources had different names (cost_distance renames each surface to its source .name; summing differently-named arrays drops the name). Fix (PR on this branch): non-precomputed path re-emits friction.attrs + friction.name on every output via new _apply_geo_metadata helper (single, threshold, all-NaN-unreachable, and pairwise-Dataset paths); precomputed path left on the existing source-derived behaviour since there is no friction to draw from. Only .attrs/.name set -- data values, coords, dims, dtype untouched, dask stays lazy (no compute). 
10 new tests (test_corridor_inherits_friction_geo_attrs x4 backends, test_corridor_threshold_keeps_geo_attrs x4 backends, test_corridor_unreachable_keeps_geo_attrs, test_pairwise_inherits_friction_geo_attrs, test_precomputed_keeps_source_attrs_not_friction). Full corridor suite 43 passed. Cat 4 N/A: NaN-as-nodata is the library convention; corridor never reads attrs['nodatavals'] for masking. No CRITICAL/MEDIUM/LOW findings." cost_distance,2026-06-15,3344,MEDIUM,5,"Audited 2026-06-15 (agent-ad0b84e7f7b212360 worktree, branch deep-sweep-metadata-cost_distance-2026-06-15). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live end-to-end with a rich attrs set (res/crs/transform/nodatavals/_FillValue/units). Cat 1 attrs, Cat 2 coords (values + float64 dtype), and Cat 3 dims (y,x) all preserved and identical across the 4 backends -- public cost_distance() wraps with xr.DataArray(coords=raster.coords, dims=raster.dims, attrs=raster.attrs). NEW MEDIUM finding #3344 (Cat 5): the dask+numpy and dask+cupy backends leaked the internal dask graph name (_trim- from map_overlap, asarray- from the dask+cupy convert-back path) into result.name while numpy/cupy returned None; .name was a nondeterministic per-run token that breaks .to_dataset() variable keys and any name-keyed pipeline. Same .name-leak class as proximity #2723 and zonal #2611. Fix (PR #3349 on this branch): return result.rename(raster.name) -- a constructor name= kwarg does not override a named dask array, and name=None is treated as infer-from-data, so .rename() is required. supports_dataset path unaffected (keys by var_name, verified live). New parametrized regression test test_result_name_matches_input over 4 backends x {None, named}; full cost_distance suite 63 passed (post-merge with origin/main). LOW (documented, not fixed): output float32 uses NaN as the unreachable sentinel but input nodatavals/_FillValue (e.g. 
-9999) are carried through verbatim, so a downstream reader masks a value that never appears -- this is the library-wide attrs=raster.attrs convention shared by proximity/slope/aspect/focal, not a cost_distance-specific bug, so fixing it in isolation would diverge this module from every peer. No CRITICAL/HIGH findings." focal,2026-06-10,3217,MEDIUM,4;5,"Re-audited 2026-06-10 (agent-ad0d55a894c6abc60 worktree, branch deep-sweep-metadata-focal-2026-06-10). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live for mean, apply, focal_stats, hotspots. Cats 1-3 clean: attrs (res/crs/nodatavals/_FillValue/unit), coords (values, dtype, coord attrs), dims, .name, 3D per-band path, and hotspots unit=% all preserved and identical across the 4 backends. NEW MEDIUM finding #3217 (Cat 4 + Cat 5): (a) mean() hardcoded float32 on the GPU paths (_mean_cupy cupy.asarray(dtype=float32), _mean_dask_cupy astype(float32)) while numpy/dask+numpy returned float64 (mean() casts astype(float) before dispatch), so float64 input silently lost precision on cupy/dask+cupy; dask+cupy also advertised float64 (untyped meta) but computed float32. (b) apply()/focal_stats() dask paths passed untyped meta (np.array(()) / cupy.array(())) to map_overlap, so for float32/int input the lazy DataArray advertised float64 but computed the promoted float32 (#2805 typed the chunk fns but not the meta). Same class as aspect #2682 and proximity #2723. Fix: the mean() GPU dtype half landed on main first via duplicate issue #3214/PR #3221 (_promote_float contract: float dtypes preserved, ints->float32, GPU bit-exact vs CPU in float64); PR #3226 (branch deep-sweep-metadata-focal-2026-06-10-01) types every map_overlap meta with data.dtype and aligns tests to the _promote_float contract; 25 new parametrized regression tests (4 backends x 3 dtypes mean; dask backends x 3 dtypes apply/focal_stats; exact CPU/GPU parity). Full focal suite 258 passed. No other CRITICAL/HIGH/MEDIUM/LOW findings." 
-geotiff,2026-06-09,3116,HIGH,2;3,"Re-audited 2026-06-09 (agent-ae89ff94a64e3ee8f worktree, branch deep-sweep-metadata-geotiff-2026-06-09). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live. Focus: surfaces changed since the 2026-05-18 audit (unpack rename + GPU/dask+GPU support #3075, pack=True #3065/#3079, masked int->float promotion #2994, bbox= reads, rioxarray param alignment #2963, no-georef VRT coord synthesis #2824, GeoTransform omission #2971). Live probes: unpack attrs (scale_factor/add_offset/mask_and_scale_dtype/nodata/masked_nodata), masked=True promotion, default masked=False, bbox window+transform shift, multi-band band=N, dims/name/coords (incl. coord dtype) all identical across the 4 backends; nodata_pixels_present absent on dask paths is the documented lazy contract, not a bug. pack->unpack round trips verified on numpy/dask/gpu-write; pack of a cupy-backed read raises via the known cupy+xarray xp.astype incompat (see memory cupy_where_astype_incompat; dependency-pin fix, raises loudly, not a metadata bug). VRT reads (full/masked/window/bbox) and no-georef TIFF reads agree across the 4 backends. NEW HIGH finding #3116 (Cat 2+3): to_geotiff(non_georef_da, out.vrt, tile_size=N) wrote a corrupt index for arrays spanning >1 tile -- write_vrt derives placement from each source GeoTransform and non-georef tiles all carry the identity transform, so rasterX/YSize collapsed to one tile and every DstRect landed at the origin; reads silently returned a single tile (24x32 in -> 16x16 out). Gap left by #2966/#2971 (tests only covered one non-georef source). Fix: _write_vrt_tiled threads per-tile pixel offsets through _build_vrt -> write_vrt via internal dst_offsets kwarg; write_vrt refuses >1 all-non-georef sources without explicit placement and rejects dst_offsets alongside georeferenced sources. 18 new tests in tests/vrt/test_non_georef_placement_3116.py incl. 
4-backend round trip, dask-backed and plain-ndarray writes, XML DstRect assertions, georef placement regression, and the write_vrt error contract. Full vrt suite 520 passed; write+round-trip suites 1292 passed." +geotiff,2026-07-01,3595,MEDIUM,1,"Re-audited 2026-07-01 (agent-adb0e639731d1209c worktree, branch deep-sweep-metadata-geotiff-2026-07-01). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live. Focus: surfaces changed since the 2026-06-09 audit -- symbology sidecars (#3538/#3546), categorical PAM sidecar backends (#3519), xarray engine (#3375/#3377/#3380), pack/nodata attr changes (#3277/#3325/#3128). Live probes all clean: 4-backend read parity (attrs/coords/dims/name/dtype incl. coord dtype), engine open_dataset vs open_geotiff parity (attrs/coords/values, chunks={}, masked=True, var name), color_ramp PAM stats + QML byte-identical across numpy/dask/cupy/dask+cupy write inputs (global dask stats via one fused dask.compute, not per-chunk), nodata excluded from stats, categorical sidecar attrs attach on all 4 read paths + engine, stats-PAM never fakes category attrs (thematic gate). #3128 64-bit sentinel eager fix verified merged. NEW MEDIUM finding #3595 (Cat 1): to_geotiff left the previous file's PAM .aux.xml behind when the new write emitted no sidecar, so open_geotiff attached the overwritten file's category_names/category_colors to the new pixels and GDAL/QGIS stretched with stale STATISTICS_*; GDAL avoids this via GDALDriver::QuietDelete. Fix on this branch: _write_sidecars removes a pre-existing .aux.xml on every successful string-path write (all 4 write paths: eager, dask streaming, GPU dispatch, VRT) before re-creating it; .qml deliberately kept (QGIS user styling persists across data updates; only a new color_ramp write replaces it); docstring documents the refresh. 
10 new tests in tests/write/test_stale_sidecar_overwrite_3595.py (cat->plain, ramp->plain keeps qml, ramp->cat, cat->ramp, multiband-symbology no-op still removes, foreign sidecar on fresh path, bare ndarray, dask, VRT, GPU). Note: VRT writer refuses same-path overwrites (tiles-dir guard) so its stale case is foreign-sidecar only. Write suite 1213 passed, round-trip+attrs 63, rasterize-categorical+release-gates 182. No CRITICAL/HIGH/LOW findings." interpolate,2026-06-12,3288,MEDIUM,5,kriging K_inv-None fallback was numpy-backed on all backends and misnamed the variance raster; fixed via #3288. All 4 backends verified end-to-end on GPU host. LOW (documented only): template nodatavals/_FillValue copied verbatim while fill_value is the actual output sentinel; tests codify attrs==template.attrs mcda,2026-06-10,3147,HIGH,1,"constrain() dropped all attrs (res/crs/nodatavals) whenever exclude non-empty (xr.where takes attrs from scalar fill); fixed via attrs restore, tests for numpy/dask/dask+cupy. All other mcda funcs keep attrs/coords/dims on all 4 backends. Out-of-scope crashes noted for backend-parity: owa broken on cupy (numpy order-weights x cupy) and on dask (da.sort does not exist); sensitivity monte_carlo crashes on cupy/dask+cupy (.values on cupy); xr.where compute on cupy/dask+cupy hits known cupy13.6/xarray2025.12 incompat." 
multispectral,2026-06-20,3429,MEDIUM,2;3,"true_color() hardcoded y/x dims + dropped extra coords; fixed PR #3434 (all 4 backends verified, CUDA available)" From 5fed44496a8e7fcf03b4c5a9024486aab382391a Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 1 Jul 2026 23:01:24 -0400 Subject: [PATCH 3/4] Address review: failed-write sidecar test, top-level docstring note, comment on OSError swallow (#3595) --- xrspatial/geotiff/_writers/eager.py | 13 ++++++++++ .../test_stale_sidecar_overwrite_3595.py | 26 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index da5bc506b..8503e3c3f 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -131,6 +131,15 @@ def to_geotiff(data: xr.DataArray | np.ndarray, GPU write uses nvCOMP batch compression (deflate/ZSTD) and keeps the array on device. Falls back to CPU if nvCOMP is not available. + Every successful write to a string path refreshes the PAM + ``.aux.xml`` sidecar: a sidecar already at that path (from a + previous write, or a foreign tool) is removed and re-created only + when this write carries its own categories + (``attrs['category_names']``) or ``color_ramp=`` statistics + (#3595). This matches GDAL's behaviour when creating a dataset + over an existing path. A ``.qml`` style file is never removed; + see ``color_ramp``. + Parameters ---------- data : xr.DataArray or np.ndarray @@ -528,6 +537,10 @@ def _write_sidecars(): try: os.remove(sidecar_path(path)) except OSError: + # Missing sidecar is the normal case; a locked one (e.g. + # PermissionError on Windows) is swallowed too, matching + # QuietDelete: the pixel write already succeeded, so a + # leftover sidecar beats failing the whole write. 
pass if _cat_names: from .._pam import write_pam_sidecar diff --git a/xrspatial/geotiff/tests/write/test_stale_sidecar_overwrite_3595.py b/xrspatial/geotiff/tests/write/test_stale_sidecar_overwrite_3595.py index 73e463e04..761c5426e 100644 --- a/xrspatial/geotiff/tests/write/test_stale_sidecar_overwrite_3595.py +++ b/xrspatial/geotiff/tests/write/test_stale_sidecar_overwrite_3595.py @@ -178,6 +178,32 @@ def test_vrt_write_removes_stale_sidecar(tmp_path): assert "category_names" not in open_geotiff(path).attrs +def test_failed_write_keeps_old_sidecar(tmp_path, monkeypatch): + """A failed write leaves the old file and its sidecar consistent. + + The removal runs in ``_write_sidecars``, which only executes at the + success return points, so a write that raises mid-pixel-write must not + strip the sidecar that still describes the untouched old file. This + pins that ordering against a refactor that moves the removal before + the pixel write. + """ + path = str(tmp_path / "failed_3595.tif") + to_geotiff(_categorical_da(), path) + assert os.path.exists(path + ".aux.xml") + + from xrspatial.geotiff._writers import eager as eager_mod + + def _boom(*args, **kwargs): + raise RuntimeError("simulated mid-write failure") + + monkeypatch.setattr(eager_mod, "write", _boom) + with pytest.raises(RuntimeError, match="simulated mid-write"): + to_geotiff(_plain_da(), path) + + assert os.path.exists(path + ".aux.xml") + assert open_geotiff(path).attrs["category_names"] == _NAMES + + @requires_gpu def test_gpu_overwrite_removes_sidecar(tmp_path): """The GPU (nvCOMP) write path refreshes the sidecar.""" From b556e7e2ba13e8d729d0ee8d453c0569b9985d9c Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 1 Jul 2026 23:02:52 -0400 Subject: [PATCH 4/4] Record LOW band-mismatch sidecar edge in metadata-sweep state (#3595) --- .claude/sweep-metadata-state.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/sweep-metadata-state.csv b/.claude/sweep-metadata-state.csv 
index de2b9f7b0..3704566f7 100644 --- a/.claude/sweep-metadata-state.csv +++ b/.claude/sweep-metadata-state.csv @@ -5,7 +5,7 @@ contour,2026-05-29,2700,HIGH,1;5,"Audited 2026-05-29 (agent-ab7fff484a8f57de2 wo corridor,2026-06-22,3446,HIGH,1;5,"Audited 2026-06-22 (agent-a8b2674b815bdfa3f worktree, branch deep-sweep-metadata-corridor-2026-06-22). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live end-to-end for least_cost_corridor across single/threshold/relative/unreachable/pairwise paths. Cat 2 coords (x/y values + float64 dtype) and Cat 3 dims (y,x) preserved on every backend: they flow through cost_distance (coords=raster.coords, dims=raster.dims) and survive xarray's binary intersection. NEW HIGH finding #3446 (Cat 1 + Cat 5): the corridor is cd_a + cd_b where each cost-distance surface carries its SOURCE raster's attrs (cost_distance copies attrs from the source, not friction). xarray's default keep_attrs on binary + keeps only attrs present-and-equal in both operands, so when the source masks are plain marker rasters with no geo-attrs (the common case) the corridor came back with attrs=={} even though the friction surface that defines the grid had res/crs/transform/nodatavals; a downstream slope/clip on the corridor silently lost cellsize/CRS. Secondary Cat 5: .name was None whenever the two sources had different names (cost_distance renames each surface to its source .name; summing differently-named arrays drops the name). Fix (PR on this branch): non-precomputed path re-emits friction.attrs + friction.name on every output via new _apply_geo_metadata helper (single, threshold, all-NaN-unreachable, and pairwise-Dataset paths); precomputed path left on the existing source-derived behaviour since there is no friction to draw from. Only .attrs/.name set -- data values, coords, dims, dtype untouched, dask stays lazy (no compute). 
10 new tests (test_corridor_inherits_friction_geo_attrs x4 backends, test_corridor_threshold_keeps_geo_attrs x4 backends, test_corridor_unreachable_keeps_geo_attrs, test_pairwise_inherits_friction_geo_attrs, test_precomputed_keeps_source_attrs_not_friction). Full corridor suite 43 passed. Cat 4 N/A: NaN-as-nodata is the library convention; corridor never reads attrs['nodatavals'] for masking. No CRITICAL/MEDIUM/LOW findings." cost_distance,2026-06-15,3344,MEDIUM,5,"Audited 2026-06-15 (agent-ad0b84e7f7b212360 worktree, branch deep-sweep-metadata-cost_distance-2026-06-15). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live end-to-end with a rich attrs set (res/crs/transform/nodatavals/_FillValue/units). Cat 1 attrs, Cat 2 coords (values + float64 dtype), and Cat 3 dims (y,x) all preserved and identical across the 4 backends -- public cost_distance() wraps with xr.DataArray(coords=raster.coords, dims=raster.dims, attrs=raster.attrs). NEW MEDIUM finding #3344 (Cat 5): the dask+numpy and dask+cupy backends leaked the internal dask graph name (_trim- from map_overlap, asarray- from the dask+cupy convert-back path) into result.name while numpy/cupy returned None; .name was a nondeterministic per-run token that breaks .to_dataset() variable keys and any name-keyed pipeline. Same .name-leak class as proximity #2723 and zonal #2611. Fix (PR #3349 on this branch): return result.rename(raster.name) -- a constructor name= kwarg does not override a named dask array, and name=None is treated as infer-from-data, so .rename() is required. supports_dataset path unaffected (keys by var_name, verified live). New parametrized regression test test_result_name_matches_input over 4 backends x {None, named}; full cost_distance suite 63 passed (post-merge with origin/main). LOW (documented, not fixed): output float32 uses NaN as the unreachable sentinel but input nodatavals/_FillValue (e.g. 
-9999) are carried through verbatim, so a downstream reader masks a value that never appears -- this is the library-wide attrs=raster.attrs convention shared by proximity/slope/aspect/focal, not a cost_distance-specific bug, so fixing it in isolation would diverge this module from every peer. No CRITICAL/HIGH findings." focal,2026-06-10,3217,MEDIUM,4;5,"Re-audited 2026-06-10 (agent-ad0d55a894c6abc60 worktree, branch deep-sweep-metadata-focal-2026-06-10). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live for mean, apply, focal_stats, hotspots. Cats 1-3 clean: attrs (res/crs/nodatavals/_FillValue/unit), coords (values, dtype, coord attrs), dims, .name, 3D per-band path, and hotspots unit=% all preserved and identical across the 4 backends. NEW MEDIUM finding #3217 (Cat 4 + Cat 5): (a) mean() hardcoded float32 on the GPU paths (_mean_cupy cupy.asarray(dtype=float32), _mean_dask_cupy astype(float32)) while numpy/dask+numpy returned float64 (mean() casts astype(float) before dispatch), so float64 input silently lost precision on cupy/dask+cupy; dask+cupy also advertised float64 (untyped meta) but computed float32. (b) apply()/focal_stats() dask paths passed untyped meta (np.array(()) / cupy.array(())) to map_overlap, so for float32/int input the lazy DataArray advertised float64 but computed the promoted float32 (#2805 typed the chunk fns but not the meta). Same class as aspect #2682 and proximity #2723. Fix: the mean() GPU dtype half landed on main first via duplicate issue #3214/PR #3221 (_promote_float contract: float dtypes preserved, ints->float32, GPU bit-exact vs CPU in float64); PR #3226 (branch deep-sweep-metadata-focal-2026-06-10-01) types every map_overlap meta with data.dtype and aligns tests to the _promote_float contract; 25 new parametrized regression tests (4 backends x 3 dtypes mean; dask backends x 3 dtypes apply/focal_stats; exact CPU/GPU parity). Full focal suite 258 passed. No other CRITICAL/HIGH/MEDIUM/LOW findings." 
-geotiff,2026-07-01,3595,MEDIUM,1,"Re-audited 2026-07-01 (agent-adb0e639731d1209c worktree, branch deep-sweep-metadata-geotiff-2026-07-01). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live. Focus: surfaces changed since the 2026-06-09 audit -- symbology sidecars (#3538/#3546), categorical PAM sidecar backends (#3519), xarray engine (#3375/#3377/#3380), pack/nodata attr changes (#3277/#3325/#3128). Live probes all clean: 4-backend read parity (attrs/coords/dims/name/dtype incl. coord dtype), engine open_dataset vs open_geotiff parity (attrs/coords/values, chunks={}, masked=True, var name), color_ramp PAM stats + QML byte-identical across numpy/dask/cupy/dask+cupy write inputs (global dask stats via one fused dask.compute, not per-chunk), nodata excluded from stats, categorical sidecar attrs attach on all 4 read paths + engine, stats-PAM never fakes category attrs (thematic gate). #3128 64-bit sentinel eager fix verified merged. NEW MEDIUM finding #3595 (Cat 1): to_geotiff left the previous file's PAM .aux.xml behind when the new write emitted no sidecar, so open_geotiff attached the overwritten file's category_names/category_colors to the new pixels and GDAL/QGIS stretched with stale STATISTICS_*; GDAL avoids this via GDALDriver::QuietDelete. Fix on this branch: _write_sidecars removes a pre-existing .aux.xml on every successful string-path write (all 4 write paths: eager, dask streaming, GPU dispatch, VRT) before re-creating it; .qml deliberately kept (QGIS user styling persists across data updates; only a new color_ramp write replaces it); docstring documents the refresh. 10 new tests in tests/write/test_stale_sidecar_overwrite_3595.py (cat->plain, ramp->plain keeps qml, ramp->cat, cat->ramp, multiband-symbology no-op still removes, foreign sidecar on fresh path, bare ndarray, dask, VRT, GPU). Note: VRT writer refuses same-path overwrites (tiles-dir guard) so its stale case is foreign-sidecar only. 
Write suite 1213 passed, round-trip+attrs 63, rasterize-categorical+release-gates 182. No CRITICAL/HIGH/LOW findings." +geotiff,2026-07-01,3595,MEDIUM,1,"Re-audited 2026-07-01 (agent-adb0e639731d1209c worktree, branch deep-sweep-metadata-geotiff-2026-07-01). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live. Focus: surfaces changed since the 2026-06-09 audit -- symbology sidecars (#3538/#3546), categorical PAM sidecar backends (#3519), xarray engine (#3375/#3377/#3380), pack/nodata attr changes (#3277/#3325/#3128). Live probes all clean: 4-backend read parity (attrs/coords/dims/name/dtype incl. coord dtype), engine open_dataset vs open_geotiff parity (attrs/coords/values, chunks={}, masked=True, var name), color_ramp PAM stats + QML byte-identical across numpy/dask/cupy/dask+cupy write inputs (global dask stats via one fused dask.compute, not per-chunk), nodata excluded from stats, categorical sidecar attrs attach on all 4 read paths + engine, stats-PAM never fakes category attrs (thematic gate). #3128 64-bit sentinel eager fix verified merged. NEW MEDIUM finding #3595 (Cat 1): to_geotiff left the previous file's PAM .aux.xml behind when the new write emitted no sidecar, so open_geotiff attached the overwritten file's category_names/category_colors to the new pixels and GDAL/QGIS stretched with stale STATISTICS_*; GDAL avoids this via GDALDriver::QuietDelete. Fix on this branch: _write_sidecars removes a pre-existing .aux.xml on every successful string-path write (all 4 write paths: eager, dask streaming, GPU dispatch, VRT) before re-creating it; .qml deliberately kept (QGIS user styling persists across data updates; only a new color_ramp write replaces it); docstring documents the refresh. 11 new tests in tests/write/test_stale_sidecar_overwrite_3595.py (cat->plain, ramp->plain keeps qml, ramp->cat, cat->ramp, multiband-symbology no-op still removes, foreign sidecar on fresh path, bare ndarray, dask, VRT, failed write keeps old sidecar, GPU). 
Note: VRT writer refuses same-path overwrites (tiles-dir guard) so its stale case is foreign-sidecar only. Write suite 1213 passed, round-trip+attrs 63, rasterize-categorical+release-gates 182. LOW (documented, not fixed): read_pam_sidecar parses the first PAMRasterBand element and _attach_category_attrs applies it regardless of the band=N requested, so a foreign multiband sidecar with a band-1 thematic RAT labels a band=2 read; xrspatial's own writer only emits band-1 single-band RATs so in-library round-trips are unaffected. No CRITICAL/HIGH findings." interpolate,2026-06-12,3288,MEDIUM,5,kriging K_inv-None fallback was numpy-backed on all backends and misnamed the variance raster; fixed via #3288. All 4 backends verified end-to-end on GPU host. LOW (documented only): template nodatavals/_FillValue copied verbatim while fill_value is the actual output sentinel; tests codify attrs==template.attrs mcda,2026-06-10,3147,HIGH,1,"constrain() dropped all attrs (res/crs/nodatavals) whenever exclude non-empty (xr.where takes attrs from scalar fill); fixed via attrs restore, tests for numpy/dask/dask+cupy. All other mcda funcs keep attrs/coords/dims on all 4 backends. Out-of-scope crashes noted for backend-parity: owa broken on cupy (numpy order-weights x cupy) and on dask (da.sort does not exist); sensitivity monte_carlo crashes on cupy/dask+cupy (.values on cupy); xr.where compute on cupy/dask+cupy hits known cupy13.6/xarray2025.12 incompat." multispectral,2026-06-20,3429,MEDIUM,2;3,"true_color() hardcoded y/x dims + dropped extra coords; fixed PR #3434 (all 4 backends verified, CUDA available)"