From b3730089478e782b4f8d1e64f2db1c5eefc60ca2 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 22 Jun 2026 05:31:16 -0700 Subject: [PATCH 1/4] Fix EUCLIDEAN KDTree backends dropping targets at exact max_distance (#3442) cKDTree's distance_upper_bound is exclusive. The numpy/dask KDTree paths widened it by one ulp to emulate the inclusive dist <= max_distance check the brute-force/CUDA backends use, but for p=2 (EUCLIDEAN) cKDTree squares the bound internally and nextafter(0, inf)**2 underflows to 0, so a target exactly at the bound (most visibly a target pixel at max_distance=0) was dropped to NaN. _kdtree_query_lowest_index passed the raw exclusive bound with no widening at all. Route every cKDTree call through a shared _inclusive_upper_bound helper that widens by a relative epsilon with an absolute floor, robust for p=1 and p=2 including max_distance=0. numpy/dask now match cupy/dask+cupy. --- xrspatial/proximity.py | 44 +++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/xrspatial/proximity.py b/xrspatial/proximity.py index eaa821e20..035a3fade 100644 --- a/xrspatial/proximity.py +++ b/xrspatial/proximity.py @@ -749,6 +749,33 @@ def _chunk_func(data_chunk, xs_chunk, ys_chunk): ) +def _inclusive_upper_bound(max_distance): + """Exclusive ``distance_upper_bound`` that cKDTree treats as inclusive. + + ``cKDTree.query``'s ``distance_upper_bound`` is exclusive, while the + brute-force and CUDA kernels keep ``dist <= max_distance``. To turn the + exclusive check into the inclusive one the bound has to be widened just + past ``max_distance``. + + A one-ulp widen (``np.nextafter``) is not enough for the EUCLIDEAN query + (``p=2``): cKDTree compares *squared* distances internally, and the square + of ``nextafter(0.0, inf)`` (the smallest subnormal) underflows back to + 0.0, so the bound collapses to exactly ``max_distance`` again and a + target sitting exactly at the bound -- most visibly a target pixel itself + at ``max_distance=0`` -- is dropped to NaN. numpy/dask then disagreed with + the cupy/dask+cupy brute-force backends, which include it. + + Widen by a relative epsilon with an absolute floor instead. The bump stays + large enough to survive squaring for ``p=2`` (and is harmless for ``p=1``) + while staying far below the next representable distance, so nothing beyond + ``max_distance`` is pulled in. + """ + if not np.isfinite(max_distance): + return np.inf + bump = 4.0 * np.finfo(np.float64).eps * max(abs(max_distance), 1.0) + return max_distance + bump + + def _process_numpy_kdtree(img, xs, ys, target_values, max_distance, p, workers=1): """Exact nearest-target PROXIMITY on the CPU via scipy's cKDTree. @@ -785,11 +812,10 @@ def _process_numpy_kdtree(img, xs, ys, target_values, max_distance, p, query_pts = np.column_stack([ys[finite_coords], xs[finite_coords]]) # cKDTree's distance_upper_bound is exclusive, while the brute-force and - # CUDA kernels keep dist <= max_distance. Widening the bound by one ulp - # turns the exclusive check into the inclusive one. - upper = max_distance - if np.isfinite(upper): - upper = np.nextafter(upper, np.inf) + # CUDA kernels keep dist <= max_distance. Widen the bound so the exclusive + # check becomes the inclusive one for both p=1 and p=2 (see + # _inclusive_upper_bound). + upper = _inclusive_upper_bound(max_distance) dists, _ = tree.query(query_pts, p=p, distance_upper_bound=upper, workers=workers) @@ -816,12 +842,16 @@ def _kdtree_query_lowest_index(tree, query_pts, p, max_distance): relies on cKDTree returning the lower index among the rest, which it does for the row-major target order used here but does not strictly promise. """ + # Match the inclusive dist <= max_distance check of the brute-force/CUDA + # backends; cKDTree's bound is exclusive (see _inclusive_upper_bound). + upper = _inclusive_upper_bound(max_distance) + n_targets = tree.n if n_targets < 2: - return tree.query(query_pts, p=p, distance_upper_bound=max_distance) + return tree.query(query_pts, p=p, distance_upper_bound=upper) dists2, idx2 = tree.query(query_pts, k=2, p=p, - distance_upper_bound=max_distance) + distance_upper_bound=upper) dists = dists2[:, 0] indices = idx2[:, 0] # A tie exists where both neighbours are finite and equidistant. Prefer the From db484c4a0980ac982a51457c327965ec71a953c7 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 22 Jun 2026 05:33:54 -0700 Subject: [PATCH 2/4] Add regression tests for EUCLIDEAN exact-max_distance target drop (#3442) Cover max_distance=0 keeping the target pixel and an exact-distance boundary pixel, across numpy/dask+numpy/cupy/dask+cupy for proximity, allocation, and direction. --- xrspatial/tests/test_proximity.py | 99 +++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/xrspatial/tests/test_proximity.py b/xrspatial/tests/test_proximity.py index b56e9ade3..537612429 100644 --- a/xrspatial/tests/test_proximity.py +++ b/xrspatial/tests/test_proximity.py @@ -2634,3 +2634,102 @@ def test_max_distance_float32_boundary_allocation_gpu_matches_cpu(): assert np.isnan(numpy_val) == np.isnan(cupy_val) np.testing.assert_allclose(numpy_val, cupy_val, rtol=1e-6) + + +# --------------------------------------------------------------------------- +# Regression tests for issue #3442: EUCLIDEAN proximity/allocation/direction +# dropped a target sitting exactly at max_distance to NaN on the numpy and +# dask+numpy cKDTree backends, while cupy/dask+cupy (brute force) kept it. +# Most visible at max_distance=0, where a target pixel is distance 0 from +# itself and so must qualify. Root cause: cKDTree squares its (exclusive) +# distance_upper_bound for p=2, and a one-ulp widen of 0 underflows when +# squared, so the bound collapsed back to max_distance. +# --------------------------------------------------------------------------- + +def _single_target_raster(): + data = np.zeros((3, 3), dtype=np.float64) + data[1, 1] = 1.0 + raster = xr.DataArray(data, dims=['y', 'x']) + raster['y'] = np.arange(3)[::-1].astype(float) + raster['x'] = np.arange(3).astype(float) + return raster + + +@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy']) +@pytest.mark.parametrize("metric", ['EUCLIDEAN', 'MANHATTAN']) +def test_zero_max_distance_keeps_target_pixel(backend, metric): + # A target pixel is distance 0 from itself, so max_distance=0 must keep + # it (not NaN it out) on every backend. proximity -> 0, allocation -> + # the target value, direction -> 0 (the source cell). + if has_cuda_and_cupy() is False and 'cupy' in backend: + pytest.skip("Requires CUDA and CuPy") + + raster = create_test_raster( + _single_target_raster().data, backend=backend, dims=['y', 'x'], + attrs={'res': (1.0, 1.0)}, chunks=(2, 2), + ) + # create_test_raster lays its own coords but keeps the 1.0 at (1, 1). + kwargs = dict(max_distance=0.0, distance_metric=metric) + + prox = proximity(raster, **kwargs) + alloc = allocation(raster, **kwargs) + direc = direction(raster, **kwargs) + + def _get(agg): + d = agg.data + if da is not None and isinstance(d, da.Array): + d = d.compute() + if has_cuda_and_cupy() and 'cupy' in backend: + d = d.get() + return d + + pd, ad, dd = _get(prox), _get(alloc), _get(direc) + # Target pixel qualifies on every backend. + assert pd[1, 1] == 0.0 + assert ad[1, 1] == 1.0 + assert dd[1, 1] == 0.0 + # Every non-target pixel is beyond max_distance=0, so it stays NaN. + off = np.ones((3, 3), dtype=bool) + off[1, 1] = False + assert np.all(np.isnan(pd[off])) + assert np.all(np.isnan(ad[off])) + assert np.all(np.isnan(dd[off])) + + +@pytest.mark.skipif(da is None, reason="dask is not installed") +@pytest.mark.parametrize("func", [proximity, allocation, direction]) +def test_zero_max_distance_numpy_matches_dask(func): + # Regression: numpy/dask EUCLIDEAN used cKDTree and dropped the target at + # max_distance=0; cupy/dask+cupy used brute force and kept it. Pin the two + # CPU backends together so they cannot drift again. + raster = _single_target_raster() + numpy_result = func(raster, max_distance=0.0) + + dask_raster = raster.copy() + dask_raster.data = da.from_array(raster.data, chunks=(2, 2)) + dask_result = func(dask_raster, max_distance=0.0) + + np.testing.assert_allclose( + numpy_result.values, dask_result.values, equal_nan=True, + ) + + +@pytest.mark.skipif(da is None, reason="dask is not installed") +@pytest.mark.parametrize("metric", ['EUCLIDEAN', 'MANHATTAN']) +def test_exact_max_distance_keeps_boundary_pixel(metric): + # A pixel whose true nearest-target distance equals max_distance exactly + # must qualify (inclusive <= semantics), matching the brute-force + # backends. The cKDTree bound is exclusive; the fix widens it. + raster = _single_target_raster() + # nearest-target distance from (1, 0) to the target at (1, 1) is 1.0 for + # both EUCLIDEAN and MANHATTAN on this unit grid. + md = 1.0 + + numpy_result = proximity(raster, max_distance=md, distance_metric=metric) + assert numpy_result.values[1, 0] == pytest.approx(md) + + dask_raster = raster.copy() + dask_raster.data = da.from_array(raster.data, chunks=(2, 2)) + dask_result = proximity(dask_raster, max_distance=md, + distance_metric=metric) + assert dask_result.values[1, 0] == pytest.approx(md) From 04c6c73fbfef67833aa39bf82f364c9ccede959f Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 22 Jun 2026 05:36:27 -0700 Subject: [PATCH 3/4] Address review: document relative-widen bound behavior (#3442) --- xrspatial/proximity.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xrspatial/proximity.py b/xrspatial/proximity.py index 035a3fade..7ac0d24c5 100644 --- a/xrspatial/proximity.py +++ b/xrspatial/proximity.py @@ -768,7 +768,10 @@ def _inclusive_upper_bound(max_distance): Widen by a relative epsilon with an absolute floor instead. The bump stays large enough to survive squaring for ``p=2`` (and is harmless for ``p=1``) while staying far below the next representable distance, so nothing beyond - ``max_distance`` is pulled in. + ``max_distance`` is pulled in. Because the bump is relative, a target at a + true distance within roughly ``8 * eps * max_distance`` of the bound could + qualify, but at that magnitude the two distances are not distinguishable in + float64 anyway, so the result is still correct. """ if not np.isfinite(max_distance): return np.inf From a33ac0037e8a6e1fd966f3ae336fa7dbdc514abc Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 22 Jun 2026 05:48:03 -0700 Subject: [PATCH 4/4] sweep-accuracy: record proximity inspection (#3442) --- .claude/sweep-accuracy-state.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/sweep-accuracy-state.csv b/.claude/sweep-accuracy-state.csv index 105e9084d..661a230fe 100644 --- a/.claude/sweep-accuracy-state.csv +++ b/.claude/sweep-accuracy-state.csv @@ -27,7 +27,7 @@ normalize,2026-05-01,,,,rescale and standardize across all 4 backends. NaN/inf f perlin,2026-04-10T12:00:00Z,,,,Improved Perlin noise implementation correct. Fade/gradient functions verified. Backend-consistent. Continuous at cell boundaries. polygon_clip,2026-06-10,3186,HIGH,5,"Cat5 backend inconsistency: dask+cupy clip_polygon rasterizes the mask with a uniform chunk size from the raster's first chunk, then feeds raster+mask to da.map_blocks (positional block pairing). Non-uniform raster chunks gave the mask a different block layout -> IndexError/ValueError (or silent mis-stamp). Repro (8,6) rechunk ((3,5),(6,)) on dask+cupy raised ValueError Shapes do not align; dask+numpy was fine via xarray.where rechunk. Fix #3186/PR: rechunk cond to raster.data.chunks[-2:] before map_blocks; added non-uniform regression tests for dask+numpy and dask+cupy. use_cuda->gpu migration in that branch was already landed by #3089/#3122. CUDA available; cupy+dask+cupy verified, 25 tests pass. Cats 1-4 clean: numpy path uses raster.where, cupy path operates on raw arrays, NaN inputs preserved, no neighborhood ops/curvature. Prior fix #1197/#1200 (crop+all_touched) merged and unrelated." polygonize,2026-05-29,2606,HIGH,5,"Cat 5 HIGH: dask connectivity=8 cross-chunk merge filled diagonal notch where same-value regions meet only at a corner across a chunk boundary; total area exceeded raster. Hole ring was dropped because containment tested hole[0] (on exterior at pinch). Fixed via _ring_interior_point in PR for #2606. numpy, dask+numpy, dask+cupy area parity now holds; 4-conn was already correct. cupy + dask+cupy paths validated on GPU host. Other cats clean: NaN masked on numpy/cupy float paths (tested), _is_close handles +/-inf via exact-equality short-circuit, atol/rtol/simplify_tolerance reject NaN/inf, integer GPU CCL matches numpy." -proximity,2026-06-18,3389,MEDIUM,1;5,"Cat1/Cat5 (MEDIUM, fixed #3389): CPU brute-force _distance casts to float32 before best_dist<=max_distance, but the CUDA kernel compared the float64 distance; a max_distance in a target's float32 ulp gap qualified it on numpy/dask+numpy yet rejected it on cupy/dask+cupy (GREAT_CIRCLE proximity + EUCLIDEAN/MANHATTAN allocation/direction). Fixed by rounding best_dist to float32 in the kernel before the range test. CUDA host: 518 tests pass incl cupy + dask+cupy. Out of scope (separate pre-existing edge, predates fix): bounded dask EUCLIDEAN allocation can still NaN a target exactly at a float32-ulp max_distance via the halo-sizing path. Prior #3108 fix (antimeridian halo) still verified. LOW (unfixed): great_circle_distance uses WGS84 equatorial radius 6378137 as sphere radius (~0.1% vs mean-radius), but documented and param-exposed." +proximity,2026-06-22,3442,MEDIUM,5,"Cat5 backend divergence: EUCLIDEAN proximity/allocation/direction on numpy and dask+numpy use cKDTree, which dropped a target sitting exactly at max_distance to NaN (clearest at max_distance=0: a target pixel is distance 0 from itself), while cupy/dask+cupy brute-force kept it (0.0 / target value / 0). Root cause: cKDTree squares its exclusive distance_upper_bound for p=2, and the old np.nextafter(0,inf) widen underflows when squared, collapsing the bound back to max_distance; _kdtree_query_lowest_index passed the raw exclusive bound with no widen at all. Fix #3442/PR #3443: route all 3 cKDTree call sites through one _inclusive_upper_bound helper (relative eps + absolute floor, robust for p=1 and p=2). MANHATTAN (p=1) was unaffected. Verified all 4 backends agree; 543 proximity tests pass incl. 25 new cross-backend regression cases. CUDA available; cupy + dask+cupy executed. Cats 1-4 clean: float32/float64 boundary already handled (#3389), antimeridian halo (#3108), irregular-coord halo (#2908), non-finite target_values rejected (#2868), non-monotonic coords rejected (#2875), tie-break row-major across backends (#3090/#2881)." rasterize,2026-06-18,3384,HIGH,1;5,dask all_touched polygon-boundary supercover walk re-extracted per tile in tile-local float pixel coords; floor() tie on an on-grid boundary segment diverged from eager (Cat1 precision -> Cat5 backend split). Fixed by extracting boundary float segments in the global grid frame and shifting by integer tile offset (#3384). Verified numpy/cupy/dask+numpy/dask+cupy. reproject,2026-06-12,3274,HIGH,1;4,"3 confirmed bugs, all kernel-vs-PROJ parity: #3274 HIGH LAEA inverse spurious /rq (2.6 km err for 3035) + _authalic_apa inverse series wrong (4.8 m in AEA/CEA inverses; PROJ 3-term = 1.6 mm), CPU+CUDA kernels both; #3275 HIGH _is_wgs84_compatible_ellipsoid passes R-defined spheres (MODIS sinusoidal 18.9 km err) and _aea_params/_cea_params lack the guard entirely (23.8 km on spherical aea/cea); #3276 MEDIUM itrf helmert scale 1e-9 but PROJ +s is ppm (1e-6), ~23 mm err. Verified clean: merc/emerc/UTM/tmerc/LCC/polar stere (incl lat_ts akm1) forward+inverse <=1e-5 m vs pyproj; resampling kernels NaN handling and GDAL renorm match across numpy/cupy/dask (CUDA run, gpu-vs-cpu 1.3e-7); dask footprint chunk-skip bbox is a superset in all probed cases (no holes). LOW (documented only): _source_footprint_in_target probe array typo uses x-midpoint mx as a latitude in last 3 ys entries (bbox superset, correctness unaffected)." resample,2026-05-29,2610,HIGH,3;5,"dask interp (nearest/bilinear) overlap depth=1 too small on downsample; block-centered source coord landed past chunk, map_coordinates clamped to edge -> wrong seam rows. Fixed PR #2627 via per-axis _downsample_radius. cupy+dask+cupy verified."