From 6545a74754eafdbfd27640d32be0688a2899e7c4 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 3 Jul 2026 09:16:12 -0400 Subject: [PATCH 1/2] Order tile edges in kde dask point filter so descending coordinates keep their points (#3627) _filter_points_to_tile assumed positive dx/dy when computing the tile extent, so dask+numpy and dask+cupy dropped most or all points for descending-coordinate templates: all-zero output for compact kernels, partially wrong values for gaussian. Same bug class as #1198, one layer above the #1199 kernel fix. Also records the 2026-07-03 accuracy sweep of the kde module in the sweep state CSV. --- .claude/sweep-accuracy-state.csv | 2 +- xrspatial/kde.py | 11 +++++-- xrspatial/tests/test_kde.py | 54 ++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/.claude/sweep-accuracy-state.csv b/.claude/sweep-accuracy-state.csv index cc43f3153..583dea01e 100644 --- a/.claude/sweep-accuracy-state.csv +++ b/.claude/sweep-accuracy-state.csv @@ -18,7 +18,7 @@ glcm,2026-05-01,1408,HIGH,2,"angle=None averaged NaN as 0, masking no-valid-pair hillshade,2026-04-10T12:00:00Z,,,,"Horn's method correct. All backends consistent. NaN propagation correct. float32 adequate for [0,1] output." hydro,2026-04-30,,LOW,1,Only LOW: twi log(0)=-inf if fa=0 (out-of-contract); MFD weighted sum no Kahan (negligible). No CRIT/HIGH issues. interpolate-kriging,2026-06-04,2915,MEDIUM,1,"Cat1 nugget-on-diagonal bug (MEDIUM): _build_kriging_matrix set K[:n,:n]=vario_func(D) where D has 0 diagonal, so vario_func(0)=nugget c0 landed on the matrix diagonal; semivariogram gamma(0)=0 by definition (nugget is the h->0+ limit). Forced exact interpolation of noisy data and biased kriging variance downward. Only bites when fitted nugget>0; existing trend-dominated test data fits ~0 nugget so tests passed. Fix #2915/PR #2922: np.fill_diagonal(G,0.0) in shared host code (all 4 backends consume same K_inv). Cats 2-5 clean: validate_points drops NaN/Inf rows; range floor 1e-12 prevents div blowup; dask map_blocks slices grid coords with correct half-open extents and returns matching block shape (kriging is global, no overlap needed); planar Euclidean distance is expected for kriging (Cat4 n/a); numpy/cupy/dask share one algorithm and parity tests pass rtol=1e-10. CUDA available; all 16 kriging tests pass incl cupy + dask+cupy. Singular-matrix path adds 1e-10*eye Tikhonov term (separate from nugget, unaffected, correct)." -kde,2026-04-13T12:00:00Z,1198,,,kde/line_density return zeros for descending-y templates. Fix in PR #1199. +kde,2026-07-03,3627;3628,HIGH,2;3;5,"#1199 descending-coords fix verified intact on eager paths. NEW HIGH #3627 (Cat3+5): _filter_points_to_tile assumed positive dx/dy so dask+numpy/dask+cupy returned all-zero (compact kernels) or partially-wrong (gaussian) output on descending-coordinate templates; fix orders tile edges with min/max (PR pending on issue-3627). NEW HIGH #3628 (Cat2+5): NaN inputs diverge across backends (eager cupy gaussian poisons whole grid NaN, others silently drop the point) and a NaN coord collapses the auto extent to an all-zero grid with NaN coords; fix filters non-finite points/segments up front per interpolate precedent (PR pending on issue-3628). Cat1/4 clean. Cat6: integral==n for all 3 kernels, quartic exact vs direct summation (1.6e-12), gaussian 4*bw box cutoff fringe ~3e-4 is a documented convention (scipy delta traces to its covariance-scaled kernel). LOW not fixed: line_density ignores template backend and always returns eager numpy output (values correct, type/memory only). Coverage note for test-coverage sweep: Dask/CuPy parity tests were ascending-only pre-#3627. CUDA available; all 4 backends executed." mahalanobis,2026-05-01,,LOW,1,"LOW: np.linalg.inv (no pinv fallback) returns garbage for near-singular cov without raising. LOW: two-pass mean/cov instead of Welford could lose precision for inputs with very large mean/small variance. No CRIT/HIGH; all four backends use float64 throughout, NaN handled via isfinite, dist_sq clamped non-negative, singular case raises ValueError." mcda,2026-06-10,3146,MEDIUM,5,"Cat5 backend failures, all raise loudly (no wrong numbers): owa raised on every dask backend (da.sort does not exist in dask.array; fixed via rechunk+map_blocks np.sort with explicit meta) and on cupy (numpy order-weight array * cupy stack); standardize piecewise raised on cupy (cupy.interp needs cupy bp/vl + C-contiguous input) and dask+cupy (np.asarray on cupy chunk), categorical raised on dask+cupy (same asarray); monte-carlo sensitivity raised on cupy/dask+cupy (.values implicit conversion; now Welford accumulates with matching array module). All fixed + GPU tests added (issue #3146). Cats 1-4 clean: Welford already used, AHP Perron eigenvector + Saaty RI table correct, NaN propagation verified across combine ops, no neighborhood/geodesic code. constrain on cupy raises cupy.astype AttributeError = known cupy 13.6 + xarray xr.where incompat (dependency pin, not mcda). CUDA available; cupy + dask+cupy executed for all probes and tests." morphology,2026-04-30,"1397,1399",HIGH,2;5,HIGH fixed in #1397/PR #1398: morph_erode/dilate seeded centre cell into running min/max even when kernel[centre]==0 (all 4 backends). HIGH fixed in #1399/PR #1400: dask backends raised on 1xN/Nx1 kernels because empty-slice writeback (0:-0). diff --git a/xrspatial/kde.py b/xrspatial/kde.py index ff9e8a2bb..19d0d09e6 100644 --- a/xrspatial/kde.py +++ b/xrspatial/kde.py @@ -388,10 +388,17 @@ def _filter_points_to_tile(xs, ys, ws, tile_x0, tile_y0, dx, dy, Points whose cutoff circle doesn't overlap the tile extent are excluded, reducing serialization and speeding up the kernel. """ + # dx/dy may be negative (descending coordinates), so order the tile + # edges with min/max before widening by the cutoff. The unordered + # version inverted the interval and dropped the points (#3627). tile_x1 = tile_x0 + tile_cols * dx tile_y1 = tile_y0 + tile_rows * dy - mask = ((xs >= tile_x0 - cutoff) & (xs <= tile_x1 + cutoff) & - (ys >= tile_y0 - cutoff) & (ys <= tile_y1 + cutoff)) + x_lo = min(tile_x0, tile_x1) - cutoff + x_hi = max(tile_x0, tile_x1) + cutoff + y_lo = min(tile_y0, tile_y1) - cutoff + y_hi = max(tile_y0, tile_y1) + cutoff + mask = ((xs >= x_lo) & (xs <= x_hi) & + (ys >= y_lo) & (ys <= y_hi)) if mask.all(): return xs, ys, ws return xs[mask], ys[mask], ws[mask] diff --git a/xrspatial/tests/test_kde.py b/xrspatial/tests/test_kde.py index 9da1c0cd1..9db7196b9 100644 --- a/xrspatial/tests/test_kde.py +++ b/xrspatial/tests/test_kde.py @@ -476,6 +476,36 @@ def test_compact_kernel_exact_match(self, point_cluster, simple_grid): dask_result.values, np_result.values, rtol=1e-12, ) + @pytest.mark.parametrize('desc_y,desc_x', [ + (True, False), (False, True), (True, True), + ]) + @pytest.mark.parametrize('kernel', ['gaussian', 'epanechnikov', 'quartic']) + def test_dask_matches_numpy_descending_coords(self, point_cluster, + desc_y, desc_x, kernel): + """Descending templates: dask tile filter must not drop points (#3627).""" + x, y = point_cluster + ys = np.linspace(4, -4, 16) if desc_y else np.linspace(-4, 4, 16) + xs = np.linspace(4, -4, 16) if desc_x else np.linspace(-4, 4, 16) + template = xr.DataArray( + np.zeros((16, 16), dtype=np.float64), + dims=['y', 'x'], coords={'y': ys, 'x': xs}, + ) + np_result = kde(x, y, bandwidth=1.0, kernel=kernel, template=template) + dask_template = self._make_dask_template(template) + dask_result = kde(x, y, bandwidth=1.0, kernel=kernel, + template=dask_template) + assert float(dask_result.sum()) > 0.0 + if kernel == 'gaussian': + # Fringe pixels at the 4*bw box cutoff may land on either + # side of the tile filter; those values are ~exp(-8). + np.testing.assert_allclose( + dask_result.values, np_result.values, atol=1e-5, + ) + else: + np.testing.assert_allclose( + dask_result.values, np_result.values, rtol=1e-12, + ) + @cuda_and_cupy_available class TestCuPyParity: @@ -499,6 +529,30 @@ def test_cupy_matches_numpy(self, point_cluster, simple_grid, kernel): np.testing.assert_allclose(result_np, np_result.values, rtol=tol) +@cuda_and_cupy_available +@dask_array_available +class TestDaskCupyDescending: + """dask+cupy must not drop points on descending templates (#3627).""" + + def test_dask_cupy_matches_numpy_descending_y(self, point_cluster): + import cupy + x, y = point_cluster + template = xr.DataArray( + np.zeros((16, 16), dtype=np.float64), + dims=['y', 'x'], + coords={'y': np.linspace(4, -4, 16), 'x': np.linspace(-4, 4, 16)}, + ) + np_result = kde(x, y, bandwidth=1.0, kernel='quartic', + template=template) + dask_cupy_template = template.copy( + data=da.from_array(cupy.asarray(template.values), chunks=(8, 8))) + result = kde(x, y, bandwidth=1.0, kernel='quartic', + template=dask_cupy_template) + result_np = result.data.compute().get() + assert float(result_np.sum()) > 0.0 + np.testing.assert_allclose(result_np, np_result.values, rtol=1e-6) + + # --------------------------------------------------------------------------- # Output resolution metadata (issue #3571) # --------------------------------------------------------------------------- From ae16e5e8435c23d76cdd92af27f6442da1649391 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 3 Jul 2026 09:22:04 -0400 Subject: [PATCH 2/2] Address review: parametrize dask+cupy descending test, note filter overshoot (#3627) --- xrspatial/kde.py | 2 ++ xrspatial/tests/test_kde.py | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/xrspatial/kde.py b/xrspatial/kde.py index 19d0d09e6..73087ff30 100644 --- a/xrspatial/kde.py +++ b/xrspatial/kde.py @@ -391,6 +391,8 @@ def _filter_points_to_tile(xs, ys, ws, tile_x0, tile_y0, dx, dy, # dx/dy may be negative (descending coordinates), so order the tile # edges with min/max before widening by the cutoff. The unordered # version inverted the interval and dropped the points (#3627). + # tile_x1/tile_y1 overshoot the last pixel centre by one spacing, + # which keeps the filter conservative (never drops a contributor). tile_x1 = tile_x0 + tile_cols * dx tile_y1 = tile_y0 + tile_rows * dy x_lo = min(tile_x0, tile_x1) - cutoff diff --git a/xrspatial/tests/test_kde.py b/xrspatial/tests/test_kde.py index 9db7196b9..9a941b59b 100644 --- a/xrspatial/tests/test_kde.py +++ b/xrspatial/tests/test_kde.py @@ -534,13 +534,17 @@ def test_cupy_matches_numpy(self, point_cluster, simple_grid, kernel): class TestDaskCupyDescending: """dask+cupy must not drop points on descending templates (#3627).""" - def test_dask_cupy_matches_numpy_descending_y(self, point_cluster): + @pytest.mark.parametrize('desc_y,desc_x', [(True, False), (False, True)]) + def test_dask_cupy_matches_numpy_descending(self, point_cluster, + desc_y, desc_x): import cupy x, y = point_cluster + ys = np.linspace(4, -4, 16) if desc_y else np.linspace(-4, 4, 16) + xs = np.linspace(4, -4, 16) if desc_x else np.linspace(-4, 4, 16) template = xr.DataArray( np.zeros((16, 16), dtype=np.float64), dims=['y', 'x'], - coords={'y': np.linspace(4, -4, 16), 'x': np.linspace(-4, 4, 16)}, + coords={'y': ys, 'x': xs}, ) np_result = kde(x, y, bandwidth=1.0, kernel='quartic', template=template)