From 408d6db6c594c4c25d1e773beca65ca28ff9ce70 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 26 Jun 2026 18:12:41 -0700 Subject: [PATCH 1/2] Return all-NaN instead of crashing on all-non-finite classify input (#3510) head_tail_breaks, percentiles, and box_plot reduced over the empty finite subset of an all-NaN/all-inf raster and raised an opaque reduction error. Guard the empty-finite case in each so they degrade to an all-NaN result, matching equal_interval, std_mean, and maximum_breaks. Flip the xfail regression tests to plain assertions and add all-inf coverage. --- xrspatial/classify.py | 23 ++++++++++++++- xrspatial/tests/test_classify.py | 50 ++++++++++++++++++++------------ 2 files changed, 53 insertions(+), 20 deletions(-) diff --git a/xrspatial/classify.py b/xrspatial/classify.py index facf4c48c..169eaa3c3 100644 --- a/xrspatial/classify.py +++ b/xrspatial/classify.py @@ -1159,6 +1159,11 @@ def std_mean(agg: xr.DataArray, def _compute_head_tail_bins(values_np): """Compute head/tail break bins from flat finite numpy values.""" + if values_np.size == 0: + # All-NaN/inf input: no finite values to partition. Return a NaN bin + # so _bin maps every (non-finite) pixel to NaN, matching equal_interval + # and maximum_breaks on degenerate input (#3510). + return np.array([np.nan]) bins = [] data = values_np.copy() while len(data) > 1: @@ -1297,7 +1302,11 @@ def head_tail_breaks(agg: xr.DataArray, def _run_percentiles(data, num_sample, pct, module): # num_sample ignored for in-memory backends - q = module.percentile(data[module.isfinite(data)], pct) + finite = data[module.isfinite(data)] + if finite.size == 0: + # All-NaN/inf input: no finite values to take percentiles of (#3510). + return module.array([np.nan]) + q = module.percentile(finite, pct) q = module.unique(q) return q @@ -1313,6 +1322,9 @@ def _run_dask_percentiles(data, num_sample, pct): sample_idx = _generate_sample_indices(num_data, num_sample) values = np.asarray(clean.ravel()[sample_idx].compute()) values = values[np.isfinite(values)] + if values.size == 0: + # All-NaN/inf input: no finite values to take percentiles of (#3510). + return np.array([np.nan]) q = np.percentile(values, pct) q = np.unique(q) return q @@ -1574,6 +1586,9 @@ def maximum_breaks(agg: xr.DataArray, def _box_plot_bins_from_sample(finite_np, hinge, max_v): + if finite_np.size == 0: + # All-NaN/inf input: no finite values to take percentiles of (#3510). + return np.array([np.nan]) q1 = float(np.percentile(finite_np, 25)) q2 = float(np.percentile(finite_np, 50)) q3 = float(np.percentile(finite_np, 75)) @@ -1591,6 +1606,12 @@ def _run_box_plot(agg, hinge, module): data_clean = module.where(module.isinf(data), np.nan, data) finite_data = data_clean[module.isfinite(data_clean)] + if finite_data.size == 0: + # All-NaN/inf input: no finite values to take percentiles of. Return a + # NaN bin so every (non-finite) pixel maps to NaN, matching the other + # classifiers on degenerate input (#3510). + return _bin(agg, np.array([np.nan]), np.array([0])) + if module == cupy: q1 = float(cupy.percentile(finite_data, 25).get()) q2 = float(cupy.percentile(finite_data, 50).get()) diff --git a/xrspatial/tests/test_classify.py b/xrspatial/tests/test_classify.py index 494f75cc9..f63306ea0 100644 --- a/xrspatial/tests/test_classify.py +++ b/xrspatial/tests/test_classify.py @@ -1280,13 +1280,10 @@ def test_generate_sample_indices_large_is_deterministic(): # =================================================================== # equal_interval and natural_breaks already cover all-NaN. The other # classifiers were not exercised on an all-non-finite raster, where the -# finite mask removes every element. std_mean and maximum_breaks degrade -# to an all-NaN result; on the eager (numpy/cupy) backends head_tail_breaks, -# percentiles, and box_plot currently raise an opaque reduction error -# (issue #3510), so their tests are xfail until that is fixed. Flip them to -# plain assertions when #3510 lands. strict=False so a concurrent fix does -# not break main via XPASS. See the dask section below for the per-backend -# split (head_tail_breaks already degrades cleanly on dask). +# finite mask removes every element. All of them now degrade to an all-NaN +# result. head_tail_breaks, percentiles, and box_plot used to raise an opaque +# reduction error on the eager (numpy/cupy) backends until #3510 added an +# empty-finite guard. See the dask section below for the per-backend split. def test_std_mean_all_nan(): import warnings @@ -1303,34 +1300,51 @@ def test_maximum_breaks_all_nan(): assert np.all(np.isnan(result.data)) -@pytest.mark.xfail(reason="all-NaN input crashes; see issue #3510", strict=False) def test_head_tail_breaks_all_nan(): agg = xr.DataArray(np.full((4, 5), np.nan)) result = head_tail_breaks(agg) assert np.all(np.isnan(result.data)) -@pytest.mark.xfail(reason="all-NaN input crashes; see issue #3510", strict=False) def test_percentiles_all_nan(): agg = xr.DataArray(np.full((4, 5), np.nan)) result = percentiles(agg) assert np.all(np.isnan(result.data)) -@pytest.mark.xfail(reason="all-NaN input crashes; see issue #3510", strict=False) def test_box_plot_all_nan(): agg = xr.DataArray(np.full((4, 5), np.nan)) result = box_plot(agg) assert np.all(np.isnan(result.data)) -# All-NaN on the dask backend. The dask paths are separate implementations, -# and they do not match the eager paths on this degenerate input: -# std_mean, maximum_breaks, and head_tail_breaks all return all-NaN on dask -# (head_tail_breaks's dask path has a total_count == 0 guard the eager path -# lacks, so it does not hit the #3510 crash), while percentiles and box_plot -# crash on dask too. Pin all of it so the per-backend behaviour is explicit -# and the #3510 fix can target only the eager paths. +# All-inf input is mapped to NaN before binning, so it hits the same +# empty-finite path as all-NaN and must also return an all-NaN result (#3510). + +def test_head_tail_breaks_all_inf(): + agg = xr.DataArray(np.full((4, 5), np.inf)) + result = head_tail_breaks(agg) + assert np.all(np.isnan(result.data)) + + +def test_percentiles_all_inf(): + agg = xr.DataArray(np.full((4, 5), np.inf)) + result = percentiles(agg) + assert np.all(np.isnan(result.data)) + + +def test_box_plot_all_inf(): + agg = xr.DataArray(np.full((4, 5), -np.inf)) + result = box_plot(agg) + assert np.all(np.isnan(result.data)) + + +# All-NaN on the dask backend. The dask paths are separate implementations. +# std_mean, maximum_breaks, and head_tail_breaks always returned all-NaN on +# dask (head_tail_breaks's dask path has a total_count == 0 guard the eager +# path lacked); percentiles and box_plot used to crash on dask too until +# #3510 added empty-finite guards to their dask sample paths. Pin all of it +# so the per-backend behaviour stays explicit. @dask_array_available def test_std_mean_all_nan_dask(): @@ -1359,7 +1373,6 @@ def test_head_tail_breaks_all_nan_dask(): @dask_array_available -@pytest.mark.xfail(reason="all-NaN input crashes; see issue #3510", strict=False) def test_percentiles_all_nan_dask(): agg = xr.DataArray(da.full((4, 5), np.nan, chunks=(2, 5))) result = percentiles(agg) @@ -1367,7 +1380,6 @@ def test_percentiles_all_nan_dask(): @dask_array_available -@pytest.mark.xfail(reason="all-NaN input crashes; see issue #3510", strict=False) def test_box_plot_all_nan_dask(): agg = xr.DataArray(da.full((4, 5), np.nan, chunks=(2, 5))) result = box_plot(agg) From c94d36e5ebcca72da47cc1444dfe42b85059abd1 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 26 Jun 2026 18:14:52 -0700 Subject: [PATCH 2/2] Address review: suppress benign all-NaN RuntimeWarning in percentiles tests (#3510) percentiles still takes nanmax over the cleaned array to set the top bin edge, which warns on all-non-finite input like std_mean does. Suppress it in the regression tests to match the test_std_mean_all_nan convention. --- xrspatial/tests/test_classify.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/xrspatial/tests/test_classify.py b/xrspatial/tests/test_classify.py index f63306ea0..771e005f8 100644 --- a/xrspatial/tests/test_classify.py +++ b/xrspatial/tests/test_classify.py @@ -1307,8 +1307,13 @@ def test_head_tail_breaks_all_nan(): def test_percentiles_all_nan(): + import warnings agg = xr.DataArray(np.full((4, 5), np.nan)) - result = percentiles(agg) + # percentiles still takes nanmax over the (all-NaN) cleaned array to set + # the top bin edge, which warns like std_mean does on this input. + with warnings.catch_warnings(): + warnings.simplefilter('ignore', RuntimeWarning) + result = percentiles(agg) assert np.all(np.isnan(result.data)) @@ -1328,8 +1333,11 @@ def test_head_tail_breaks_all_inf(): def test_percentiles_all_inf(): + import warnings agg = xr.DataArray(np.full((4, 5), np.inf)) - result = percentiles(agg) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', RuntimeWarning) + result = percentiles(agg) assert np.all(np.isnan(result.data))