Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion xrspatial/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -1159,6 +1159,11 @@ def std_mean(agg: xr.DataArray,

def _compute_head_tail_bins(values_np):
"""Compute head/tail break bins from flat finite numpy values."""
if values_np.size == 0:
# All-NaN/inf input: no finite values to partition. Return a NaN bin
# so _bin maps every (non-finite) pixel to NaN, matching equal_interval
# and maximum_breaks on degenerate input (#3510).
return np.array([np.nan])
bins = []
data = values_np.copy()
while len(data) > 1:
Expand Down Expand Up @@ -1297,7 +1302,11 @@ def head_tail_breaks(agg: xr.DataArray,

def _run_percentiles(data, num_sample, pct, module):
    """Return the unique percentile values of the finite entries of ``data``.

    Parameters
    ----------
    data : array
        Values to summarise; non-finite entries are masked out with
        ``module.isfinite`` before any percentile is taken.
    num_sample : any
        Ignored for in-memory backends.
    pct : sequence of float
        Percentiles forwarded to ``module.percentile``.
    module : module
        Array namespace providing ``isfinite``, ``percentile``, ``unique``
        and ``array`` (called with ``numpy`` in the tests of this change).

    Returns
    -------
    array
        De-duplicated percentile values, or a single-NaN array when ``data``
        holds no finite value.
    """
    finite = data[module.isfinite(data)]
    if finite.size == 0:
        # All-NaN/inf input: no finite values to take percentiles of (#3510).
        # The guard must run before any percentile call, otherwise the empty
        # selection reaches module.percentile and the guard is never hit.
        return module.array([np.nan])
    q = module.percentile(finite, pct)
    return module.unique(q)

Expand All @@ -1313,6 +1322,9 @@ def _run_dask_percentiles(data, num_sample, pct):
sample_idx = _generate_sample_indices(num_data, num_sample)
values = np.asarray(clean.ravel()[sample_idx].compute())
values = values[np.isfinite(values)]
if values.size == 0:
# All-NaN/inf input: no finite values to take percentiles of (#3510).
return np.array([np.nan])
q = np.percentile(values, pct)
q = np.unique(q)
return q
Expand Down Expand Up @@ -1574,6 +1586,9 @@ def maximum_breaks(agg: xr.DataArray,


def _box_plot_bins_from_sample(finite_np, hinge, max_v):
if finite_np.size == 0:
# All-NaN/inf input: no finite values to take percentiles of (#3510).
return np.array([np.nan])
q1 = float(np.percentile(finite_np, 25))
q2 = float(np.percentile(finite_np, 50))
q3 = float(np.percentile(finite_np, 75))
Expand All @@ -1591,6 +1606,12 @@ def _run_box_plot(agg, hinge, module):
data_clean = module.where(module.isinf(data), np.nan, data)
finite_data = data_clean[module.isfinite(data_clean)]

if finite_data.size == 0:
# All-NaN/inf input: no finite values to take percentiles of. Return a
# NaN bin so every (non-finite) pixel maps to NaN, matching the other
# classifiers on degenerate input (#3510).
return _bin(agg, np.array([np.nan]), np.array([0]))

if module == cupy:
q1 = float(cupy.percentile(finite_data, 25).get())
q2 = float(cupy.percentile(finite_data, 50).get())
Expand Down
60 changes: 40 additions & 20 deletions xrspatial/tests/test_classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -1280,13 +1280,10 @@ def test_generate_sample_indices_large_is_deterministic():
# ===================================================================
# equal_interval and natural_breaks already cover all-NaN. The other
# classifiers were not exercised on an all-non-finite raster, where the
# finite mask removes every element. All of them now degrade to an all-NaN
# result. head_tail_breaks, percentiles, and box_plot used to raise an opaque
# reduction error on the eager (numpy/cupy) backends until #3510 added an
# empty-finite guard. See the dask section below for the per-backend split.

def test_std_mean_all_nan():
import warnings
Expand All @@ -1303,34 +1300,59 @@ def test_maximum_breaks_all_nan():
assert np.all(np.isnan(result.data))


def test_head_tail_breaks_all_nan():
    """head_tail_breaks on an all-NaN raster must return an all-NaN result.

    Runs as a plain assertion: a non-strict xfail marker would report a
    regression of the empty-finite guard (#3510) as an expected failure.
    """
    agg = xr.DataArray(np.full((4, 5), np.nan))
    result = head_tail_breaks(agg)
    assert np.all(np.isnan(result.data))


def test_percentiles_all_nan():
    """percentiles on an all-NaN raster must return an all-NaN result.

    Runs as a plain assertion: a non-strict xfail marker would report a
    regression of the empty-finite guard (#3510) as an expected failure.
    """
    import warnings
    agg = xr.DataArray(np.full((4, 5), np.nan))
    # percentiles still takes nanmax over the (all-NaN) cleaned array to set
    # the top bin edge, which warns like std_mean does on this input. The
    # call is made only inside the suppression context so the warning never
    # escapes the test.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', RuntimeWarning)
        result = percentiles(agg)
    assert np.all(np.isnan(result.data))


def test_box_plot_all_nan():
    """box_plot on an all-NaN raster must return an all-NaN result.

    Runs as a plain assertion: a non-strict xfail marker would report a
    regression of the empty-finite guard (#3510) as an expected failure.
    """
    agg = xr.DataArray(np.full((4, 5), np.nan))
    result = box_plot(agg)
    assert np.all(np.isnan(result.data))


# All-inf input is mapped to NaN before binning, so it hits the same
# empty-finite path as all-NaN and must also return an all-NaN result (#3510).

def test_head_tail_breaks_all_inf():
    """head_tail_breaks must classify an all-+inf raster as all-NaN."""
    raster = xr.DataArray(np.full(shape=(4, 5), fill_value=np.inf))
    classified = head_tail_breaks(raster)
    # Every output cell has to be NaN.
    assert np.isnan(classified.data).all()


def test_percentiles_all_inf():
    """percentiles must classify an all-+inf raster as all-NaN."""
    import warnings
    raster = xr.DataArray(np.full(shape=(4, 5), fill_value=np.inf))
    with warnings.catch_warnings():
        # RuntimeWarnings raised while classifying are silenced for this call.
        warnings.simplefilter('ignore', RuntimeWarning)
        classified = percentiles(raster)
    # Every output cell has to be NaN.
    assert np.isnan(classified.data).all()


def test_box_plot_all_inf():
    """box_plot must classify an all-(-inf) raster as all-NaN."""
    raster = xr.DataArray(np.full(shape=(4, 5), fill_value=-np.inf))
    classified = box_plot(raster)
    # Every output cell has to be NaN.
    assert np.isnan(classified.data).all()


# All-NaN on the dask backend. The dask paths are separate implementations.
# std_mean, maximum_breaks, and head_tail_breaks always returned all-NaN on
# dask (head_tail_breaks's dask path has a total_count == 0 guard the eager
# path lacked); percentiles and box_plot used to crash on dask too until
# #3510 added empty-finite guards to their dask sample paths. Pin all of it
# so the per-backend behaviour stays explicit.

@dask_array_available
def test_std_mean_all_nan_dask():
Expand Down Expand Up @@ -1359,15 +1381,13 @@ def test_head_tail_breaks_all_nan_dask():


@dask_array_available
def test_percentiles_all_nan_dask():
    """percentiles on an all-NaN dask-backed raster must return all-NaN.

    Runs as a plain assertion: a non-strict xfail marker would report a
    regression of the empty-finite guard (#3510) as an expected failure.
    """
    agg = xr.DataArray(da.full((4, 5), np.nan, chunks=(2, 5)))
    result = percentiles(agg)
    # The result is lazy; materialise it before checking the values.
    assert np.all(np.isnan(result.data.compute()))


@dask_array_available
@pytest.mark.xfail(reason="all-NaN input crashes; see issue #3510", strict=False)
def test_box_plot_all_nan_dask():
agg = xr.DataArray(da.full((4, 5), np.nan, chunks=(2, 5)))
result = box_plot(agg)
Expand Down
Loading