From d8c580b9fc89e200418bcbd95e167ebdbc498492 Mon Sep 17 00:00:00 2001 From: Aaron Spring Date: Thu, 11 Jun 2026 13:38:03 +0200 Subject: [PATCH] Fix continuous ROC for binary obs with out-of-range forecasts (#442) In `roc(..., bin_edges="continuous")` the binary observations were categorised with the forecast-derived threshold `i`. When forecast values fall outside the [0, 1] range of the observations, no observation ever lands in the event category, so the hit rate is always 0 and the area collapses (e.g. 0.0 instead of 1.0). Categorise observations with a fixed split at 0.5 in continuous mode, varying only the forecast threshold. Results now match `sklearn.metrics.roc_auc_score` regardless of forecast magnitude or input dimension ordering. Co-Authored-By: Claude Opus 4.8 --- CHANGELOG.rst | 12 +++++++++ xskillscore/core/probabilistic.py | 7 ++++- xskillscore/tests/test_probabilistic.py | 34 +++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e7f4fd45..c4fccc1f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,6 +3,18 @@ Changelog History ================= +xskillscore unreleased +---------------------- + +Bug Fixes +~~~~~~~~~ +- Fixed :py:func:`~xskillscore.roc` with ``bin_edges="continuous"`` returning + incorrect areas (down to ``0.0`` instead of ``1.0``) when forecast values fall + outside the ``[0, 1]`` range of the binary observations. Observations are now + categorised with a fixed split rather than the forecast-derived threshold, so + results match ``sklearn.metrics.roc_auc_score``. (:issue:`442`) `Aaron Spring`_ + + xskillscore v0.0.29 (2026-02-18) -------------------------------- diff --git a/xskillscore/core/probabilistic.py b/xskillscore/core/probabilistic.py index 7e650360..7134b28d 100644 --- a/xskillscore/core/probabilistic.py +++ b/xskillscore/core/probabilistic.py @@ -1310,6 +1310,11 @@ def roc( # loop over each bin_edge and get true positive rate and false positive rate # from contingency + if continuous: + # observations are binary (0/1); categorise them with a fixed split at 0.5 + # rather than the forecast-derived threshold ``i`` (which may lie outside + # the [0, 1] range of the observations). See GH #442. + observation_category_edges = np.array([-np.inf, 0.5, np.inf]) tpr_list, fpr_list = [], [] for i in bin_edges: dichotomous_category_edges = np.array( @@ -1318,7 +1323,7 @@ def roc( dichotomous_contingency = Contingency( observations, forecasts, - dichotomous_category_edges, + observation_category_edges if continuous else dichotomous_category_edges, dichotomous_category_edges, dim=dim, ) diff --git a/xskillscore/tests/test_probabilistic.py b/xskillscore/tests/test_probabilistic.py index 5147d476..2763a19a 100644 --- a/xskillscore/tests/test_probabilistic.py +++ b/xskillscore/tests/test_probabilistic.py @@ -1020,6 +1020,40 @@ def test_roc_bin_edges_continuous_against_sklearn( assert (xs_tpr == sk_tpr).all() +@pytest.mark.parametrize("drop_intermediate_bool", [False, True]) +def test_roc_continuous_forecast_out_of_unit_range_against_sklearn(drop_intermediate_bool): + """Continuous ROC must match sklearn even when forecasts lie outside [0, 1]. + + Regression test for GH #442: observations are binary but were categorised + with the forecast-derived threshold, so events were never detected once + forecast magnitudes exceeded the [0, 1] range of the observations. + """ + np.random.seed(1512) + obs_raw = xr.DataArray( + np.random.normal(0.5, 0.2, size=(20, 10)), + coords=[("time", np.arange(20)), ("points", np.arange(10))], + ) + da_obs = (obs_raw > 0.5).astype(int) + # forecast shifted out of [0, 1] per point and with a transposed dim order + alpha = xr.DataArray(np.linspace(0, 1, num=10), coords=[("points", np.arange(10))]) + err = xr.DataArray(np.random.normal(0.0, 0.03, size=20), coords=[("time", np.arange(20))]) + da_forecast = alpha + obs_raw + err + + xs_area = roc( + da_obs, + da_forecast, + "continuous", + dim="time", + drop_intermediate=drop_intermediate_bool, + return_results="area", + ) + for point in range(da_obs.points.size): + sk_area = roc_auc_score( + da_obs.isel(points=point).values, da_forecast.isel(points=point).values + ) + np.testing.assert_allclose(xs_area.isel(points=point), sk_area) + + def test_roc_bin_edges_drop_intermediate(forecast_1d_long, observation_1d_long): """Test that drop_intermediate reduces probability_bins in xs.roc .""" fp = forecast_1d_long.clip(0, 1) # prob