diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e7f4fd45..c4fccc1f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,6 +3,18 @@ Changelog History ================= +xskillscore unreleased +---------------------- + +Bug Fixes +~~~~~~~~~ +- Fixed :py:func:`~xskillscore.roc` with ``bin_edges="continuous"`` returning + incorrect areas (down to ``0.0`` instead of ``1.0``) when forecast values fall + outside the ``[0, 1]`` range of the binary observations. Observations are now + categorised with a fixed split rather than the forecast-derived threshold, so + results match ``sklearn.metrics.roc_auc_score``. (:issue:`442`) `Aaron Spring`_ + + xskillscore v0.0.29 (2026-02-18) -------------------------------- diff --git a/xskillscore/core/probabilistic.py b/xskillscore/core/probabilistic.py index 7e650360..7134b28d 100644 --- a/xskillscore/core/probabilistic.py +++ b/xskillscore/core/probabilistic.py @@ -1310,6 +1310,11 @@ def roc( # loop over each bin_edge and get true positive rate and false positive rate # from contingency + if continuous: + # observations are binary (0/1); categorise them with a fixed split at 0.5 + # rather than the forecast-derived threshold ``i`` (which may lie outside + # the [0, 1] range of the observations). See GH #442. + observation_category_edges = np.array([-np.inf, 0.5, np.inf]) tpr_list, fpr_list = [], [] for i in bin_edges: dichotomous_category_edges = np.array( @@ -1318,7 +1323,7 @@ def roc( dichotomous_contingency = Contingency( observations, forecasts, - dichotomous_category_edges, + observation_category_edges if continuous else dichotomous_category_edges, dichotomous_category_edges, dim=dim, ) diff --git a/xskillscore/tests/test_probabilistic.py b/xskillscore/tests/test_probabilistic.py index 5147d476..2763a19a 100644 --- a/xskillscore/tests/test_probabilistic.py +++ b/xskillscore/tests/test_probabilistic.py @@ -1020,6 +1020,40 @@ def test_roc_bin_edges_continuous_against_sklearn( assert (xs_tpr == sk_tpr).all() +@pytest.mark.parametrize("drop_intermediate_bool", [False, True]) +def test_roc_continuous_forecast_out_of_unit_range_against_sklearn(drop_intermediate_bool): + """Continuous ROC must match sklearn even when forecasts lie outside [0, 1]. + + Regression test for GH #442: observations are binary but were categorised + with the forecast-derived threshold, so events were never detected once + forecast magnitudes exceeded the [0, 1] range of the observations. + """ + np.random.seed(1512) + obs_raw = xr.DataArray( + np.random.normal(0.5, 0.2, size=(20, 10)), + coords=[("time", np.arange(20)), ("points", np.arange(10))], + ) + da_obs = (obs_raw > 0.5).astype(int) + # forecast shifted out of [0, 1] per point and with a transposed dim order + alpha = xr.DataArray(np.linspace(0, 1, num=10), coords=[("points", np.arange(10))]) + err = xr.DataArray(np.random.normal(0.0, 0.03, size=20), coords=[("time", np.arange(20))]) + da_forecast = alpha + obs_raw + err + + xs_area = roc( + da_obs, + da_forecast, + "continuous", + dim="time", + drop_intermediate=drop_intermediate_bool, + return_results="area", + ) + for point in range(da_obs.points.size): + sk_area = roc_auc_score( + da_obs.isel(points=point).values, da_forecast.isel(points=point).values + ) + np.testing.assert_allclose(xs_area.isel(points=point), sk_area) + + def test_roc_bin_edges_drop_intermediate(forecast_1d_long, observation_1d_long): """Test that drop_intermediate reduces probability_bins in xs.roc .""" fp = forecast_1d_long.clip(0, 1) # prob