From dbda34b25ac800fc371de5e129f6b232e96d9415 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:08:51 +0200
Subject: [PATCH 01/26] Add array-api-compat and array-api-extra as core
 dependencies

---
 pyproject.toml |  2 ++
 uv.lock        | 27 ++++++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 33986d4..6d76671 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,6 +7,8 @@ requires-python = ">=3.12"
 dependencies = [
     "numpy>=2.0.0",
     "scipy>=1.14.0",
+    "array-api-compat>=1.9",
+    "array-api-extra>=0.5",
 ]
 authors = [
     {name = "Francesco Zanetta", email = "zanetta.francesco@gmail.com"},
diff --git a/uv.lock b/uv.lock
index d9a3da5..1880bc5 100644
--- a/uv.lock
+++ b/uv.lock
@@ -6,6 +6,27 @@ resolution-markers = [
     "python_full_version < '3.13'",
 ]
 
+[[package]]
+name = "array-api-compat"
+version = "1.14.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/89/e5/9a12dd1c2b0ad61f3c3ad0fc14b888c65fd735dd9d26805f77317303cbe5/array_api_compat-1.14.0.tar.gz", hash = "sha256:c819ba707f5c507800cb545f7e6348ff1ecc46538381d9ad9b371ffc9cd6d784", size = 106369, upload-time = "2026-02-26T12:02:42.452Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a0/d3/54cd560804a8c2b898824778e86c13c2a14600bc83532a9c4f69f2f469c3/array_api_compat-1.14.0-py3-none-any.whl", hash = "sha256:ed5af1f9b6595a199c942505f281ec994892556b6efc24679a0501e87a7d6279", size = 60124, upload-time = "2026-02-26T12:02:41.127Z" },
+]
+
+[[package]]
+name = "array-api-extra"
+version = "0.10.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "array-api-compat" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b2/4d/ff0cb01001385ad31d4798906e96c5e43e17770037a93dc5f33cd44ecd9d/array_api_extra-0.10.3.tar.gz", hash = "sha256:6cabfefe10db45f5eb4c642fc2465646ad0ed017d3774fc16d763486b31ee5ae", size = 94321, upload-time = "2026-06-03T14:34:31.057Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8f/16/b950ac4018ee149924ca1155ea749c5d175800b78234f0941fdfcb79233b/array_api_extra-0.10.3-py3-none-any.whl", hash = "sha256:4968892e6641b8d2b6f5e4fdcdbd979951f601411436948f4342831a626ba03c", size = 91055, upload-time = "2026-06-03T14:34:29.766Z" },
+]
+
 [[package]]
 name = "cfgv"
 version = "3.4.0"
@@ -1056,9 +1077,11 @@ wheels = [
 
 [[package]]
 name = "scoringrules"
-version = "0.10.0"
+version = "0.11.0"
 source = { editable = "." }
 dependencies = [
+    { name = "array-api-compat" },
+    { name = "array-api-extra" },
     { name = "numpy" },
     { name = "scipy" },
 ]
@@ -1084,6 +1107,8 @@ dev = [
 
 [package.metadata]
 requires-dist = [
+    { name = "array-api-compat", specifier = ">=1.9" },
+    { name = "array-api-extra", specifier = ">=0.5" },
     { name = "jax", marker = "extra == 'jax'", specifier = ">=0.4.31" },
     { name = "numba", marker = "extra == 'numba'", specifier = ">=0.60.0" },
     { name = "numpy", specifier = ">=2.0.0" },

From 1b7996dd32ce9d4a8f1f38758d56c8f070967675 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:09:50 +0200
Subject: [PATCH 02/26] Remove vendored array-api copies in favour of pip
 dependencies

---
 .gitignore | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 45599db..965b51c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -153,4 +153,3 @@ _devlog/
 tests/output
 .devcontainer/
 docs/generated
-scoringrules/vendored

From 7fdd6f30b42f633303ebdc922323ae4b29e81dbf Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:10:05 +0200
Subject: [PATCH 03/26] Narrow coverage omit so the array-api extension layer
 is measured

---
 pyproject.toml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6d76671..8bfbfd9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -60,8 +60,10 @@ ignore = ["E741"]
 [tool.coverage.run]
 omit = [
     "**/_gufuncs.py", # numba gufuncs are not python code
+    "**/_gufuncs_w.py",
     "**/_gufunc.py",
-    "scoringrules/backend/*.py", # superfluous
+    "scoringrules/backend/base.py", # ABC, being removed
+    "scoringrules/backend/registry.py", # backend glue
     "scoringrules/core/typing.py" # only type hints
 ]
 

From 1f89e8e12667c3cb20d1748cd0529754ec656d70 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:13:07 +0200
Subject: [PATCH 04/26] Add array-api augmented namespace with input inference

---
 scoringrules/backend/namespace.py | 138 ++++++++++++++++++++++++++++++
 tests/test_namespace.py           |  38 ++++++++
 2 files changed, 176 insertions(+)
 create mode 100644 scoringrules/backend/namespace.py
 create mode 100644 tests/test_namespace.py

diff --git a/scoringrules/backend/namespace.py b/scoringrules/backend/namespace.py
new file mode 100644
index 0000000..8591f7e
--- /dev/null
+++ b/scoringrules/backend/namespace.py
@@ -0,0 +1,138 @@
+"""An array-API namespace augmented with the special functions and helpers
+scoringrules needs that the standard does not provide.
+
+The wrapper delegates every standard array-API op to the framework namespace
+inferred from the input arrays (via array-api-compat) and adds the missing
+special functions and a few non-standard helpers (see ``extensions``)."""
+
+import numpy as np
+from array_api_compat import array_namespace, is_array_api_obj
+
+
+class ArrayAPINamespace:
+    """A superset of an array-API namespace (bound to ``xp`` at call sites)."""
+
+    def __init__(self, xp):
+        self._xp = xp
+
+    # --- standard ops: delegate everything else to the framework namespace ---
+    def __getattr__(self, name):
+        return getattr(self._xp, name)
+
+    # --- linear algebra (thin delegations so call sites stay mechanical) ---
+    def norm(self, x, axis=None):
+        return self._xp.linalg.vector_norm(x, axis=axis)
+
+    def inv(self, x):
+        return self._xp.linalg.inv(x)
+
+    def det(self, x):
+        return self._xp.linalg.det(x)
+
+    # --- non-standard helpers ---
+    def gather(self, x, ind, axis):
+        return self._xp.take_along_axis(x, ind, axis=axis)
+
+    def apply_along_axis(self, func1d, x, axis):
+        from . import extensions
+
+        return extensions.apply_along_axis(self._xp, func1d, x, axis)
+
+    def cov(self, x, rowvar=True, bias=False):
+        from . import extensions
+
+        return extensions.cov(self._xp, x, rowvar=rowvar, bias=bias)
+
+    def indices(self, dimensions):
+        from . import extensions
+
+        return extensions.indices(self._xp, dimensions)
+
+    # --- special functions (native-first, scipy fallback) ---
+    def erf(self, x):
+        from . import extensions
+
+        return extensions.erf(self._xp, x)
+
+    def gamma(self, x):
+        from . import extensions
+
+        return extensions.gamma(self._xp, x)
+
+    def gammainc(self, x, y):
+        from . import extensions
+
+        return extensions.gammainc(self._xp, x, y)
+
+    def gammalinc(self, x, y):
+        from . import extensions
+
+        return extensions.gammalinc(self._xp, x, y)
+
+    def gammauinc(self, x, y):
+        from . import extensions
+
+        return extensions.gammauinc(self._xp, x, y)
+
+    def beta(self, x, y):
+        from . import extensions
+
+        return extensions.beta(self._xp, x, y)
+
+    def betainc(self, x, y, z):
+        from . import extensions
+
+        return extensions.betainc(self._xp, x, y, z)
+
+    def mbessel0(self, x):
+        from . import extensions
+
+        return extensions.mbessel0(self._xp, x)
+
+    def mbessel1(self, x):
+        from . import extensions
+
+        return extensions.mbessel1(self._xp, x)
+
+    def hypergeometric(self, a, b, c, z):
+        from . import extensions
+
+        return extensions.hypergeometric(self._xp, a, b, c, z)
+
+    def expi(self, x):
+        from . import extensions
+
+        return extensions.expi(self._xp, x)
+
+    def comb(self, n, k):
+        from . import extensions
+
+        return extensions.comb(self._xp, n, k)
+
+    def factorial(self, n):
+        from . import extensions
+
+        return extensions.factorial(self._xp, n)
+
+
+_NUMPY_NS = ArrayAPINamespace(array_namespace(np.empty(0)))
+
+
+def get_namespace(*arrays):
+    """Return the augmented array-API namespace for ``arrays``.
+
+    - all-scalar / list / ``None`` inputs (no recognised array) -> numpy;
+    - arrays from one framework -> that framework's namespace;
+    - arrays from different frameworks -> ``ValueError``.
+    """
+    candidates = [a for a in arrays if is_array_api_obj(a)]
+    if not candidates:
+        return _NUMPY_NS
+    try:
+        xp = array_namespace(*candidates)
+    except TypeError as err:  # array-api-compat raises on multiple namespaces
+        raise ValueError(
+            "Inputs come from multiple array frameworks; convert all inputs to a "
+            "single framework (e.g. all numpy, all jax, or all torch)."
+        ) from err
+    return ArrayAPINamespace(xp)
diff --git a/tests/test_namespace.py b/tests/test_namespace.py
new file mode 100644
index 0000000..7edec1b
--- /dev/null
+++ b/tests/test_namespace.py
@@ -0,0 +1,38 @@
+import numpy as np
+import pytest
+from scoringrules.backend.namespace import get_namespace
+
+
+def test_numpy_inference_standard_ops():
+    x = np.asarray([1.0, -2.0, 3.0])
+    xp = get_namespace(x)
+    out = xp.sum(xp.abs(x))
+    assert np.asarray(out) == pytest.approx(6.0)
+
+
+def test_all_scalar_inputs_fall_back_to_numpy():
+    xp = get_namespace(0.3, 0.7, 1.1)
+    out = xp.asarray([1.0, 2.0])
+    assert isinstance(np.asarray(out), np.ndarray)
+    assert np.asarray(xp.sum(out)) == pytest.approx(3.0)
+
+
+def test_list_inputs_fall_back_to_numpy():
+    xp = get_namespace([1.0, 2.0, 3.0])
+    out = xp.asarray([1.0, 2.0, 3.0])
+    assert np.asarray(xp.sum(out)) == pytest.approx(6.0)
+
+
+@pytest.mark.skip(
+    reason="special-function methods exercised once extensions.py lands (later task)"
+)
+def test_special_function_method_present():
+    pass
+
+
+def test_mixed_namespaces_raise():
+    torch = pytest.importorskip("torch")
+    a = np.asarray([1.0, 2.0])
+    b = torch.tensor([1.0, 2.0])
+    with pytest.raises(ValueError, match="single framework|multiple|mixed"):
+        get_namespace(a, b)

From bbc248dd5cc72c42eb0861d207645792edb3f812 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:13:33 +0200
Subject: [PATCH 05/26] Export get_namespace from scoringrules.backend

---
 scoringrules/backend/__init__.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/scoringrules/backend/__init__.py b/scoringrules/backend/__init__.py
index ef934e2..cdb1bda 100644
--- a/scoringrules/backend/__init__.py
+++ b/scoringrules/backend/__init__.py
@@ -1,3 +1,4 @@
+from .namespace import ArrayAPINamespace, get_namespace
 from .registry import BackendsRegistry
 
 backends = BackendsRegistry()
@@ -7,4 +8,10 @@ def register_backend(backend):
     backends.register_backend(backend)
 
 
-__all__ = ["backends", "BackendsRegistry", "register_backend"]
+__all__ = [
+    "backends",
+    "BackendsRegistry",
+    "register_backend",
+    "get_namespace",
+    "ArrayAPINamespace",
+]

From e0d30a1e193155593892a9872c5737ebecf937e6 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:21:32 +0200
Subject: [PATCH 06/26] Guard namespace __getattr__ recursion and cover
 linalg/gather delegation

---
 scoringrules/backend/namespace.py |  7 +++++++
 tests/test_namespace.py           | 22 ++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/scoringrules/backend/namespace.py b/scoringrules/backend/namespace.py
index 8591f7e..4e2f88a 100644
--- a/scoringrules/backend/namespace.py
+++ b/scoringrules/backend/namespace.py
@@ -17,10 +17,17 @@ def __init__(self, xp):
 
     # --- standard ops: delegate everything else to the framework namespace ---
     def __getattr__(self, name):
+        # Guard against infinite recursion if ``_xp`` is missing (e.g. a
+        # partially-constructed instance): without it, ``self._xp`` would
+        # re-enter __getattr__("_xp") forever.
+        if name == "_xp":
+            raise AttributeError("ArrayAPINamespace._xp is not set")
         return getattr(self._xp, name)
 
     # --- linear algebra (thin delegations so call sites stay mechanical) ---
     def norm(self, x, axis=None):
+        # array-API standard spelling of the existing backends' linalg.norm
+        # (equivalent for the L2/vector norm scoringrules uses).
         return self._xp.linalg.vector_norm(x, axis=axis)
 
     def inv(self, x):
diff --git a/tests/test_namespace.py b/tests/test_namespace.py
index 7edec1b..0b1a381 100644
--- a/tests/test_namespace.py
+++ b/tests/test_namespace.py
@@ -23,6 +23,28 @@ def test_list_inputs_fall_back_to_numpy():
     assert np.asarray(xp.sum(out)) == pytest.approx(6.0)
 
 
+def test_norm_delegates_to_linalg():
+    x = np.asarray([[3.0, 4.0]])
+    xp = get_namespace(x)
+    assert np.asarray(xp.norm(x, axis=-1)) == pytest.approx([5.0])
+
+
+def test_gather_delegates_to_take_along_axis():
+    x = np.asarray([[10.0, 20.0, 30.0]])
+    ind = np.asarray([[2, 0, 1]])
+    xp = get_namespace(x)
+    out = np.asarray(xp.gather(x, ind, axis=-1))
+    assert out.ravel().tolist() == pytest.approx([30.0, 10.0, 20.0])
+
+
+def test_missing_xp_raises_attributeerror_not_recursion():
+    from scoringrules.backend.namespace import ArrayAPINamespace
+
+    obj = ArrayAPINamespace.__new__(ArrayAPINamespace)  # _xp never set
+    with pytest.raises(AttributeError):
+        obj.sum  # noqa: B018
+
+
 @pytest.mark.skip(
     reason="special-function methods exercised once extensions.py lands (later task)"
 )

From 3612d820aa734d9ab79c4b3b05236ae48f3bbc07 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:26:16 +0200
Subject: [PATCH 07/26] Add per-framework special-function support audit

---
 docs/special_functions_audit.md | 88 +++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 docs/special_functions_audit.md

diff --git a/docs/special_functions_audit.md b/docs/special_functions_audit.md
new file mode 100644
index 0000000..1e04e21
--- /dev/null
+++ b/docs/special_functions_audit.md
@@ -0,0 +1,88 @@
+# Special-function support audit
+
+`scoringrules` needs a handful of special functions that the array-API standard
+does not provide. The extension layer (`scoringrules/backend/extensions.py`)
+dispatches **native-first** — using each framework's own implementation where it
+exists (so autograd and device placement are preserved) — and falls back to
+`scipy.special` only for plain numpy.
+
+This document records, per function and per framework, whether a usable path
+exists for the **forward** evaluation and for the **gradient**. The values below
+were produced by probing the actually-installed frameworks (jax, torch, scipy)
+on this branch, not from memory.
+
+## Legend
+
+- **PASS** — a usable native path exists. For jax this is `jax.scipy.special`;
+  for torch it is either `torch.special` or a differentiable composition built
+  from native primitives (`exp`/`lgamma`); for numpy it is `scipy.special`. The
+  forward value is correct and, where the *grad* column says PASS, the gradient
+  is finite.
+- **PARTIAL (numpy round-trip)** — only reachable by detouring through
+  numpy/scipy, which breaks the framework's native array type and autograd. No
+  special function currently needs this; the only numpy round-trip in the layer
+  is the `apply_along_axis` helper on torch (see *Non-standard helpers* below).
+- **BLOCKED** — no native and no differentiable path. The scipy fallback cannot
+  be used because, on that framework, calling scipy raises (torch refuses
+  `numpy()` on grad-requiring tensors; see probe output). These functions must
+  receive numpy or jax inputs. The extension layer raises an explicit
+  `NotImplementedError` for them on torch.
+
+## Probe results (this branch)
+
+```
+JAX native:   erf T  gamma T  gammainc T  gammaincc T  beta T  betainc T  i0 T  i1 T  hyp2f1 T  expi T  factorial T  comb F
+TORCH native: erf T  gamma F  gammainc T  gammaincc T  beta F  betainc F  i0 T  i1 T  hyp2f1 F  expi F  factorial F  comb F
+torch scipy-fallback under grad: FAIL (RuntimeError: can't call numpy() on a tensor that requires grad)
+jax   scipy-fallback under grad: PASS  (jax traces through scipy.special for these ufuncs)
+```
+
+Notes from the probe that shaped the implementation:
+
+- `jax.scipy.special.hyp2f1` **exists and is correct** (the legacy backend had it
+  commented out): `hyp2f1(1,1,2,0.5) = 1.386294`, matching scipy.
+- `jax.scipy.special.expi` accepts **0-d input** fine (`expi(1.0) = 1.895118`),
+  so no scalar-reshape workaround is needed.
+- jax has **no** `comb`, and composing it as `factorial(n) // (factorial(k)·
+  factorial(n-k))` is wrong: jax `factorial` is float32, and floor-division of
+  `120.0001 // 12.00001` rounds to **9.0** instead of 10. The layer therefore
+  composes `comb` with floating division and `round`, which gives 10.0 on numpy,
+  jax, and torch.
+- torch has no `gamma`/`beta`/`factorial`, but all three are built from the
+  native, differentiable `torch.lgamma`, so they are PASS (forward + grad).
+
+## Support matrix
+
+| fn | numpy | jax fwd | jax grad | torch fwd | torch grad | notes |
+|----|-------|---------|----------|-----------|------------|-------|
+| `erf` | PASS | PASS | PASS | PASS | PASS | `jax.scipy.special.erf` / `torch.special.erf` |
+| `gamma` | PASS | PASS | PASS | PASS | PASS | torch via `exp(lgamma)` |
+| `gammainc` (reg. lower) | PASS | PASS | PASS | PASS | PASS | `torch.special.gammainc` |
+| `gammalinc` (unreg. lower) | PASS | PASS | PASS | PASS | PASS | composed `gammainc·gamma` |
+| `gammauinc` (unreg. upper) | PASS | PASS | PASS | PASS | PASS | composed `gammaincc·gamma`; `torch.special.gammaincc` present |
+| `beta` | PASS | PASS | PASS | PASS | PASS | torch via `lgamma` |
+| `betainc` (reg. incomplete) | PASS | PASS | PASS | BLOCKED | BLOCKED | no torch native; scipy raises under grad |
+| `mbessel0` (`i0`) | PASS | PASS | PASS | PASS | PASS | `torch.special.i0` |
+| `mbessel1` (`i1`) | PASS | PASS | PASS | PASS | PASS | `torch.special.i1` |
+| `hypergeometric` (`hyp2f1`) | PASS | PASS | PASS | BLOCKED | BLOCKED | jax: `jax.scipy.special.hyp2f1`; torch: none |
+| `expi` | PASS | PASS | PASS | BLOCKED | BLOCKED | no torch native; jax 0-d works |
+| `factorial` | PASS | PASS | PASS | PASS | PASS | torch via `exp(lgamma(n+1))` |
+| `comb` | PASS | PASS | n/a | PASS | n/a | composed from `factorial` with `/ + round`; discrete, so gradient is not meaningful |
+
+`n/a` for `comb` grad: `comb` is integer-valued and routed through `round`, so it
+has no meaningful gradient (this matched the old `//` form, which was also
+non-differentiable). `factorial` itself, built from the smooth `lgamma`, *is*
+differentiable.
+
+## Non-standard helpers
+
+These are not special functions but are also provided by the extension layer:
+
+- `apply_along_axis` — numpy/jax have native paths (jax via `vmap` with a
+  `jnp.apply_along_axis` fallback); **torch has no native equivalent**, so it is
+  the one **numpy round-trip** in the layer (PARTIAL): the per-slice callable is
+  evaluated through `numpy.apply_along_axis` and the result re-wrapped as a
+  tensor. This breaks autograd on torch and should be avoided in grad-sensitive
+  code paths.
+- `cov` — `jnp.cov` / `torch.cov` (with `correction` mapped from `bias`) / `np.cov`.
+- `indices` — `jnp.indices` / `np.indices` (torch wraps `np.indices`).

From a6c5da6f45804169b5628eb036afb5428a4bf9aa Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:28:02 +0200
Subject: [PATCH 08/26] Add native-first special-function extension layer

---
 scoringrules/backend/extensions.py | 210 +++++++++++++++++++++++++++++
 tests/test_extensions.py           |  68 ++++++++++
 2 files changed, 278 insertions(+)
 create mode 100644 scoringrules/backend/extensions.py
 create mode 100644 tests/test_extensions.py

diff --git a/scoringrules/backend/extensions.py b/scoringrules/backend/extensions.py
new file mode 100644
index 0000000..8cbe353
--- /dev/null
+++ b/scoringrules/backend/extensions.py
@@ -0,0 +1,210 @@
+"""Special functions and non-standard helpers missing from the array-API
+standard. Each takes the (compat) namespace ``xp`` and dispatches native-first,
+falling back to scipy only where no native implementation exists.
+
+The forward/gradient support matrix is documented in
+``docs/special_functions_audit.md``."""
+
+import os
+
+os.environ.setdefault("SCIPY_ARRAY_API", "1")
+
+import numpy as np  # noqa: E402
+import scipy.special as sp  # noqa: E402
+
+
+def _kind(xp):
+    name = getattr(xp, "__name__", "")
+    if "torch" in name:
+        return "torch"
+    if "jax" in name:
+        return "jax"
+    return "numpy"
+
+
+def _jsp():
+    import jax.scipy.special as jsp
+
+    return jsp
+
+
+def _ts():
+    import torch
+
+    return torch
+
+
+# --- special functions ---
+
+
+def erf(xp, x):
+    k = _kind(xp)
+    if k == "jax":
+        return _jsp().erf(x)
+    if k == "torch":
+        return _ts().special.erf(x)
+    return sp.erf(x)
+
+
+def gamma(xp, x):
+    k = _kind(xp)
+    if k == "jax":
+        return _jsp().gamma(x)
+    if k == "torch":
+        return _ts().exp(_ts().lgamma(x))
+    return sp.gamma(x)
+
+
+def gammainc(xp, x, y):
+    k = _kind(xp)
+    if k == "jax":
+        return _jsp().gammainc(x, y)
+    if k == "torch":
+        return _ts().special.gammainc(x, y)
+    return sp.gammainc(x, y)
+
+
+def gammalinc(xp, x, y):
+    """Lower incomplete gamma (unregularised)."""
+    return gammainc(xp, x, y) * gamma(xp, x)
+
+
+def gammauinc(xp, x, y):
+    """Upper incomplete gamma (unregularised)."""
+    k = _kind(xp)
+    if k == "jax":
+        return _jsp().gammaincc(x, y) * _jsp().gamma(x)
+    if k == "torch":
+        return _ts().special.gammaincc(x, y) * _ts().exp(_ts().lgamma(x))
+    return sp.gammaincc(x, y) * sp.gamma(x)
+
+
+def beta(xp, x, y):
+    k = _kind(xp)
+    if k == "jax":
+        return _jsp().beta(x, y)
+    if k == "torch":
+        t = _ts()
+        return t.exp(t.lgamma(x) + t.lgamma(y) - t.lgamma(x + y))
+    return sp.beta(x, y)
+
+
+def betainc(xp, x, y, z):
+    k = _kind(xp)
+    if k == "jax":
+        return _jsp().betainc(x, y, z)
+    if k == "torch":
+        raise NotImplementedError(
+            "betainc has no native torch implementation and the scipy fallback "
+            "does not support autograd; pass numpy/jax arrays for this score."
+        )
+    return sp.betainc(x, y, z)
+
+
+def mbessel0(xp, x):
+    k = _kind(xp)
+    if k == "jax":
+        return _jsp().i0(x)
+    if k == "torch":
+        return _ts().special.i0(x)
+    return sp.i0(x)
+
+
+def mbessel1(xp, x):
+    k = _kind(xp)
+    if k == "jax":
+        return _jsp().i1(x)
+    if k == "torch":
+        return _ts().special.i1(x)
+    return sp.i1(x)
+
+
+def hypergeometric(xp, a, b, c, z):
+    k = _kind(xp)
+    if k == "jax":
+        return _jsp().hyp2f1(a, b, c, z)
+    if k == "torch":
+        raise NotImplementedError(
+            "hyp2f1 has no native torch implementation; pass numpy/jax arrays."
+        )
+    return sp.hyp2f1(a, b, c, z)
+
+
+def expi(xp, x):
+    k = _kind(xp)
+    if k == "jax":
+        return _jsp().expi(x)
+    if k == "torch":
+        raise NotImplementedError(
+            "expi has no native torch implementation; pass numpy/jax arrays."
+        )
+    return sp.expi(x)
+
+
+def factorial(xp, n):
+    kd = _kind(xp)
+    if kd == "jax":
+        return _jsp().factorial(n)
+    if kd == "torch":
+        t = _ts()
+        return t.exp(t.lgamma(n + 1))
+    return sp.factorial(n)
+
+
+def comb(xp, n, k):
+    # Compose from factorial. Use floating division + round rather than floor
+    # division: jax's factorial is float32, and ``120.0001 // 12.00001`` floors
+    # to 9.0 instead of 10.0. Rounding the exact-in-float ratio is robust on
+    # numpy/jax/torch. comb is integer-valued, so this is not differentiable
+    # (matching the old floor-division form).
+    ratio = factorial(xp, n) / (factorial(xp, k) * factorial(xp, n - k))
+    return xp.round(ratio)
+
+
+# --- non-standard helpers ---
+
+
+def apply_along_axis(xp, func1d, x, axis):
+    k = _kind(xp)
+    if k == "jax":
+        import jax
+        import jax.numpy as jnp
+
+        try:
+            shape = list(x.shape)
+            return jax.vmap(func1d)(x.reshape(-1, shape.pop(axis))).reshape(shape)
+        except Exception:
+            return jnp.apply_along_axis(func1d, axis, x)
+    if k == "numpy":
+        return np.apply_along_axis(func1d, axis, x)
+    t = _ts()
+    return t.as_tensor(
+        np.apply_along_axis(
+            lambda a: np.asarray(func1d(t.as_tensor(a))), axis, np.asarray(x)
+        )
+    )
+
+
+def cov(xp, x, rowvar=True, bias=False):
+    k = _kind(xp)
+    if k == "jax":
+        import jax.numpy as jnp
+
+        return jnp.cov(x, rowvar=rowvar, bias=bias)
+    if k == "torch":
+        t = _ts()
+        if not rowvar:
+            x = x.T
+        return t.cov(x, correction=0 if bias else 1)
+    return np.cov(x, rowvar=rowvar, bias=bias)
+
+
+def indices(xp, dimensions):
+    k = _kind(xp)
+    if k == "jax":
+        import jax.numpy as jnp
+
+        return jnp.indices(dimensions)
+    if k == "torch":
+        return _ts().as_tensor(np.indices(dimensions))
+    return np.indices(dimensions)
diff --git a/tests/test_extensions.py b/tests/test_extensions.py
new file mode 100644
index 0000000..70717f0
--- /dev/null
+++ b/tests/test_extensions.py
@@ -0,0 +1,68 @@
+import numpy as np
+import pytest
+from scoringrules.backend import extensions as ext
+from array_api_compat import array_namespace
+
+NP = array_namespace(np.empty(0))
+
+
+def test_erf_numpy():
+    x = np.asarray([0.0, 1.0])
+    out = np.asarray(ext.erf(NP, x))
+    assert out[0] == pytest.approx(0.0)
+    assert out[1] == pytest.approx(0.8427007929, abs=1e-6)
+
+
+def test_gamma_numpy():
+    x = np.asarray([1.0, 2.0, 3.0])
+    out = np.asarray(ext.gamma(NP, x))
+    assert out == pytest.approx([1.0, 1.0, 2.0])
+
+
+def test_betainc_numpy():
+    out = np.asarray(ext.betainc(NP, np.asarray(2.0), np.asarray(3.0), np.asarray(0.5)))
+    assert out == pytest.approx(0.6875, abs=1e-6)
+
+
+def test_comb_numpy():
+    out = np.asarray(ext.comb(NP, np.asarray(5), np.asarray(2)))
+    assert out == pytest.approx(10.0)
+
+
+def test_erf_matches_across_backends(backend):
+    mods = {"numpy": "numpy", "numba": "numpy", "jax": "jax.numpy", "torch": "torch"}
+    xp_mod = pytest.importorskip(mods[backend])
+    import numpy as np
+
+    ref = np.asarray([0.1, 0.5, 1.0, 2.0])
+    x = xp_mod.tensor(ref) if backend == "torch" else xp_mod.asarray(ref)
+    from array_api_compat import array_namespace
+    from scoringrules.backend import extensions as ext
+
+    out = np.asarray(ext.erf(array_namespace(x), x))
+    assert out == pytest.approx(
+        np.asarray(ext.erf(array_namespace(ref), ref)), abs=1e-5
+    )
+
+
+def test_torch_betainc_blocked():
+    pytest.importorskip("torch")
+    import torch
+    from array_api_compat import array_namespace
+    from scoringrules.backend import extensions as ext
+
+    a = torch.tensor([2.0])
+    with pytest.raises(NotImplementedError):
+        ext.betainc(array_namespace(a), a, torch.tensor([3.0]), torch.tensor([0.5]))
+
+
+def test_gamma_grad_jax():
+    jax = pytest.importorskip("jax")
+    import jax.numpy as jnp
+    from array_api_compat import array_namespace
+    from scoringrules.backend import extensions as ext
+
+    g = jax.grad(lambda v: ext.gamma(array_namespace(v), v).sum())(
+        jnp.asarray([1.5, 2.5])
+    )
+    assert jnp.all(jnp.isfinite(g))

From 64e4ad6d040ebcc283dd069326baad029ba52399 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:38:32 +0200
Subject: [PATCH 09/26] Fix jax apply_along_axis axis handling; drop dead
 SCIPY_ARRAY_API; expand extension tests

---
 docs/special_functions_audit.md    |  3 +-
 scoringrules/backend/extensions.py | 30 +++++++++++-------
 tests/test_extensions.py           | 51 ++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/docs/special_functions_audit.md b/docs/special_functions_audit.md
index 1e04e21..18ef3ff 100644
--- a/docs/special_functions_audit.md
+++ b/docs/special_functions_audit.md
@@ -83,6 +83,7 @@ These are not special functions but are also provided by the extension layer:
   the one **numpy round-trip** in the layer (PARTIAL): the per-slice callable is
   evaluated through `numpy.apply_along_axis` and the result re-wrapped as a
   tensor. This breaks autograd on torch and should be avoided in grad-sensitive
-  code paths.
+  code paths; it also requires CPU tensors, since `numpy()` raises on CUDA
+  tensors.
 - `cov` — `jnp.cov` / `torch.cov` (with `correction` mapped from `bias`) / `np.cov`.
 - `indices` — `jnp.indices` / `np.indices` (torch wraps `np.indices`).
diff --git a/scoringrules/backend/extensions.py b/scoringrules/backend/extensions.py
index 8cbe353..589ea8c 100644
--- a/scoringrules/backend/extensions.py
+++ b/scoringrules/backend/extensions.py
@@ -5,12 +5,11 @@
 The forward/gradient support matrix is documented in
 ``docs/special_functions_audit.md``."""
 
-import os
+import numpy as np
+import scipy.special as sp
 
-os.environ.setdefault("SCIPY_ARRAY_API", "1")
-
-import numpy as np  # noqa: E402
-import scipy.special as sp  # noqa: E402
+# NOTE: the scipy branches below only ever receive numpy arrays (jax/torch
+# dispatch natively or raise), so SCIPY_ARRAY_API is irrelevant here.
 
 
 def _kind(xp):
@@ -66,7 +65,12 @@ def gammainc(xp, x, y):
 
 def gammalinc(xp, x, y):
     """Lower incomplete gamma (unregularised)."""
-    return gammainc(xp, x, y) * gamma(xp, x)
+    k = _kind(xp)
+    if k == "jax":
+        return _jsp().gammainc(x, y) * _jsp().gamma(x)
+    if k == "torch":
+        return _ts().special.gammainc(x, y) * _ts().exp(_ts().lgamma(x))
+    return sp.gammainc(x, y) * sp.gamma(x)
 
 
 def gammauinc(xp, x, y):
@@ -142,10 +146,10 @@ def expi(xp, x):
 
 
 def factorial(xp, n):
-    kd = _kind(xp)
-    if kd == "jax":
+    k = _kind(xp)
+    if k == "jax":
         return _jsp().factorial(n)
-    if kd == "torch":
+    if k == "torch":
         t = _ts()
         return t.exp(t.lgamma(n + 1))
     return sp.factorial(n)
@@ -171,8 +175,12 @@ def apply_along_axis(xp, func1d, x, axis):
         import jax.numpy as jnp
 
         try:
-            shape = list(x.shape)
-            return jax.vmap(func1d)(x.reshape(-1, shape.pop(axis))).reshape(shape)
+            # vmap fast path (scalar-returning func1d): move the target axis last
+            # so the reshape groups slices correctly for ANY axis, not just -1.
+            xm = jnp.moveaxis(x, axis, -1)
+            shape = list(xm.shape)
+            n = shape.pop()
+            return jax.vmap(func1d)(xm.reshape(-1, n)).reshape(shape)
         except Exception:
             return jnp.apply_along_axis(func1d, axis, x)
     if k == "numpy":
diff --git a/tests/test_extensions.py b/tests/test_extensions.py
index 70717f0..4a36158 100644
--- a/tests/test_extensions.py
+++ b/tests/test_extensions.py
@@ -29,6 +29,43 @@ def test_comb_numpy():
     assert out == pytest.approx(10.0)
 
 
+def test_factorial_numpy():
+    out = np.asarray(ext.factorial(NP, np.asarray(5)))
+    assert out == pytest.approx(120.0)
+
+
+def test_gammalinc_numpy():
+    # lower incomplete gamma (unregularised): gamma(2)*P(2, 1) = 1 - 2/e
+    out = np.asarray(ext.gammalinc(NP, np.asarray(2.0), np.asarray(1.0)))
+    assert out == pytest.approx(0.2642411177, abs=1e-6)
+
+
+def test_gammauinc_numpy():
+    # upper incomplete gamma (unregularised): gamma(1)*Q(1, 0.5) = exp(-0.5)
+    out = np.asarray(ext.gammauinc(NP, np.asarray(1.0), np.asarray(0.5)))
+    assert out == pytest.approx(0.6065306597, abs=1e-6)
+
+
+def test_apply_along_axis_numpy_both_axes():
+    x = np.arange(6.0).reshape(2, 3)
+    # scalar-returning func1d; check both the last axis and a non-last axis
+    last = np.asarray(ext.apply_along_axis(NP, np.sum, x, -1))
+    assert last.tolist() == pytest.approx([3.0, 12.0])
+    first = np.asarray(ext.apply_along_axis(NP, np.sum, x, 0))
+    assert first.tolist() == pytest.approx([3.0, 5.0, 7.0])
+
+
+def test_apply_along_axis_jax_non_last_axis():
+    pytest.importorskip("jax")
+    import jax.numpy as jnp
+    from array_api_compat import array_namespace
+
+    x = jnp.arange(6.0).reshape(2, 3)
+    # regression guard: the vmap fast path must handle axis=0, not just axis=-1
+    out = np.asarray(ext.apply_along_axis(array_namespace(x), jnp.sum, x, 0))
+    assert out.tolist() == pytest.approx([3.0, 5.0, 7.0])
+
+
 def test_erf_matches_across_backends(backend):
     mods = {"numpy": "numpy", "numba": "numpy", "jax": "jax.numpy", "torch": "torch"}
     xp_mod = pytest.importorskip(mods[backend])
@@ -56,6 +93,20 @@ def test_torch_betainc_blocked():
         ext.betainc(array_namespace(a), a, torch.tensor([3.0]), torch.tensor([0.5]))
 
 
+def test_torch_hypergeometric_and_expi_blocked():
+    pytest.importorskip("torch")
+    import torch
+    from array_api_compat import array_namespace
+    from scoringrules.backend import extensions as ext
+
+    t = torch.tensor([0.5])
+    ns = array_namespace(t)
+    with pytest.raises(NotImplementedError):
+        ext.hypergeometric(ns, t, t, t, t)
+    with pytest.raises(NotImplementedError):
+        ext.expi(ns, t)
+
+
 def test_gamma_grad_jax():
     jax = pytest.importorskip("jax")
     import jax.numpy as jnp

From bdc469277a48d936c85d72b48e47a4f203eeaa51 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:40:46 +0200
Subject: [PATCH 10/26] Default active backend to numpy so numba selection is
 explicit

---
 scoringrules/backend/registry.py | 2 +-
 tests/test_dispatch.py           | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_dispatch.py

diff --git a/scoringrules/backend/registry.py b/scoringrules/backend/registry.py
index a4cd50b..09a63da 100644
--- a/scoringrules/backend/registry.py
+++ b/scoringrules/backend/registry.py
@@ -32,7 +32,7 @@ def __init__(self):
         if _NUMBA_IMPORTED:
             self.register_backend("numba")
 
-        self._active = "numba" if _NUMBA_IMPORTED else "numpy"
+        self._active = "numpy"
 
     @property
     def available_backends(self):
diff --git a/tests/test_dispatch.py b/tests/test_dispatch.py
new file mode 100644
index 0000000..c92c8ba
--- /dev/null
+++ b/tests/test_dispatch.py
@@ -0,0 +1,7 @@
+from scoringrules.backend import backends
+
+
+def test_default_active_is_numpy():
+    # default active must be numpy so that an active value of "numba" can only
+    # come from an explicit set_active call.
+    assert backends._active == "numpy"

From 730423184a0ddeaa0dc5b2ec74888c4ade3dd5f6 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:43:04 +0200
Subject: [PATCH 11/26] Add use_numba dispatch helper and deprecate legacy
 backend selection

---
 scoringrules/_dispatch.py        | 58 ++++++++++++++++++++++++++++++++
 scoringrules/backend/__init__.py |  9 +++++
 scoringrules/backend/registry.py |  9 +++++
 tests/test_dispatch.py           | 29 ++++++++++++++++
 4 files changed, 105 insertions(+)
 create mode 100644 scoringrules/_dispatch.py

diff --git a/scoringrules/_dispatch.py b/scoringrules/_dispatch.py
new file mode 100644
index 0000000..21bf982
--- /dev/null
+++ b/scoringrules/_dispatch.py
@@ -0,0 +1,58 @@
+# scoringrules/_dispatch.py
+"""Backend-argument handling for the array-API era.
+
+``backend`` historically chose both the array library and numba. The library is
+now inferred from the input, so the only meaningful values are ``None`` (infer)
+and ``"numba"`` (gufunc fast-path). The legacy framework strings and the
+registry remain accepted, with a DeprecationWarning, until 1.0."""
+
+import warnings
+
+from array_api_compat import is_array_api_obj
+
+from scoringrules.backend import backends
+
+_LEGACY_ARRAY_API_BACKENDS = {"numpy", "jax", "torch"}
+
+
+def resolve_backend_arg(backend):
+    """Warn on deprecated backend strings; return the value unchanged."""
+    if backend in _LEGACY_ARRAY_API_BACKENDS:
+        warnings.warn(
+            f"Passing backend={backend!r} is deprecated and will be removed in "
+            "1.0. The array framework is now inferred from the input; remove the "
+            "backend argument (use backend='numba' only for the numba fast-path).",
+            DeprecationWarning,
+            stacklevel=3,
+        )
+    return backend
+
+
+def _is_numpy_compatible(*arrays):
+    for a in arrays:
+        if is_array_api_obj(a):
+            name = type(a).__module__
+            if not name.startswith("numpy"):
+                return False
+    return True
+
+
+def use_numba(backend, *arrays):
+    """Decide whether to use the numba gufunc path for this call.
+
+    True iff ``backend == "numba"`` (explicit), or ``backend is None`` and the
+    user globally selected numba via ``set_active("numba")`` AND the inputs are
+    numpy-compatible. A numba request with non-numpy inputs is an error.
+    """
+    explicit = backend == "numba"
+    global_numba = backend is None and backends._active == "numba"
+    if not (explicit or global_numba):
+        return False
+    if not _is_numpy_compatible(*arrays):
+        if explicit:
+            raise ValueError(
+                "backend='numba' requires numpy-compatible inputs; pass numpy "
+                "arrays or drop backend='numba' to use the input's framework."
+            )
+        return False  # global numba default does not apply to jax/torch inputs
+    return True
diff --git a/scoringrules/backend/__init__.py b/scoringrules/backend/__init__.py
index cdb1bda..940488e 100644
--- a/scoringrules/backend/__init__.py
+++ b/scoringrules/backend/__init__.py
@@ -1,3 +1,5 @@
+import warnings
+
 from .namespace import ArrayAPINamespace, get_namespace
 from .registry import BackendsRegistry
 
@@ -5,6 +7,13 @@
 
 
 def register_backend(backend):
+    if backend in {"numpy", "jax", "torch"}:
+        warnings.warn(
+            "register_backend for array-API backends is deprecated and removed "
+            "in 1.0; the framework is inferred from the input.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
     backends.register_backend(backend)
 
 
diff --git a/scoringrules/backend/registry.py b/scoringrules/backend/registry.py
index 09a63da..d841469 100644
--- a/scoringrules/backend/registry.py
+++ b/scoringrules/backend/registry.py
@@ -62,6 +62,15 @@ def __getitem__(self, __key: str) -> ArrayBackend:
             return super().__getitem__(__key)
 
     def set_active(self, backend: str):
+        if backend in {"numpy", "jax", "torch"}:
+            import warnings
+
+            warnings.warn(
+                "set_active for array-API backends is deprecated and removed in "
+                "1.0; the framework is inferred from the input.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         self._active = backend
 
     @property
diff --git a/tests/test_dispatch.py b/tests/test_dispatch.py
index c92c8ba..8d235b7 100644
--- a/tests/test_dispatch.py
+++ b/tests/test_dispatch.py
@@ -1,3 +1,7 @@
+import numpy as np
+import pytest
+
+from scoringrules._dispatch import resolve_backend_arg, use_numba
 from scoringrules.backend import backends
 
 
@@ -5,3 +9,28 @@ def test_default_active_is_numpy():
     # default active must be numpy so that an active value of "numba" can only
     # come from an explicit set_active call.
     assert backends._active == "numpy"
+
+
+def test_use_numba_explicit():
+    assert use_numba("numba", np.asarray([1.0])) is True
+
+
+def test_use_numba_none_default_is_false():
+    assert use_numba(None, np.asarray([1.0])) is False
+
+
+def test_use_numba_numba_with_non_numpy_raises():
+    torch = pytest.importorskip("torch")
+    with pytest.raises(ValueError, match="numpy-compatible"):
+        use_numba("numba", torch.tensor([1.0]))
+
+
+def test_legacy_backend_string_warns():
+    with pytest.warns(DeprecationWarning):
+        resolve_backend_arg("jax")
+
+
+def test_numba_arg_does_not_warn(recwarn):
+    resolve_backend_arg("numba")
+    resolve_backend_arg(None)
+    assert not any(isinstance(w.message, DeprecationWarning) for w in recwarn)

From 7417a2f57388d72e32cbb9a5c0b1fa959c040fd5 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:47:28 +0200
Subject: [PATCH 12/26] Tidy warnings import and test global-numba non-numpy
 path

---
 scoringrules/backend/registry.py |  3 +--
 tests/test_dispatch.py           | 12 ++++++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/scoringrules/backend/registry.py b/scoringrules/backend/registry.py
index d841469..814f92c 100644
--- a/scoringrules/backend/registry.py
+++ b/scoringrules/backend/registry.py
@@ -1,4 +1,5 @@
 import typing as tp
+import warnings
 from importlib.util import find_spec
 
 from .base import ArrayBackend
@@ -63,8 +64,6 @@ def __getitem__(self, __key: str) -> ArrayBackend:
 
     def set_active(self, backend: str):
         if backend in {"numpy", "jax", "torch"}:
-            import warnings
-
             warnings.warn(
                 "set_active for array-API backends is deprecated and removed in "
                 "1.0; the framework is inferred from the input.",
diff --git a/tests/test_dispatch.py b/tests/test_dispatch.py
index 8d235b7..f6fedc7 100644
--- a/tests/test_dispatch.py
+++ b/tests/test_dispatch.py
@@ -25,6 +25,18 @@ def test_use_numba_numba_with_non_numpy_raises():
         use_numba("numba", torch.tensor([1.0]))
 
 
+def test_use_numba_global_numba_with_non_numpy_returns_false():
+    # set_active("numba") globally must NOT force the gufunc path on jax/torch
+    # inputs (it only applies to numpy inputs); it returns False, not a raise.
+    jnp = pytest.importorskip("jax.numpy")
+    saved = backends._active
+    backends._active = "numba"
+    try:
+        assert use_numba(None, jnp.asarray([1.0])) is False
+    finally:
+        backends._active = saved
+
+
 def test_legacy_backend_string_warns():
     with pytest.warns(DeprecationWarning):
         resolve_backend_arg("jax")

From a3f425955d8f2ad7d895839c184b5a3d9f659e4a Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:51:10 +0200
Subject: [PATCH 13/26] Add per-backend native-array fixtures and inference
 assertion

---
 tests/conftest.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/tests/conftest.py b/tests/conftest.py
index 2d03aad..8f71142 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -64,3 +64,42 @@ def probability_forecasts():
         usecols=(1, -1),
     )
     return data
+
+
+@pytest.fixture()
+def to_backend(backend):
+    """Return a function that converts numpy input to the backend's native array.
+
+    numpy/numba -> numpy ndarray; jax -> jax array; torch -> torch tensor.
+    """
+    import numpy as np
+
+    if backend in ("numpy", "numba"):
+        return lambda x: np.asarray(x)
+    if backend == "jax":
+        import jax.numpy as jnp
+
+        return lambda x: jnp.asarray(x)
+    if backend == "torch":
+        import torch
+
+        return lambda x: torch.as_tensor(np.asarray(x), dtype=torch.float64)
+    raise ValueError(backend)
+
+
+@pytest.fixture()
+def backend_kwargs(backend):
+    """kwargs to pass to a score: only numba needs an explicit backend string."""
+    return {"backend": "numba"} if backend == "numba" else {}
+
+
+def assert_inferred(result, backend):
+    """Assert the result array belongs to the expected framework (guards against
+    silent numpy fallback when a non-numpy backend was requested)."""
+    mod = type(result).__module__
+    if backend == "jax":
+        assert "jax" in mod, f"expected a jax array, got {mod}"
+    elif backend == "torch":
+        assert "torch" in mod, f"expected a torch tensor, got {mod}"
+    else:
+        assert "numpy" in mod, f"expected a numpy array, got {mod}"

From a6429c79fdc1497a1a3fca1c70f9da68f850787e Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 01:51:22 +0200
Subject: [PATCH 14/26] Add xp-parameterised univariate ensemble helpers for
 the CRPS pilot

---
 scoringrules/core/utils_xp.py | 90 +++++++++++++++++++++++++++++++++++
 tests/test_utils_xp.py        | 43 +++++++++++++++++
 2 files changed, 133 insertions(+)
 create mode 100644 scoringrules/core/utils_xp.py
 create mode 100644 tests/test_utils_xp.py

diff --git a/scoringrules/core/utils_xp.py b/scoringrules/core/utils_xp.py
new file mode 100644
index 0000000..8f85437
--- /dev/null
+++ b/scoringrules/core/utils_xp.py
@@ -0,0 +1,90 @@
+# scoringrules/core/utils_xp.py
+"""xp-parameterised copies of the univariate ensemble helpers used by the
+migrated CRPS family. Parallel to core/utils.py during the array-API transition;
+the follow-on migration consolidates them."""
+
+import typing as tp
+
+from .utils import _M_AXIS_UV, _shape_compatibility_check
+
+if tp.TYPE_CHECKING:
+    from scoringrules.core.typing import Array, NanPolicy  # noqa: F401
+
+
+def univariate_array_check(obs, fct, m_axis, xp):
+    obs, fct = xp.asarray(obs), xp.asarray(fct)
+    m_axis = m_axis if m_axis >= 0 else fct.ndim + m_axis
+    _shape_compatibility_check(obs, fct, m_axis)
+    if m_axis != _M_AXIS_UV:
+        fct = xp.moveaxis(fct, m_axis, _M_AXIS_UV)
+    return obs, fct
+
+
+def univariate_weight_check(ens_w, fct, m_axis, xp):
+    if ens_w is not None:
+        ens_w = xp.asarray(ens_w)
+        m_axis = m_axis if m_axis >= 0 else fct.ndim + m_axis
+        ens_w = xp.moveaxis(ens_w, m_axis, _M_AXIS_UV)
+        if ens_w.shape != fct.shape:
+            raise ValueError(
+                f"Shape of weights {ens_w.shape} is not compatible with forecast "
+                f"shape {fct.shape}"
+            )
+        if xp.any(ens_w < 0):
+            raise ValueError("`ens_w` contains negative entries")
+        ens_w = ens_w / xp.sum(ens_w, axis=-1, keepdims=True)
+    return ens_w
+
+
+def univariate_sort_ens(
+    fct, ens_w=None, m_axis=-1, estimator=None, sorted_ensemble=False, *, xp
+):
+    sort_ensemble = not sorted_ensemble and estimator in ["qd", "pwm", "int"]
+    if sort_ensemble:
+        ind = xp.argsort(fct, axis=-1)
+        fct = xp.gather(fct, ind, axis=-1)
+    if ens_w is not None:
+        ens_w = univariate_weight_check(ens_w, fct, m_axis, xp=xp)
+        if sort_ensemble:
+            ens_w = xp.gather(ens_w, ind, axis=-1)
+    return fct, ens_w
+
+
+def apply_nan_policy_ens_uv(
+    obs, fct, nan_policy="propagate", ens_w=None, estimator=None, m_axis=-1, *, xp
+):
+    if ens_w is not None:
+        ens_w = xp.moveaxis(xp.asarray(ens_w, dtype=fct.dtype), m_axis, -1)
+
+    if nan_policy == "propagate":
+        return obs, fct, ens_w
+
+    nan_mask = xp.isnan(fct)
+    if ens_w is not None:
+        nan_mask = nan_mask | xp.isnan(ens_w)
+
+    if nan_policy == "raise":
+        if xp.any(nan_mask) or xp.any(xp.isnan(obs)):
+            raise ValueError(
+                "NaN values encountered in input. Use nan_policy='propagate' or "
+                "nan_policy='omit' to handle NaN values."
+            )
+        return obs, fct, ens_w
+
+    if nan_policy == "omit":
+        if estimator in ["int", "akr", "akr_circperm"]:
+            raise NotImplementedError(
+                f"NaN handling with nan_policy='omit' is not implemented for "
+                f"estimator '{estimator}'."
+            )
+        fct = xp.where(xp.isnan(fct), xp.asarray(0.0), fct)
+        if ens_w is None:
+            ens_w = xp.asarray(~nan_mask, dtype=fct.dtype)
+        else:
+            ens_w = xp.where(nan_mask, xp.asarray(0.0), ens_w)
+        return obs, fct, ens_w
+
+    raise ValueError(
+        f"Invalid nan_policy '{nan_policy}'. Must be one of 'propagate', 'omit', "
+        "'raise'."
+    )
diff --git a/tests/test_utils_xp.py b/tests/test_utils_xp.py
new file mode 100644
index 0000000..829bba7
--- /dev/null
+++ b/tests/test_utils_xp.py
@@ -0,0 +1,43 @@
+import numpy as np
+import pytest
+from scoringrules.backend import get_namespace
+from scoringrules.core import utils_xp
+
+
+def test_univariate_array_check_moves_axis():
+    obs = np.random.randn(5)
+    fct = np.random.randn(11, 5)  # ensemble axis first
+    xp = get_namespace(obs, fct)
+    o, f = utils_xp.univariate_array_check(obs, fct, m_axis=0, xp=xp)
+    assert f.shape == (5, 11)
+
+
+def test_nan_policy_propagate_is_noop():
+    obs = np.random.randn(5)
+    fct = np.random.randn(5, 11)
+    xp = get_namespace(obs, fct)
+    o, f, w = utils_xp.apply_nan_policy_ens_uv(obs, fct, "propagate", xp=xp)
+    assert w is None
+
+
+def test_xp_helpers_match_original_numpy():
+    from scoringrules.core import utils as orig
+
+    obs = np.random.randn(4)
+    fct = np.random.randn(4, 7)
+    xp = get_namespace(obs, fct)
+    # weight normalisation parity
+    w = np.abs(np.random.randn(4, 7)) + 0.1
+    w_xp = utils_xp.univariate_weight_check(w.copy(), fct, -1, xp=xp)
+    w_orig = orig.univariate_weight_check(w.copy(), fct, -1, backend="numpy")
+    assert np.asarray(w_xp) == pytest.approx(np.asarray(w_orig))
+    # nan omit parity (mask building)
+    fct_nan = fct.copy()
+    fct_nan[0, [1, 3]] = np.nan
+    _, f_xp, m_xp = utils_xp.apply_nan_policy_ens_uv(
+        obs, fct_nan.copy(), "omit", estimator="nrg", xp=xp
+    )
+    _, f_o, m_o = orig.apply_nan_policy_ens_uv(
+        obs, fct_nan.copy(), "omit", estimator="nrg", m_axis=-1, backend="numpy"
+    )
+    assert np.asarray(m_xp) == pytest.approx(np.asarray(m_o))

From d79808a9f61391caf7411ea8e81feea9ed252d40 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 02:00:42 +0200
Subject: [PATCH 15/26] Migrate CRPS ensemble-estimator core to the array-api
 xp namespace

---
 scoringrules/core/crps/_approx.py   | 131 ++++++++++++---------------
 scoringrules/core/crps/_approx_w.py | 132 ++++++++++++----------------
 tests/test_crps_core_xp.py          |  35 ++++++++
 3 files changed, 145 insertions(+), 153 deletions(-)
 create mode 100644 tests/test_crps_core_xp.py

diff --git a/scoringrules/core/crps/_approx.py b/scoringrules/core/crps/_approx.py
index 3f282a2..7a921de 100644
--- a/scoringrules/core/crps/_approx.py
+++ b/scoringrules/core/crps/_approx.py
@@ -1,32 +1,31 @@
 import typing as tp
 
-from scoringrules.backend import backends
-
 if tp.TYPE_CHECKING:
-    from scoringrules.core.typing import Array, ArrayLike, Backend
+    from scoringrules.core.typing import Array, ArrayLike  # noqa: F401
 
 
 def ensemble(
     obs: "ArrayLike",
     fct: "Array",
     estimator: str = "pwm",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for a finite ensemble."""
     if estimator == "nrg":
-        out = _crps_ensemble_nrg(obs, fct, backend=backend)
+        out = _crps_ensemble_nrg(obs, fct, xp=xp)
     elif estimator == "pwm":
-        out = _crps_ensemble_pwm(obs, fct, backend=backend)
+        out = _crps_ensemble_pwm(obs, fct, xp=xp)
     elif estimator == "fair":
-        out = _crps_ensemble_fair(obs, fct, backend=backend)
+        out = _crps_ensemble_fair(obs, fct, xp=xp)
     elif estimator == "qd":
-        out = _crps_ensemble_qd(obs, fct, backend=backend)
+        out = _crps_ensemble_qd(obs, fct, xp=xp)
     elif estimator == "akr":
-        out = _crps_ensemble_akr(obs, fct, backend=backend)
+        out = _crps_ensemble_akr(obs, fct, xp=xp)
     elif estimator == "akr_circperm":
-        out = _crps_ensemble_akr_circperm(obs, fct, backend=backend)
+        out = _crps_ensemble_akr_circperm(obs, fct, xp=xp)
     elif estimator == "int":
-        out = _crps_ensemble_int(obs, fct, backend=backend)
+        out = _crps_ensemble_int(obs, fct, xp=xp)
     else:
         raise ValueError(
             f"{estimator} not a valid estimator, must be one of 'nrg', 'fair', 'pwm', 'qd', 'akr', 'akr_circperm' and 'int'."
@@ -34,103 +33,81 @@ def ensemble(
     return out
 
 
-def _crps_ensemble_fair(
-    obs: "Array", fct: "Array", backend: "Backend" = None
-) -> "Array":
+def _crps_ensemble_fair(obs: "Array", fct: "Array", *, xp) -> "Array":
     """Fair version of the CRPS estimator based on the energy form."""
-    B = backends.active if backend is None else backends[backend]
     M: int = fct.shape[-1]
-    e_1 = B.sum(B.abs(obs[..., None] - fct), axis=-1) / M
-    e_2 = B.sum(
-        B.abs(fct[..., None] - fct[..., None, :]),
+    e_1 = xp.sum(xp.abs(obs[..., None] - fct), axis=-1) / M
+    e_2 = xp.sum(
+        xp.abs(fct[..., None] - fct[..., None, :]),
         axis=(-1, -2),
     ) / (M * (M - 1))
     return e_1 - 0.5 * e_2
 
 
-def _crps_ensemble_nrg(
-    obs: "Array", fct: "Array", backend: "Backend" = None
-) -> "Array":
+def _crps_ensemble_nrg(obs: "Array", fct: "Array", *, xp) -> "Array":
     """CRPS estimator based on the energy form."""
-    B = backends.active if backend is None else backends[backend]
     M: int = fct.shape[-1]
-    e_1 = B.sum(B.abs(obs[..., None] - fct), axis=-1) / M
-    e_2 = B.sum(B.abs(fct[..., None] - fct[..., None, :]), (-1, -2)) / (M**2)
+    e_1 = xp.sum(xp.abs(obs[..., None] - fct), axis=-1) / M
+    e_2 = xp.sum(xp.abs(fct[..., None] - fct[..., None, :]), axis=(-1, -2)) / (M**2)
     return e_1 - 0.5 * e_2
 
 
-def _crps_ensemble_pwm(
-    obs: "Array", fct: "Array", backend: "Backend" = None
-) -> "Array":
+def _crps_ensemble_pwm(obs: "Array", fct: "Array", *, xp) -> "Array":
     """CRPS estimator based on the probability weighted moment (PWM) form."""
-    B = backends.active if backend is None else backends[backend]
     M: int = fct.shape[-1]
-    expected_diff = B.sum(B.abs(obs[..., None] - fct), axis=-1) / M
-    β_0 = B.sum(fct, axis=-1) / M
-    β_1 = B.sum(fct * B.arange(0, M), axis=-1) / (M * (M - 1.0))
+    expected_diff = xp.sum(xp.abs(obs[..., None] - fct), axis=-1) / M
+    β_0 = xp.sum(fct, axis=-1) / M
+    β_1 = xp.sum(fct * xp.arange(0, M), axis=-1) / (M * (M - 1.0))
     return expected_diff + β_0 - 2.0 * β_1
 
 
-def _crps_ensemble_akr(
-    obs: "Array", fct: "Array", backend: "Backend" = None
-) -> "Array":
+def _crps_ensemble_akr(obs: "Array", fct: "Array", *, xp) -> "Array":
     """CRPS estimator based on the approximate kernel representation."""
-    B = backends.active if backend is None else backends[backend]
     M: int = fct.shape[-1]
-    e_1 = B.sum(B.abs(obs[..., None] - fct), axis=-1) / M
-    e_2 = B.sum(B.abs(fct - B.roll(fct, shift=1, axis=-1)), -1) / M
+    e_1 = xp.sum(xp.abs(obs[..., None] - fct), axis=-1) / M
+    e_2 = xp.sum(xp.abs(fct - xp.roll(fct, shift=1, axis=-1)), axis=-1) / M
     return e_1 - 0.5 * e_2
 
 
-def _crps_ensemble_akr_circperm(
-    obs: "Array", fct: "Array", backend: "Backend" = None
-) -> "Array":
+def _crps_ensemble_akr_circperm(obs: "Array", fct: "Array", *, xp) -> "Array":
     """CRPS estimator based on the AKR with cyclic permutation."""
-    B = backends.active if backend is None else backends[backend]
     M: int = fct.shape[-1]
-    e_1 = B.sum(B.abs(obs[..., None] - fct), axis=-1) / M
+    e_1 = xp.sum(xp.abs(obs[..., None] - fct), axis=-1) / M
     shift = M // 2
-    e_2 = B.sum(B.abs(fct - B.roll(fct, shift=shift, axis=-1)), -1) / M
+    e_2 = xp.sum(xp.abs(fct - xp.roll(fct, shift=shift, axis=-1)), axis=-1) / M
     return e_1 - 0.5 * e_2
 
 
-def _crps_ensemble_qd(obs: "Array", fct: "Array", backend: "Backend" = None) -> "Array":
+def _crps_ensemble_qd(obs: "Array", fct: "Array", *, xp) -> "Array":
     """CRPS estimator based on the quantile decomposition form."""
-    B = backends.active if backend is None else backends[backend]
     M: int = fct.shape[-1]
-    alpha = B.arange(1, M + 1) - 0.5
+    alpha = xp.arange(1, M + 1) - 0.5
     below = (fct <= obs[..., None]) * alpha * (obs[..., None] - fct)
     above = (fct > obs[..., None]) * (M - alpha) * (fct - obs[..., None])
-    out = B.sum(below + above, axis=-1) / (M**2)
+    out = xp.sum(below + above, axis=-1) / (M**2)
     return 2 * out
 
 
-def _crps_ensemble_int(
-    obs: "Array", fct: "Array", backend: "Backend" = None
-) -> "Array":
+def _crps_ensemble_int(obs: "Array", fct: "Array", *, xp) -> "Array":
     """CRPS estimator based on the integral representation."""
-    B = backends.active if backend is None else backends[backend]
     M: int = fct.shape[-1]
-    y_pos = B.mean((fct <= obs[..., None]) * 1.0, axis=-1, keepdims=True)
-    fct_cdf = B.zeros(fct.shape) + B.arange(1, M + 1) / M
-    fct_cdf = B.concat((fct_cdf, y_pos), axis=-1)
-    fct_cdf = B.sort(fct_cdf, axis=-1)
-    fct_exp = B.concat((fct, obs[..., None]), axis=-1)
-    fct_exp = B.sort(fct_exp, axis=-1)
+    y_pos = xp.mean((fct <= obs[..., None]) * 1.0, axis=-1, keepdims=True)
+    fct_cdf = xp.zeros(fct.shape) + xp.arange(1, M + 1) / M
+    fct_cdf = xp.concat((fct_cdf, y_pos), axis=-1)
+    fct_cdf = xp.sort(fct_cdf, axis=-1)
+    fct_exp = xp.concat((fct, obs[..., None]), axis=-1)
+    fct_exp = xp.sort(fct_exp, axis=-1)
     fct_dif = fct_exp[..., 1:] - fct_exp[..., :M]
     obs_cdf = (obs[..., None] <= fct_exp) * 1.0
     out = fct_dif * (fct_cdf[..., :M] - obs_cdf[..., :M]) ** 2
-    return B.sum(out, axis=-1)
+    return xp.sum(out, axis=-1)
 
 
-def quantile_pinball(
-    obs: "Array", fct: "Array", alpha: "Array", backend: "Backend" = None
-) -> "Array":
+def quantile_pinball(obs: "Array", fct: "Array", alpha: "Array", *, xp) -> "Array":
     """CRPS approximation via Pinball Loss."""
-    B = backends.active if backend is None else backends[backend]
     below = (fct <= obs[..., None]) * alpha * (obs[..., None] - fct)
     above = (fct > obs[..., None]) * (1 - alpha) * (fct - obs[..., None])
-    return 2 * B.mean(below + above, axis=-1)
+    return 2 * xp.mean(below + above, axis=-1)
 
 
 def ow_ensemble(
@@ -138,15 +115,15 @@ def ow_ensemble(
     fct: "Array",
     ow: "Array",
     fw: "Array",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Outcome-Weighted CRPS estimator based on the energy form."""
-    B = backends.active if backend is None else backends[backend]
     M: int = fct.shape[-1]
-    wbar = B.mean(fw, axis=-1)
-    e_1 = B.sum(B.abs(obs[..., None] - fct) * fw, axis=-1) * ow / (M * wbar)
-    e_2 = B.sum(
-        B.abs(fct[..., None] - fct[..., None, :]) * fw[..., None] * fw[..., None, :],
+    wbar = xp.mean(fw, axis=-1)
+    e_1 = xp.sum(xp.abs(obs[..., None] - fct) * fw, axis=-1) * ow / (M * wbar)
+    e_2 = xp.sum(
+        xp.abs(fct[..., None] - fct[..., None, :]) * fw[..., None] * fw[..., None, :],
         axis=(-1, -2),
     )
     e_2 *= ow / (M**2 * wbar**2)
@@ -158,17 +135,17 @@ def vr_ensemble(
     fct: "Array",
     ow: "Array",
     fw: "Array",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Vertically Re-scaled CRPS estimator based on the energy form."""
-    B = backends.active if backend is None else backends[backend]
     M: int = fct.shape[-1]
-    e_1 = B.sum(B.abs(obs[..., None] - fct) * fw, axis=-1) * ow / M
-    e_2 = B.sum(
-        B.abs(B.expand_dims(fct, axis=-1) - B.expand_dims(fct, axis=-2))
-        * (B.expand_dims(fw, axis=-1) * B.expand_dims(fw, axis=-2)),
+    e_1 = xp.sum(xp.abs(obs[..., None] - fct) * fw, axis=-1) * ow / M
+    e_2 = xp.sum(
+        xp.abs(xp.expand_dims(fct, axis=-1) - xp.expand_dims(fct, axis=-2))
+        * (xp.expand_dims(fw, axis=-1) * xp.expand_dims(fw, axis=-2)),
         axis=(-1, -2),
     ) / (M**2)
-    e_3 = B.mean(B.abs(fct) * fw, axis=-1) - B.abs(obs) * ow
-    e_3 *= B.mean(fw, axis=1) - ow
+    e_3 = xp.mean(xp.abs(fct) * fw, axis=-1) - xp.abs(obs) * ow
+    e_3 *= xp.mean(fw, axis=1) - ow
     return e_1 - 0.5 * e_2 + e_3
diff --git a/scoringrules/core/crps/_approx_w.py b/scoringrules/core/crps/_approx_w.py
index 8625d99..de70a57 100644
--- a/scoringrules/core/crps/_approx_w.py
+++ b/scoringrules/core/crps/_approx_w.py
@@ -1,9 +1,7 @@
 import typing as tp
 
-from scoringrules.backend import backends
-
 if tp.TYPE_CHECKING:
-    from scoringrules.core.typing import Array, ArrayLike, Backend
+    from scoringrules.core.typing import Array, ArrayLike  # noqa: F401
 
 
 def ensemble_w(
@@ -11,23 +9,24 @@ def ensemble_w(
     fct: "Array",
     ens_w: "Array" = None,
     estimator: str = "pwm",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for a finite weighted ensemble."""
     if estimator == "nrg":
-        out = _crps_ensemble_nrg_w(obs, fct, ens_w, backend=backend)
+        out = _crps_ensemble_nrg_w(obs, fct, ens_w, xp=xp)
     elif estimator == "pwm":
-        out = _crps_ensemble_pwm_w(obs, fct, ens_w, backend=backend)
+        out = _crps_ensemble_pwm_w(obs, fct, ens_w, xp=xp)
     elif estimator == "fair":
-        out = _crps_ensemble_fair_w(obs, fct, ens_w, backend=backend)
+        out = _crps_ensemble_fair_w(obs, fct, ens_w, xp=xp)
     elif estimator == "qd":
-        out = _crps_ensemble_qd_w(obs, fct, ens_w, backend=backend)
+        out = _crps_ensemble_qd_w(obs, fct, ens_w, xp=xp)
     elif estimator == "akr":
-        out = _crps_ensemble_akr_w(obs, fct, ens_w, backend=backend)
+        out = _crps_ensemble_akr_w(obs, fct, ens_w, xp=xp)
     elif estimator == "akr_circperm":
-        out = _crps_ensemble_akr_circperm_w(obs, fct, ens_w, backend=backend)
+        out = _crps_ensemble_akr_circperm_w(obs, fct, ens_w, xp=xp)
     elif estimator == "int":
-        out = _crps_ensemble_int_w(obs, fct, ens_w, backend=backend)
+        out = _crps_ensemble_int_w(obs, fct, ens_w, xp=xp)
     else:
         raise ValueError(
             f"{estimator} not a valid estimator, must be one of 'nrg', 'fair', 'pwm', 'qd', 'akr', 'akr_circperm' and 'int'."
@@ -36,98 +35,79 @@ def ensemble_w(
     return out
 
 
-def _crps_ensemble_fair_w(
-    obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None
-) -> "Array":
+def _crps_ensemble_fair_w(obs: "Array", fct: "Array", w: "Array", *, xp) -> "Array":
     """Fair version of the CRPS estimator based on the energy form."""
-    B = backends.active if backend is None else backends[backend]
-    e_1 = B.sum(B.abs(obs[..., None] - fct) * w, axis=-1)
-    e_2 = B.sum(
-        B.abs(fct[..., None] - fct[..., None, :]) * w[..., None] * w[..., None, :],
+    e_1 = xp.sum(xp.abs(obs[..., None] - fct) * w, axis=-1)
+    e_2 = xp.sum(
+        xp.abs(fct[..., None] - fct[..., None, :]) * w[..., None] * w[..., None, :],
         axis=(-1, -2),
-    ) / (1 - B.sum(w * w, axis=-1))
+    ) / (1 - xp.sum(w * w, axis=-1))
     return e_1 - 0.5 * e_2
 
 
-def _crps_ensemble_nrg_w(
-    obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None
-) -> "Array":
+def _crps_ensemble_nrg_w(obs: "Array", fct: "Array", w: "Array", *, xp) -> "Array":
     """CRPS estimator based on the energy form."""
-    B = backends.active if backend is None else backends[backend]
-    e_1 = B.sum(B.abs(obs[..., None] - fct) * w, axis=-1)
-    e_2 = B.sum(
-        B.abs(fct[..., None] - fct[..., None, :]) * w[..., None] * w[..., None, :],
-        (-1, -2),
+    e_1 = xp.sum(xp.abs(obs[..., None] - fct) * w, axis=-1)
+    e_2 = xp.sum(
+        xp.abs(fct[..., None] - fct[..., None, :]) * w[..., None] * w[..., None, :],
+        axis=(-1, -2),
     )
     return e_1 - 0.5 * e_2
 
 
-def _crps_ensemble_pwm_w(
-    obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None
-) -> "Array":
+def _crps_ensemble_pwm_w(obs: "Array", fct: "Array", w: "Array", *, xp) -> "Array":
     """CRPS estimator based on the probability weighted moment (PWM) form."""
-    B = backends.active if backend is None else backends[backend]
-    w_sum = B.cumsum(w, axis=-1)
-    expected_diff = B.sum(B.abs(obs[..., None] - fct) * w, axis=-1)
-    β_0 = B.sum(fct * w, axis=-1)
-    β_1 = B.sum(fct * w * (w_sum - w), axis=-1) / (1 - B.sum(w * w, axis=-1))
+    w_sum = xp.cumsum(w, axis=-1)
+    expected_diff = xp.sum(xp.abs(obs[..., None] - fct) * w, axis=-1)
+    β_0 = xp.sum(fct * w, axis=-1)
+    β_1 = xp.sum(fct * w * (w_sum - w), axis=-1) / (1 - xp.sum(w * w, axis=-1))
     return expected_diff + β_0 - 2.0 * β_1
 
 
-def _crps_ensemble_qd_w(
-    obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None
-) -> "Array":
+def _crps_ensemble_qd_w(obs: "Array", fct: "Array", w: "Array", *, xp) -> "Array":
     """CRPS estimator based on the quantile score decomposition."""
-    B = backends.active if backend is None else backends[backend]
-    w_sum = B.cumsum(w, axis=-1)
+    w_sum = xp.cumsum(w, axis=-1)
     a = w_sum - 0.5 * w
     dif = fct - obs[..., None]
-    c = B.where(dif > 0, 1 - a, -a)
-    s = B.sum(w * c * dif, axis=-1)
+    c = xp.where(dif > 0, 1 - a, -a)
+    s = xp.sum(w * c * dif, axis=-1)
     return 2 * s
 
 
-def _crps_ensemble_akr_w(
-    obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None
-) -> "Array":
+def _crps_ensemble_akr_w(obs: "Array", fct: "Array", w: "Array", *, xp) -> "Array":
     """CRPS estimator based on the approximate kernel representation."""
-    B = backends.active if backend is None else backends[backend]
     M = fct.shape[-1]
-    e_1 = B.sum(B.abs(obs[..., None] - fct) * w, axis=-1)
+    e_1 = xp.sum(xp.abs(obs[..., None] - fct) * w, axis=-1)
     ind = [(i + 1) % M for i in range(M)]
-    e_2 = B.sum(B.abs(fct[..., ind] - fct) * w[..., ind], axis=-1)
+    e_2 = xp.sum(xp.abs(fct[..., ind] - fct) * w[..., ind], axis=-1)
     return e_1 - 0.5 * e_2
 
 
 def _crps_ensemble_akr_circperm_w(
-    obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None
+    obs: "Array", fct: "Array", w: "Array", *, xp
 ) -> "Array":
     """CRPS estimator based on the AKR with cyclic permutation."""
-    B = backends.active if backend is None else backends[backend]
     M = fct.shape[-1]
-    e_1 = B.sum(B.abs(obs[..., None] - fct) * w, axis=-1)
+    e_1 = xp.sum(xp.abs(obs[..., None] - fct) * w, axis=-1)
     shift = int((M - 1) / 2)
     ind = [(i + shift) % M for i in range(M)]
-    e_2 = B.sum(B.abs(fct[..., ind] - fct) * w[..., ind], axis=-1)
+    e_2 = xp.sum(xp.abs(fct[..., ind] - fct) * w[..., ind], axis=-1)
     return e_1 - 0.5 * e_2
 
 
-def _crps_ensemble_int_w(
-    obs: "Array", fct: "Array", w: "Array", backend: "Backend" = None
-) -> "Array":
+def _crps_ensemble_int_w(obs: "Array", fct: "Array", w: "Array", *, xp) -> "Array":
     """CRPS estimator based on the integral representation."""
-    B = backends.active if backend is None else backends[backend]
     M: int = fct.shape[-1]
-    y_pos = B.sum((fct <= obs[..., None]) * w, axis=-1, keepdims=True)
-    fct_cdf = B.cumsum(w, axis=-1)
-    fct_cdf = B.concat((fct_cdf, y_pos), axis=-1)
-    fct_cdf = B.sort(fct_cdf, axis=-1)
-    fct_exp = B.concat((fct, obs[..., None]), axis=-1)
-    fct_exp = B.sort(fct_exp, axis=-1)
+    y_pos = xp.sum((fct <= obs[..., None]) * w, axis=-1, keepdims=True)
+    fct_cdf = xp.cumsum(w, axis=-1)
+    fct_cdf = xp.concat((fct_cdf, y_pos), axis=-1)
+    fct_cdf = xp.sort(fct_cdf, axis=-1)
+    fct_exp = xp.concat((fct, obs[..., None]), axis=-1)
+    fct_exp = xp.sort(fct_exp, axis=-1)
     fct_dif = fct_exp[..., 1:] - fct_exp[..., :M]
     obs_cdf = (obs[..., None] <= fct_exp) * 1.0
     out = fct_dif * (fct_cdf[..., :M] - obs_cdf[..., :M]) ** 2
-    return B.sum(out, axis=-1)
+    return xp.sum(out, axis=-1)
 
 
 def ow_ensemble_w(
@@ -136,16 +116,16 @@ def ow_ensemble_w(
     ow: "Array",
     fw: "Array",
     ens_w: "Array",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Outcome-Weighted CRPS for an ensemble forecast."""
-    B = backends.active if backend is None else backends[backend]
-    wbar = B.sum(ens_w * fw, axis=-1)
-    e_1 = B.sum(ens_w * B.abs(obs[..., None] - fct) * fw, axis=-1) * ow / wbar
-    e_2 = B.sum(
+    wbar = xp.sum(ens_w * fw, axis=-1)
+    e_1 = xp.sum(ens_w * xp.abs(obs[..., None] - fct) * fw, axis=-1) * ow / wbar
+    e_2 = xp.sum(
         ens_w[..., None]
         * ens_w[..., None, :]
-        * B.abs(fct[..., None] - fct[..., None, :])
+        * xp.abs(fct[..., None] - fct[..., None, :])
         * fw[..., None]
         * fw[..., None, :],
         axis=(-1, -2),
@@ -160,19 +140,19 @@ def vr_ensemble_w(
     ow: "Array",
     fw: "Array",
     ens_w: "Array",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Vertically Re-scaled CRPS for an ensemble forecast."""
-    B = backends.active if backend is None else backends[backend]
-    e_1 = B.sum(ens_w * B.abs(obs[..., None] - fct) * fw, axis=-1) * ow
-    e_2 = B.sum(
+    e_1 = xp.sum(ens_w * xp.abs(obs[..., None] - fct) * fw, axis=-1) * ow
+    e_2 = xp.sum(
         ens_w[..., None]
         * ens_w[..., None, :]
-        * B.abs(fct[..., None] - fct[..., None, :])
+        * xp.abs(fct[..., None] - fct[..., None, :])
         * fw[..., None]
         * fw[..., None, :],
         axis=(-1, -2),
     )
-    e_3 = B.sum(ens_w * B.abs(fct) * fw, axis=-1) - B.abs(obs) * ow
-    e_3 *= B.sum(ens_w * fw, axis=-1) - ow
+    e_3 = xp.sum(ens_w * xp.abs(fct) * fw, axis=-1) - xp.abs(obs) * ow
+    e_3 *= xp.sum(ens_w * fw, axis=-1) - ow
     return e_1 - 0.5 * e_2 + e_3
diff --git a/tests/test_crps_core_xp.py b/tests/test_crps_core_xp.py
new file mode 100644
index 0000000..a982af8
--- /dev/null
+++ b/tests/test_crps_core_xp.py
@@ -0,0 +1,35 @@
+import numpy as np
+import pytest
+from scoringrules.backend import get_namespace
+from scoringrules.core import crps
+
+ESTIMATORS = ["nrg", "fair", "pwm", "qd", "int"]
+
+
+@pytest.mark.parametrize("estimator", ESTIMATORS)
+def test_core_ensemble_matches_numpy(estimator, backend, to_backend):
+    rng = np.random.default_rng(0)
+    obs_np = rng.standard_normal(6)
+    fct_np = np.sort(rng.standard_normal((6, 9)), axis=-1)
+    obs, fct = to_backend(obs_np), to_backend(fct_np)
+    out = np.asarray(crps.ensemble(obs, fct, estimator, xp=get_namespace(obs, fct)))
+    ref = np.asarray(
+        crps.ensemble(obs_np, fct_np, estimator, xp=get_namespace(obs_np, fct_np))
+    )
+    assert out == pytest.approx(ref, abs=1e-4)
+    if estimator not in ("akr", "akr_circperm"):
+        assert np.all(out >= -1e-6)
+
+
+def test_core_ensemble_w_runs(backend, to_backend):
+    rng = np.random.default_rng(1)
+    obs_np = rng.standard_normal(5)
+    fct_np = rng.standard_normal((5, 8))
+    w_np = np.abs(rng.standard_normal((5, 8))) + 0.1
+    w_np = w_np / w_np.sum(-1, keepdims=True)
+    obs, fct, w = to_backend(obs_np), to_backend(fct_np), to_backend(w_np)
+    out = np.asarray(crps.ensemble_w(obs, fct, w, "nrg", xp=get_namespace(obs, fct)))
+    ref = np.asarray(
+        crps.ensemble_w(obs_np, fct_np, w_np, "nrg", xp=get_namespace(obs_np, fct_np))
+    )
+    assert out == pytest.approx(ref, abs=1e-4)

From a1b3c1d8b29cfa0b91211351e1661e8f45b0e9e0 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 02:14:25 +0200
Subject: [PATCH 16/26] Migrate CRPS closed-form scores to the array-api xp
 namespace

---
 scoringrules/core/crps/_closed.py | 486 +++++++++++++++---------------
 scoringrules/core/stats_xp.py     | 199 ++++++++++++
 tests/test_crps_core_xp.py        |  50 +++
 3 files changed, 491 insertions(+), 244 deletions(-)
 create mode 100644 scoringrules/core/stats_xp.py

diff --git a/scoringrules/core/crps/_closed.py b/scoringrules/core/crps/_closed.py
index 713c6aa..812198d 100644
--- a/scoringrules/core/crps/_closed.py
+++ b/scoringrules/core/crps/_closed.py
@@ -1,7 +1,6 @@
 import typing as tp
 
-from scoringrules.backend import backends
-from scoringrules.core.stats import (
+from scoringrules.core.stats_xp import (
     _binom_cdf,
     _binom_pdf,
     _exp_cdf,
@@ -21,7 +20,7 @@
 )
 
 if tp.TYPE_CHECKING:
-    from scoringrules.core.typing import Array, ArrayLike, Backend
+    from scoringrules.core.typing import Array, ArrayLike
 
 
 def beta(
@@ -30,27 +29,27 @@ def beta(
     b: "ArrayLike",
     lower: "ArrayLike" = 0.0,
     upper: "ArrayLike" = 1.0,
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the beta distribution."""
-    B = backends.active if backend is None else backends[backend]
-    obs, a, b, lower, upper = map(B.asarray, (obs, a, b, lower, upper))
+    obs, a, b, lower, upper = map(xp.asarray, (obs, a, b, lower, upper))
 
     if _is_scalar_value(lower, 0.0) and _is_scalar_value(upper, 1.0):
         special_limits = False
     else:
-        if B.any(lower >= upper):
+        if xp.any(lower >= upper):
             raise ValueError("lower must be less than upper")
         special_limits = True
 
     if special_limits:
         obs = (obs - lower) / (upper - lower)
 
-    I_ab = B.betainc(a, b, obs)
-    I_a1b = B.betainc(a + 1, b, obs)
-    F_ab = B.minimum(B.maximum(I_ab, 0), 1)
-    F_a1b = B.minimum(B.maximum(I_a1b, 0), 1)
-    bet_rat = 2 * B.beta(2 * a, 2 * b) / (a * B.beta(a, b) ** 2)
+    I_ab = xp.betainc(a, b, obs)
+    I_a1b = xp.betainc(a + 1, b, obs)
+    F_ab = xp.minimum(xp.maximum(I_ab, 0), 1)
+    F_a1b = xp.minimum(xp.maximum(I_a1b, 0), 1)
+    bet_rat = 2 * xp.beta(2 * a, 2 * b) / (a * xp.beta(a, b) ** 2)
     s = obs * (2 * F_ab - 1) + (a / (a + b)) * (1 - 2 * F_a1b - bet_rat)
 
     if special_limits:
@@ -63,7 +62,8 @@ def binomial(
     obs: "ArrayLike",
     n: "ArrayLike",
     prob: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the binomial distribution.
 
@@ -72,22 +72,21 @@ def binomial(
     This is a bit of a hacky implementation, due to how the arrays
     must be broadcasted, but it should work for now.
     """
-    B = backends.active if backend is None else backends[backend]
-    obs, n, prob = map(B.asarray, (obs, n, prob))
+    obs, n, prob = map(xp.asarray, (obs, n, prob))
     ones_like_n = 0.0 * n + 1
 
     def _inner(params):
         obs, n, prob = params
-        x = B.arange(0, n + 1)
-        w = _binom_pdf(x, n, prob)
-        a = _binom_cdf(x, n, prob) - 0.5 * w
-        s = 2 * B.sum(w * ((obs < x) - a) * (x - obs))
+        x = xp.arange(0, n + 1)
+        w = _binom_pdf(x, n, prob, xp=xp)
+        a = _binom_cdf(x, n, prob, xp=xp) - 0.5 * w
+        s = 2 * xp.sum(w * ((obs < x) - a) * (x - obs))
         return s
 
     # if n is a scalar, then if needed we must broadcast k and p to the same shape as n
-    # TODO: implement B.broadcast() for backends
+    # TODO: implement xp.broadcast() for backends
     if n.size == 1:
-        x = B.arange(0, n + 1)
+        x = xp.arange(0, n + 1)
         need_broadcast = not (obs.size == 1 and prob.size == 1)
 
         if need_broadcast:
@@ -98,9 +97,9 @@ def _inner(params):
             prob = prob * ones_like_n
             obs = obs * ones_like_n
 
-        w = _binom_pdf(x, n, prob)
-        a = _binom_cdf(x, n, prob) - 0.5 * w
-        s = 2 * B.sum(
+        w = _binom_pdf(x, n, prob, xp=xp)
+        a = _binom_cdf(x, n, prob, xp=xp) - 0.5 * w
+        s = 2 * xp.sum(
             w * ((obs < x) - a) * (x - obs), axis=-1 if need_broadcast else None
         )
 
@@ -110,24 +109,21 @@ def _inner(params):
         prob = prob * ones_like_n if prob.size == 1 else prob
 
         # option 1: in a loop
-        s = B.stack(
+        s = xp.stack(
             [_inner(params) for params in zip(obs, n, prob, strict=True)],
             axis=-1,
         )
 
         # option 2: apply_along_axis (does not work with JAX)
-        # s = B.apply_along_axis(_inner, B.stack((obs, n, prob), axis=-1), -1)
+        # s = xp.apply_along_axis(_inner, xp.stack((obs, n, prob), axis=-1), -1)
 
     return s
 
 
-def exponential(
-    obs: "ArrayLike", rate: "ArrayLike", backend: "Backend" = None
-) -> "Array":
+def exponential(obs: "ArrayLike", rate: "ArrayLike", *, xp) -> "Array":
     """Compute the CRPS for the exponential distribution."""
-    B = backends.active if backend is None else backends[backend]
-    rate, obs = map(B.asarray, (rate, obs))
-    s = B.abs(obs) - (2 * _exp_cdf(obs, rate, backend=backend) / rate) + 1 / (2 * rate)
+    rate, obs = map(xp.asarray, (rate, obs))
+    s = xp.abs(obs) - (2 * _exp_cdf(obs, rate, xp=xp) / rate) + 1 / (2 * rate)
     return s
 
 
@@ -136,22 +132,22 @@ def exponentialM(
     mass: "ArrayLike",
     location: "ArrayLike",
     scale: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the standard exponential distribution with a point mass at the boundary."""
-    B = backends.active if backend is None else backends[backend]
-    obs, location, scale, mass = map(B.asarray, (obs, location, scale, mass))
+    obs, location, scale, mass = map(xp.asarray, (obs, location, scale, mass))
 
     if not _is_scalar_value(location, 0.0):
         obs -= location
 
     a = 1.0 if _is_scalar_value(mass, 0.0) else 1 - mass
-    s = B.abs(obs)
+    s = xp.abs(obs)
 
     if _is_scalar_value(scale, 1.0):
-        s -= a * (2 * _exp_cdf(obs, 1.0, backend=backend) - 0.5 * a)
+        s -= a * (2 * _exp_cdf(obs, 1.0, xp=xp) - 0.5 * a)
     else:
-        s -= scale * a * (2 * _exp_cdf(obs, 1 / scale, backend=backend) - 0.5 * a)
+        s -= scale * a * (2 * _exp_cdf(obs, 1 / scale, xp=xp) - 0.5 * a)
 
     return s
 
@@ -161,19 +157,19 @@ def twopexponential(
     scale1: "ArrayLike",
     scale2: "ArrayLike",
     location: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the two-piece exponential distribution."""
-    B = backends.active if backend is None else backends[backend]
-    scale1, scale2, location, obs = map(B.asarray, (scale1, scale2, location, obs))
+    scale1, scale2, location, obs = map(xp.asarray, (scale1, scale2, location, obs))
     obs = obs - location
-    z = B.abs(obs)
+    z = xp.abs(obs)
     c1 = 2 * (scale1**2) / (scale1 + scale2)
     c2 = 2 * (scale2**2) / (scale1 + scale2)
     c3 = (scale1**3 + scale2**3) / (2 * (scale1 + scale2) ** 2)
-    s_1 = z + c1 * B.exp(-z / scale1) - c1 + c3
-    s_2 = z + c2 * B.exp(-z / scale2) - c2 + c3
-    s = B.where(obs < 0.0, s_1, s_2)
+    s_1 = z + c1 * xp.exp(-z / scale1) - c1 + c3
+    s_2 = z + c2 * xp.exp(-z / scale2) - c2 + c3
+    s = xp.where(obs < 0.0, s_1, s_2)
     return s
 
 
@@ -181,17 +177,17 @@ def gamma(
     obs: "ArrayLike",
     shape: "ArrayLike",
     rate: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the gamma distribution."""
-    B = backends.active if backend is None else backends[backend]
-    obs, shape, rate = map(B.asarray, (obs, shape, rate))
-    F_ab = _gamma_cdf(obs, shape, rate, backend=backend)
-    F_ab1 = _gamma_cdf(obs, shape + 1, rate, backend=backend)
+    obs, shape, rate = map(xp.asarray, (obs, shape, rate))
+    F_ab = _gamma_cdf(obs, shape, rate, xp=xp)
+    F_ab1 = _gamma_cdf(obs, shape + 1, rate, xp=xp)
     s = (
         obs * (2 * F_ab - 1)
         - (shape / rate) * (2 * F_ab1 - 1)
-        - 1 / (rate * B.beta(B.asarray(0.5), shape))
+        - 1 / (rate * xp.beta(xp.asarray(0.5), shape))
     )
     return s
 
@@ -201,20 +197,22 @@ def csg0(
     shape: "ArrayLike",
     rate: "ArrayLike",
     shift: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the censored, shifted gamma distribution."""
-    B = backends.active if backend is None else backends[backend]
-    obs, shape, rate, shift = map(B.asarray, (obs, shape, rate, shift))
+    obs, shape, rate, shift = map(xp.asarray, (obs, shape, rate, shift))
     obs_shifted = obs + shift
-    F_ab_shifted = _gamma_cdf(obs_shifted, shape, rate, backend=backend)
-    F_2ab_2d = _gamma_cdf(2 * shift, 2 * shape, rate, backend=backend)
-    F_ab_d = _gamma_cdf(shift, shape, rate, backend=backend)
-    F_ab1_d = _gamma_cdf(shift, shape + 1, rate, backend=backend)
-    F_ab1_shifted = _gamma_cdf(obs_shifted, shape + 1, rate, backend=backend)
+    F_ab_shifted = _gamma_cdf(obs_shifted, shape, rate, xp=xp)
+    F_2ab_2d = _gamma_cdf(2 * shift, 2 * shape, rate, xp=xp)
+    F_ab_d = _gamma_cdf(shift, shape, rate, xp=xp)
+    F_ab1_d = _gamma_cdf(shift, shape + 1, rate, xp=xp)
+    F_ab1_shifted = _gamma_cdf(obs_shifted, shape + 1, rate, xp=xp)
     s = (
         obs_shifted * (2 * F_ab_shifted - 1)
-        - (shape / (rate * B.pi)) * B.beta(B.asarray(0.5), shape + 0.5) * (1 - F_2ab_2d)
+        - (shape / (rate * xp.pi))
+        * xp.beta(xp.asarray(0.5), shape + 0.5)
+        * (1 - F_2ab_2d)
         + shape / rate * (1 + 2 * F_ab_d * F_ab1_d - F_ab_d**2 - 2 * F_ab1_shifted)
         - shift * F_ab_d**2
     )
@@ -229,11 +227,11 @@ def gev(
     shape: "ArrayLike",
     location: "ArrayLike",
     scale: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the GEV distribution."""
-    B = backends.active if backend is None else backends[backend]
-    obs, shape, location, scale = map(B.asarray, (obs, shape, location, scale))
+    obs, shape, location, scale = map(xp.asarray, (obs, shape, location, scale))
 
     obs = (obs - location) / scale
     # if not _is_scalar_value(location, 0.0):
@@ -243,32 +241,32 @@ def gev(
     # obs /= scale
 
     def _gev_adjust_fn(s, xi, f_xi):
-        res = B.nan * s
+        res = xp.nan * s
         p_xi = xi > 0
         n_xi = xi < 0
         n_inv_xi = -1 / xi
 
-        gen_res = n_inv_xi * f_xi + B.gammauinc(1 - xi, -B.log(f_xi)) / xi
+        gen_res = n_inv_xi * f_xi + xp.gammauinc(1 - xi, -xp.log(f_xi)) / xi
 
-        res = B.where(p_xi & (s <= n_inv_xi), 0, res)
-        res = B.where(p_xi & (s > n_inv_xi), gen_res, res)
+        res = xp.where(p_xi & (s <= n_inv_xi), 0, res)
+        res = xp.where(p_xi & (s > n_inv_xi), gen_res, res)
 
-        res = B.where(n_xi & (s < n_inv_xi), gen_res, res)
-        res = B.where(n_xi & (s >= n_inv_xi), n_inv_xi + B.gamma(1 - xi) / xi, res)
+        res = xp.where(n_xi & (s < n_inv_xi), gen_res, res)
+        res = xp.where(n_xi & (s >= n_inv_xi), n_inv_xi + xp.gamma(1 - xi) / xi, res)
 
         return res
 
-    F_xi = _gev_cdf(obs, shape, backend=backend)
+    F_xi = _gev_cdf(obs, shape, xp=xp)
     zero_shape = shape == 0.0
-    shape = B.where(~zero_shape, shape, B.nan)
+    shape = xp.where(~zero_shape, shape, xp.nan)
     G_xi = _gev_adjust_fn(obs, shape, F_xi)
 
-    out = B.where(
+    out = xp.where(
         zero_shape,
-        -obs - 2 * B.expi(B.log(F_xi)) + EULERMASCHERONI - B.log(2),
+        -obs - 2 * xp.expi(xp.log(F_xi)) + EULERMASCHERONI - xp.log(2),
         obs * (2 * F_xi - 1)
         - 2 * G_xi
-        - (1 - (2 - 2**shape) * B.gamma(1 - shape)) / shape,
+        - (1 - (2 - 2**shape) * xp.gamma(1 - shape)) / shape,
     )
 
     out = out * scale
@@ -282,19 +280,19 @@ def gpd(
     location: "ArrayLike",
     scale: "ArrayLike",
     mass: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the GPD distribution."""
-    B = backends.active if backend is None else backends[backend]
     shape, location, scale, mass, obs = map(
-        B.asarray, (shape, location, scale, mass, obs)
+        xp.asarray, (shape, location, scale, mass, obs)
     )
-    shape = B.where(shape < 1.0, shape, B.nan)
-    mass = B.where((mass >= 0.0) & (mass <= 1.0), mass, B.nan)
+    shape = xp.where(shape < 1.0, shape, xp.nan)
+    mass = xp.where((mass >= 0.0) & (mass <= 1.0), mass, xp.nan)
     ω = (obs - location) / scale
-    F_xi = _gpd_cdf(ω, shape, backend=backend)
+    F_xi = _gpd_cdf(ω, shape, xp=xp)
     s = (
-        B.abs(ω)
+        xp.abs(ω)
         - 2 * (1 - mass) * (1 - (1 - F_xi) ** (1 - shape)) / (1 - shape)
         + ((1 - mass) ** 2) / (2 - shape)
     )
@@ -309,44 +307,44 @@ def gtclogistic(
     upper: "ArrayLike",
     lmass: "ArrayLike",
     umass: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the generalised truncated and censored logistic distribution."""
-    B = backends.active if backend is None else backends[backend]
     obs, mu, sigma, lower, upper, lmass, umass = map(
-        B.asarray, (obs, location, scale, lower, upper, lmass, umass)
+        xp.asarray, (obs, location, scale, lower, upper, lmass, umass)
     )
     ω = (obs - mu) / sigma
     u = (upper - mu) / sigma
     l = (lower - mu) / sigma
-    z = B.minimum(B.maximum(ω, l), u)
-    F_u = _logis_cdf(u, backend=backend)
-    F_l = _logis_cdf(l, backend=backend)
-    F_mu = _logis_cdf(-u, backend=backend)
-    F_ml = _logis_cdf(-l, backend=backend)
-    F_mz = _logis_cdf(-z, backend=backend)
+    z = xp.minimum(xp.maximum(ω, l), u)
+    F_u = _logis_cdf(u, xp=xp)
+    F_l = _logis_cdf(l, xp=xp)
+    F_mu = _logis_cdf(-u, xp=xp)
+    F_ml = _logis_cdf(-l, xp=xp)
+    F_mz = _logis_cdf(-z, xp=xp)
 
     u_inf = u == float("inf")
     l_inf = l == float("-inf")
 
-    F_mu = B.where(u_inf | l_inf, B.nan, F_mu)
-    F_ml = B.where(u_inf | l_inf, B.nan, F_ml)
-    u = B.where(u_inf, B.nan, u)
-    l = B.where(l_inf, B.nan, l)
+    F_mu = xp.where(u_inf | l_inf, xp.nan, F_mu)
+    F_ml = xp.where(u_inf | l_inf, xp.nan, F_ml)
+    u = xp.where(u_inf, xp.nan, u)
+    l = xp.where(l_inf, xp.nan, l)
 
-    G_u = B.where(u_inf, 0.0, u * F_u + B.log(F_mu))
-    G_l = B.where(l_inf, 0.0, l * F_l + B.log(F_ml))
-    H_u = B.where(u_inf, 1.0, F_u - u * F_u**2 + (1 - 2 * F_u) * B.log(F_mu))
-    H_l = B.where(l_inf, 0.0, F_l - l * F_l**2 + (1 - 2 * F_l) * B.log(F_ml))
+    G_u = xp.where(u_inf, 0.0, u * F_u + xp.log(F_mu))
+    G_l = xp.where(l_inf, 0.0, l * F_l + xp.log(F_ml))
+    H_u = xp.where(u_inf, 1.0, F_u - u * F_u**2 + (1 - 2 * F_u) * xp.log(F_mu))
+    H_l = xp.where(l_inf, 0.0, F_l - l * F_l**2 + (1 - 2 * F_l) * xp.log(F_ml))
 
     c = (1 - lmass - umass) / (F_u - F_l)
 
-    s1_u = B.where(u_inf & (umass == 0.0), 0.0, u * umass**2)
-    s1_l = B.where(l_inf & (lmass == 0.0), 0.0, l * lmass**2)
+    s1_u = xp.where(u_inf & (umass == 0.0), 0.0, u * umass**2)
+    s1_l = xp.where(l_inf & (lmass == 0.0), 0.0, l * lmass**2)
 
-    s1 = B.abs(ω - z) + s1_u - s1_l
+    s1 = xp.abs(ω - z) + s1_u - s1_l
     s2 = c * z * ((1 - 2 * lmass) * F_u + (1 - 2 * umass) * F_l) / (1 - lmass - umass)
-    s3 = c * (2 * B.log(F_mz) - 2 * G_u * umass - 2 * G_l * lmass)
+    s3 = c * (2 * xp.log(F_mz) - 2 * G_u * umass - 2 * G_l * lmass)
     s4 = c**2 * (H_u - H_l)
     return sigma * (s1 - s2 - s3 - s4)
 
@@ -359,37 +357,37 @@ def gtcnormal(
     upper: "ArrayLike",
     lmass: "ArrayLike",
     umass: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the generalised truncated and censored normal distribution."""
-    B = backends.active if backend is None else backends[backend]
     mu, sigma, lower, upper, lmass, umass, obs = map(
-        B.asarray, (location, scale, lower, upper, lmass, umass, obs)
+        xp.asarray, (location, scale, lower, upper, lmass, umass, obs)
     )
     ω = (obs - mu) / sigma
     u = (upper - mu) / sigma
     l = (lower - mu) / sigma
-    z = B.minimum(B.maximum(ω, l), u)
-    F_u = _norm_cdf(u, backend=backend)
-    F_l = _norm_cdf(l, backend=backend)
-    F_z = _norm_cdf(z, backend=backend)
-    F_u2 = _norm_cdf(u * B.sqrt(2), backend=backend)
-    F_l2 = _norm_cdf(l * B.sqrt(2), backend=backend)
-    f_u = _norm_pdf(u, backend=backend)
-    f_l = _norm_pdf(l, backend=backend)
-    f_z = _norm_pdf(z, backend=backend)
+    z = xp.minimum(xp.maximum(ω, l), u)
+    F_u = _norm_cdf(u, xp=xp)
+    F_l = _norm_cdf(l, xp=xp)
+    F_z = _norm_cdf(z, xp=xp)
+    F_u2 = _norm_cdf(u * xp.sqrt(2), xp=xp)
+    F_l2 = _norm_cdf(l * xp.sqrt(2), xp=xp)
+    f_u = _norm_pdf(u, xp=xp)
+    f_l = _norm_pdf(l, xp=xp)
+    f_z = _norm_pdf(z, xp=xp)
 
     u_inf = u == float("inf")
     l_inf = l == float("-inf")
 
-    u = B.where(u_inf, B.nan, u)
-    l = B.where(l_inf, B.nan, l)
-    s1_u = B.where(u_inf & (umass == 0.0), 0.0, u * umass**2)
-    s1_l = B.where(l_inf & (lmass == 0.0), 0.0, l * lmass**2)
+    u = xp.where(u_inf, xp.nan, u)
+    l = xp.where(l_inf, xp.nan, l)
+    s1_u = xp.where(u_inf & (umass == 0.0), 0.0, u * umass**2)
+    s1_l = xp.where(l_inf & (lmass == 0.0), 0.0, l * lmass**2)
 
     c = (1 - lmass - umass) / (F_u - F_l)
 
-    s1 = B.abs(ω - z) + s1_u - s1_l
+    s1 = xp.abs(ω - z) + s1_u - s1_l
     s2 = (
         c
         * z
@@ -399,7 +397,7 @@ def gtcnormal(
         )
     )
     s3 = c * (2 * f_z - 2 * f_u * umass - 2 * f_l * lmass)
-    s4 = c**2 * (F_u2 - F_l2) / B.sqrt(B.pi)
+    s4 = c**2 * (F_u2 - F_l2) / xp.sqrt(xp.pi)
     return sigma * (s1 + s2 + s3 - s4)
 
 
@@ -412,52 +410,52 @@ def gtct(
     upper: "ArrayLike",
     lmass: "ArrayLike",
     umass: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the generalised truncated and censored t distribution."""
-    B = backends.active if backend is None else backends[backend]
     df, mu, sigma, lower, upper, lmass, umass, obs = map(
-        B.asarray, (df, location, scale, lower, upper, lmass, umass, obs)
+        xp.asarray, (df, location, scale, lower, upper, lmass, umass, obs)
     )
     ω = (obs - mu) / sigma
     u = (upper - mu) / sigma
     l = (lower - mu) / sigma
-    z = B.minimum(B.maximum(ω, l), u)
-    F_u = _t_cdf(u, df, backend=backend)
-    F_l = _t_cdf(l, df, backend=backend)
-    F_z = _t_cdf(z, df, backend=backend)
-    f_u = _t_pdf(u, df, backend=backend)
-    f_l = _t_pdf(l, df, backend=backend)
-    f_z = _t_pdf(z, df, backend=backend)
+    z = xp.minimum(xp.maximum(ω, l), u)
+    F_u = _t_cdf(u, df, xp=xp)
+    F_l = _t_cdf(l, df, xp=xp)
+    F_z = _t_cdf(z, df, xp=xp)
+    f_u = _t_pdf(u, df, xp=xp)
+    f_l = _t_pdf(l, df, xp=xp)
+    f_z = _t_pdf(z, df, xp=xp)
 
     u_inf = u == float("inf")
     l_inf = l == float("-inf")
-    u = B.where(u_inf, B.nan, u)
-    l = B.where(l_inf, B.nan, l)
+    u = xp.where(u_inf, xp.nan, u)
+    l = xp.where(l_inf, xp.nan, l)
 
-    s1_u = B.where(u_inf & (umass == 0.0), 0.0, u * umass**2)
-    s1_l = B.where(l_inf & (lmass == 0.0), 0.0, l * lmass**2)
+    s1_u = xp.where(u_inf & (umass == 0.0), 0.0, u * umass**2)
+    s1_l = xp.where(l_inf & (lmass == 0.0), 0.0, l * lmass**2)
 
-    G_u = B.where(u_inf, 0.0, -f_u * (df + u**2) / (df - 1))
-    G_l = B.where(l_inf, 0.0, -f_l * (df + l**2) / (df - 1))
+    G_u = xp.where(u_inf, 0.0, -f_u * (df + u**2) / (df - 1))
+    G_l = xp.where(l_inf, 0.0, -f_l * (df + l**2) / (df - 1))
     G_z = -f_z * (df + z**2) / (df - 1)
 
-    I_u = B.where(u_inf, 1.0, B.betainc(1 / 2, df - 1 / 2, (u**2) / (df + u**2)))
-    I_l = B.where(l_inf, 1.0, B.betainc(1 / 2, df - 1 / 2, (l**2) / (df + l**2)))
-    sgn_u = B.where(u_inf, 1.0, (u / B.abs(u)))
-    sgn_l = B.where(l_inf, -1.0, (l / B.abs(l)))
+    I_u = xp.where(u_inf, 1.0, xp.betainc(1 / 2, df - 1 / 2, (u**2) / (df + u**2)))
+    I_l = xp.where(l_inf, 1.0, xp.betainc(1 / 2, df - 1 / 2, (l**2) / (df + l**2)))
+    sgn_u = xp.where(u_inf, 1.0, (u / xp.abs(u)))
+    sgn_l = xp.where(l_inf, -1.0, (l / xp.abs(l)))
     H_u = (sgn_u * I_u + 1) / 2
     H_l = (sgn_l * I_l + 1) / 2
 
     Bbar = (
-        (2 * B.sqrt(df) / (df - 1))
-        * B.beta(1 / 2, df - 1 / 2)
-        / (B.beta(1 / 2, df / 2) ** 2)
+        (2 * xp.sqrt(df) / (df - 1))
+        * xp.beta(1 / 2, df - 1 / 2)
+        / (xp.beta(1 / 2, df / 2) ** 2)
     )
 
     c = (1 - lmass - umass) / (F_u - F_l)
 
-    s1 = B.abs(ω - z) + s1_u - s1_l
+    s1 = xp.abs(ω - z) + s1_u - s1_l
     s2 = (
         c
         * z
@@ -476,7 +474,8 @@ def hypergeometric(
     m: "ArrayLike",
     n: "ArrayLike",
     k: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the hypergeometric distribution.
 
@@ -494,43 +493,44 @@ def hypergeometric(
     n: number of success states in the population.
     N: sample size (number of draws).
     """
-    B = backends.active if backend is None else backends[backend]
-    obs, m, n, k = map(B.asarray, (obs, m, n, k))
+    obs, m, n, k = map(xp.asarray, (obs, m, n, k))
 
     # scipy uses different notation
     M = m + n
     N = k
 
     # if n is a scalar, x always has the same shape, which simplifies the computation
-    if B.size(n) == 1:
-        x = B.arange(0, n + 1)
-        out_ndims = B.max(B.asarray([_input.ndim for _input in [obs, M, m, N]]), axis=0)
-        x = B.expand_dims(x, axis=tuple(range(-out_ndims, 0)))
-        x, M, m, N = B.broadcast_arrays(x, M, m, N)
-        f_np = _hypergeo_pdf(x, M, m, N, backend=backend)
-        F_np = _hypergeo_cdf(x, M, m, N, backend=backend)
-        s = 2 * B.sum(
-            f_np * (B.asarray((obs < x), dtype=float) - F_np + f_np / 2) * (x - obs),
+    if xp.size(n) == 1:
+        x = xp.arange(0, n + 1)
+        out_ndims = xp.max(
+            xp.asarray([_input.ndim for _input in [obs, M, m, N]]), axis=0
+        )
+        x = xp.expand_dims(x, axis=tuple(range(-out_ndims, 0)))
+        x, M, m, N = xp.broadcast_arrays(x, M, m, N)
+        f_np = _hypergeo_pdf(x, M, m, N, xp=xp)
+        F_np = _hypergeo_cdf(x, M, m, N, xp=xp)
+        s = 2 * xp.sum(
+            f_np * (xp.asarray((obs < x), dtype=float) - F_np + f_np / 2) * (x - obs),
             axis=0,
         )
     # if n is an array, we need to loop over the elements
     else:
-        obs, M, m, N = B.broadcast_arrays(obs, M, m, N)
+        obs, M, m, N = xp.broadcast_arrays(obs, M, m, N)
         s = []
         for i, _n in enumerate(n.reshape(-1)):
-            x = B.arange(_n + 1)
+            x = xp.arange(_n + 1)
             f_np = _hypergeo_pdf(
-                x, M.reshape(-1)[i], m.reshape(-1)[i], N.reshape(-1)[i], backend=backend
+                x, M.reshape(-1)[i], m.reshape(-1)[i], N.reshape(-1)[i], xp=xp
             )
             F_np = _hypergeo_cdf(
-                x, M.reshape(-1)[i], m.reshape(-1)[i], N.reshape(-1)[i], backend=backend
+                x, M.reshape(-1)[i], m.reshape(-1)[i], N.reshape(-1)[i], xp=xp
             )
             s.append(
                 2
-                * B.sum(
+                * xp.sum(
                     f_np
                     * (
-                        B.asarray((obs.reshape(-1)[i] < x), dtype=float)
+                        xp.asarray((obs.reshape(-1)[i] < x), dtype=float)
                         - F_np
                         + f_np / 2
                     )
@@ -538,7 +538,7 @@ def hypergeometric(
                     axis=0,
                 )
             )
-        s = B.asarray(s).reshape(obs.shape)
+        s = xp.asarray(s).reshape(obs.shape)
     return s
 
 
@@ -546,60 +546,60 @@ def laplace(
     obs: "ArrayLike",
     location: "ArrayLike",
     scale: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the laplace distribution."""
-    B = backends.active if backend is None else backends[backend]
-    obs, mu, sigma = map(B.asarray, (obs, location, scale))
+    obs, mu, sigma = map(xp.asarray, (obs, location, scale))
     obs = (obs - mu) / sigma
-    return sigma * (B.abs(obs) + B.exp(-B.abs(obs)) - 3 / 4)
+    return sigma * (xp.abs(obs) + xp.exp(-xp.abs(obs)) - 3 / 4)
 
 
 def logistic(
     obs: "ArrayLike",
     mu: "ArrayLike",
     sigma: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the normal distribution."""
-    B = backends.active if backend is None else backends[backend]
-    mu, sigma, obs = map(B.asarray, (mu, sigma, obs))
+    mu, sigma, obs = map(xp.asarray, (mu, sigma, obs))
     ω = (obs - mu) / sigma
-    return sigma * (ω - 2 * B.log(_logis_cdf(ω, backend=backend)) - 1)
+    return sigma * (ω - 2 * xp.log(_logis_cdf(ω, xp=xp)) - 1)
 
 
 def loglaplace(
     obs: "ArrayLike",
     locationlog: "ArrayLike",
     scalelog: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the log-laplace distribution."""
-    B = backends.active if backend is None else backends[backend]
-    obs, mulog, sigmalog = map(B.asarray, (obs, locationlog, scalelog))
-    obs, mulog, sigmalog = B.broadcast_arrays(obs, mulog, sigmalog)
+    obs, mulog, sigmalog = map(xp.asarray, (obs, locationlog, scalelog))
+    obs, mulog, sigmalog = xp.broadcast_arrays(obs, mulog, sigmalog)
 
-    logx_norm = (B.log(obs) - mulog) / sigmalog
+    logx_norm = (xp.log(obs) - mulog) / sigmalog
 
     cond_0 = obs <= 0.0
-    cond_1 = obs < B.exp(mulog)
+    cond_1 = obs < xp.exp(mulog)
 
-    F_case_0 = B.asarray(cond_0, dtype=int)
-    F_case_1 = B.asarray(~cond_0 & cond_1, dtype=int)
-    F_case_2 = B.asarray(~cond_1, dtype=int)
+    F_case_0 = xp.asarray(cond_0, dtype=int)
+    F_case_1 = xp.asarray(~cond_0 & cond_1, dtype=int)
+    F_case_2 = xp.asarray(~cond_1, dtype=int)
     F = (
         F_case_0 * 0.0
-        + F_case_1 * (0.5 * B.exp(logx_norm))
-        + F_case_2 * (1 - 0.5 * B.exp(-logx_norm))
+        + F_case_1 * (0.5 * xp.exp(logx_norm))
+        + F_case_2 * (1 - 0.5 * xp.exp(-logx_norm))
     )
 
-    A_case_0 = B.asarray(cond_1, dtype=int)
-    A_case_1 = B.asarray(~cond_1, dtype=int)
+    A_case_0 = xp.asarray(cond_1, dtype=int)
+    A_case_1 = xp.asarray(~cond_1, dtype=int)
     A = A_case_0 * 1 / (1 + sigmalog) * (
         1 - (2 * F) ** (1 + sigmalog)
     ) + A_case_1 * -1 / (1 - sigmalog) * (1 - (2 * (1 - F)) ** (1 - sigmalog))
 
-    s = obs * (2 * F - 1) + B.exp(mulog) * (A + sigmalog / (4 - sigmalog**2))
+    s = obs * (2 * F - 1) + xp.exp(mulog) * (A + sigmalog / (4 - sigmalog**2))
     return s
 
 
@@ -607,15 +607,15 @@ def loglogistic(
     obs: "ArrayLike",
     mulog: "ArrayLike",
     sigmalog: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the log-logistic distribution."""
-    B = backends.active if backend is None else backends[backend]
-    mulog, sigmalog, obs = map(B.asarray, (mulog, sigmalog, obs))
-    F_ms = 1 / (1 + B.exp(-(B.log(obs) - mulog) / sigmalog))
-    b = B.beta(1 + sigmalog, 1 - sigmalog)
-    I_B = B.betainc(1 + sigmalog, 1 - sigmalog, F_ms)
-    s = obs * (2 * F_ms - 1) - B.exp(mulog) * b * (2 * I_B + sigmalog - 1)
+    mulog, sigmalog, obs = map(xp.asarray, (mulog, sigmalog, obs))
+    F_ms = 1 / (1 + xp.exp(-(xp.log(obs) - mulog) / sigmalog))
+    b = xp.beta(1 + sigmalog, 1 - sigmalog)
+    I_B = xp.betainc(1 + sigmalog, 1 - sigmalog, F_ms)
+    s = obs * (2 * F_ms - 1) - xp.exp(mulog) * b * (2 * I_B + sigmalog - 1)
     return s
 
 
@@ -623,16 +623,16 @@ def lognormal(
     obs: "ArrayLike",
     mulog: "ArrayLike",
     sigmalog: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the lognormal distribution."""
-    B = backends.active if backend is None else backends[backend]
-    mulog, sigmalog, obs = map(B.asarray, (mulog, sigmalog, obs))
-    ω = (B.log(obs) - mulog) / sigmalog
-    ex = 2 * B.exp(mulog + sigmalog**2 / 2)
-    return obs * (2.0 * _norm_cdf(ω, backend=backend) - 1) - ex * (
-        _norm_cdf(ω - sigmalog, backend=backend)
-        + _norm_cdf(sigmalog / B.sqrt(B.asarray(2.0)), backend=backend)
+    mulog, sigmalog, obs = map(xp.asarray, (mulog, sigmalog, obs))
+    ω = (xp.log(obs) - mulog) / sigmalog
+    ex = 2 * xp.exp(mulog + sigmalog**2 / 2)
+    return obs * (2.0 * _norm_cdf(ω, xp=xp) - 1) - ex * (
+        _norm_cdf(ω - sigmalog, xp=xp)
+        + _norm_cdf(sigmalog / xp.sqrt(xp.asarray(2.0)), xp=xp)
         - 1
     )
 
@@ -642,26 +642,24 @@ def mixnorm(
     m: "ArrayLike",
     s: "ArrayLike",
     w: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for a mixture of normal distributions."""
-    B = backends.active if backend is None else backends[backend]
-    m, s, w, obs = map(B.asarray, (m, s, w, obs))
+    m, s, w, obs = map(xp.asarray, (m, s, w, obs))
 
     m_y = obs[..., None] - m
     m_X = m[..., None] - m[..., None, :]
-    s_X = B.sqrt(s[..., None] ** 2 + s[..., None, :] ** 2)
+    s_X = xp.sqrt(s[..., None] ** 2 + s[..., None, :] ** 2)
     w_X = w[..., None] * w[..., None, :]
 
-    A_y = m_y * (2 * _norm_cdf(m_y / s, backend=backend) - 1) + 2 * s * _norm_pdf(
-        m_y / s, backend=backend
-    )
-    A_X = m_X * (2 * _norm_cdf(m_X / s_X, backend=backend) - 1) + 2 * s_X * _norm_pdf(
-        m_X / s_X, backend=backend
+    A_y = m_y * (2 * _norm_cdf(m_y / s, xp=xp) - 1) + 2 * s * _norm_pdf(m_y / s, xp=xp)
+    A_X = m_X * (2 * _norm_cdf(m_X / s_X, xp=xp) - 1) + 2 * s_X * _norm_pdf(
+        m_X / s_X, xp=xp
     )
 
-    sc_1 = B.sum(w * A_y, axis=-1)
-    sc_2 = B.sum(w_X * A_X, axis=(-1, -2))
+    sc_1 = xp.sum(w * A_y, axis=-1)
+    sc_2 = xp.sum(w_X * A_X, axis=(-1, -2))
 
     return sc_1 - 0.5 * sc_2
 
@@ -670,14 +668,14 @@ def negbinom(
     obs: "ArrayLike",
     n: "ArrayLike",
     prob: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the negative binomial distribution."""
-    B = backends.active if backend is None else backends[backend]
-    n, prob, obs = map(B.asarray, (n, prob, obs))
-    F_np = _negbinom_cdf(obs, n, prob, backend=backend)
-    F_n1p = _negbinom_cdf(obs - 1, n + 1, prob, backend=backend)
-    F2 = B.hypergeometric(n + 1, 1 / 2, 2, -4 * (1 - prob) / (prob**2))
+    n, prob, obs = map(xp.asarray, (n, prob, obs))
+    F_np = _negbinom_cdf(obs, n, prob, xp=xp)
+    F_n1p = _negbinom_cdf(obs - 1, n + 1, prob, xp=xp)
+    F2 = xp.hypergeometric(n + 1, 1 / 2, 2, -4 * (1 - prob) / (prob**2))
     s = obs * (2 * F_np - 1) - n * (1 - prob) * (prob * (2 * F_n1p - 1) + F2) / (
         prob**2
     )
@@ -688,35 +686,35 @@ def normal(
     obs: "ArrayLike",
     mu: "ArrayLike",
     sigma: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the logistic distribution."""
-    B = backends.active if backend is None else backends[backend]
-    mu, sigma, obs = map(B.asarray, (mu, sigma, obs))
+    mu, sigma, obs = map(xp.asarray, (mu, sigma, obs))
     ω = (obs - mu) / sigma
     return sigma * (
-        ω * (2.0 * _norm_cdf(ω, backend=backend) - 1.0)
-        + 2.0 * _norm_pdf(ω, backend=backend)
-        - 1.0 / B.sqrt(B.pi)
+        ω * (2.0 * _norm_cdf(ω, xp=xp) - 1.0)
+        + 2.0 * _norm_pdf(ω, xp=xp)
+        - 1.0 / xp.sqrt(xp.pi)
     )
 
 
 def poisson(
     obs: "ArrayLike",
     mean: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the poisson distribution."""
-    B = backends.active if backend is None else backends[backend]
-    mean, obs = map(B.asarray, (mean, obs))
-    F_m = _pois_cdf(obs, mean, backend=backend)
-    f_m = _pois_pdf(B.floor(obs), mean, backend=backend)
-    I0 = B.mbessel0(2 * mean)
-    I1 = B.mbessel1(2 * mean)
+    mean, obs = map(xp.asarray, (mean, obs))
+    F_m = _pois_cdf(obs, mean, xp=xp)
+    f_m = _pois_pdf(xp.floor(obs), mean, xp=xp)
+    I0 = xp.mbessel0(2 * mean)
+    I1 = xp.mbessel1(2 * mean)
     s = (
         (obs - mean) * (2 * F_m - 1)
         + 2 * mean * f_m
-        - mean * B.exp(-2 * mean) * (I0 + I1)
+        - mean * xp.exp(-2 * mean) * (I0 + I1)
     )
     return s
 
@@ -726,21 +724,21 @@ def t(
     df: "ArrayLike",
     location: "ArrayLike",
     scale: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the t distribution."""
-    B = backends.active if backend is None else backends[backend]
-    df, mu, sigma, obs = map(B.asarray, (df, location, scale, obs))
+    df, mu, sigma, obs = map(xp.asarray, (df, location, scale, obs))
     z = (obs - mu) / sigma
-    F_z = _t_cdf(z, df, backend=backend)
-    f_z = _t_pdf(z, df, backend=backend)
+    F_z = _t_cdf(z, df, xp=xp)
+    f_z = _t_pdf(z, df, xp=xp)
     G_z = (df + z**2) / (df - 1)
     s1 = z * (2 * F_z - 1)
     s2 = 2 * f_z * G_z
     s3 = (
-        (2 * B.sqrt(df) / (df - 1))
-        * B.beta(1 / 2, df - 1 / 2)
-        / (B.beta(1 / 2, df / 2) ** 2)
+        (2 * xp.sqrt(df) / (df - 1))
+        * xp.beta(1 / 2, df - 1 / 2)
+        / (xp.beta(1 / 2, df / 2) ** 2)
     )
     return sigma * (s1 + s2 - s3)
 
@@ -751,15 +749,15 @@ def uniform(
     max: "ArrayLike",
     lmass: "ArrayLike",
     umass: "ArrayLike",
-    backend: "Backend" = None,
+    *,
+    xp,
 ) -> "Array":
     """Compute the CRPS for the uniform distribution."""
-    B = backends.active if backend is None else backends[backend]
-    min, max, lmass, umass, obs = map(B.asarray, (min, max, lmass, umass, obs))
+    min, max, lmass, umass, obs = map(xp.asarray, (min, max, lmass, umass, obs))
     ω = (obs - min) / (max - min)
-    F_ω = B.minimum(B.maximum(ω, B.asarray(0)), B.asarray(1))
+    F_ω = xp.minimum(xp.maximum(ω, xp.asarray(0)), xp.asarray(1))
     s = (
-        B.abs(ω - F_ω)
+        xp.abs(ω - F_ω)
         + (F_ω**2) * (1 - lmass - umass)
         - F_ω * (1 - 2 * lmass)
         + ((1 - lmass - umass) ** 2) / 3
diff --git a/scoringrules/core/stats_xp.py b/scoringrules/core/stats_xp.py
new file mode 100644
index 0000000..b96607a
--- /dev/null
+++ b/scoringrules/core/stats_xp.py
@@ -0,0 +1,199 @@
+"""Transitional copy of ``core/stats.py`` for the array-API migration.
+
+These are ``xp``-parameterised copies of the statistics helpers needed by the
+CRPS closed-form scores (``core/crps/_closed.py``). They are a faithful
+translation of the corresponding helpers in ``core/stats.py``: instead of
+resolving a backend object ``B`` from the registry, they take a keyword-only
+``xp`` augmented array-API namespace (see ``scoringrules.backend.get_namespace``).
+
+``core/stats.py`` is intentionally left untouched because it is still shared by
+the not-yet-migrated logarithmic-score family.
+"""
+
+import typing as tp
+
+if tp.TYPE_CHECKING:
+    from scoringrules.core.typing import Array, ArrayLike
+
+
+def _norm_pdf(x: "ArrayLike", *, xp) -> "Array":
+    """Probability density function for the standard normal distribution."""
+    return (1.0 / xp.sqrt(2.0 * xp.pi)) * xp.exp(-(x**2) / 2)
+
+
+def _norm_cdf(x: "ArrayLike", *, xp) -> "Array":
+    """Cumulative distribution function for the standard normal distribution."""
+    return (1.0 + xp.erf(x / xp.sqrt(2.0))) / 2.0
+
+
+def _laplace_pdf(x: "ArrayLike", *, xp) -> "Array":
+    """Probability density function for the standard laplace distribution."""
+    return xp.exp(-xp.abs(x)) / 2.0
+
+
+def _logis_pdf(x: "ArrayLike", *, xp) -> "Array":
+    """Probability density function for the standard logistic distribution."""
+    return xp.exp(-x) / (1 + xp.exp(-x)) ** 2
+
+
+def _logis_cdf(x: "ArrayLike", *, xp) -> "Array":
+    """Cumulative distribution function for the standard logistic distribution."""
+    return 1 / (1 + xp.exp(-x))
+
+
+def _exp_pdf(x: "ArrayLike", rate: "ArrayLike", *, xp) -> "Array":
+    """Probability density function for the exponential distribution."""
+    return xp.where(x < 0.0, 0.0, rate * xp.exp(-rate * x))
+
+
+def _exp_cdf(x: "ArrayLike", rate: "ArrayLike", *, xp) -> "Array":
+    """Cumulative distribution function for the exponential distribution."""
+    return xp.maximum(1 - xp.exp(-rate * x), xp.asarray(0.0))
+
+
+def _gamma_pdf(
+    x: "ArrayLike",
+    shape: "ArrayLike",
+    rate: "ArrayLike",
+    *,
+    xp,
+) -> "Array":
+    """Probability density function for the gamma distribution."""
+    prob = (rate**shape) * (x ** (shape - 1)) * (xp.exp(-rate * x)) / xp.gamma(shape)
+    return xp.where(x <= 0.0, 0.0, prob)
+
+
+def _gamma_cdf(
+    x: "ArrayLike",
+    shape: "ArrayLike",
+    rate: "ArrayLike",
+    *,
+    xp,
+) -> "Array":
+    """Cumulative distribution function for the gamma distribution."""
+    zero = xp.asarray(0.0)
+    return xp.maximum(xp.gammainc(shape, rate * xp.maximum(x, zero)), zero)
+
+
+def _pois_pdf(x: "ArrayLike", mean: "ArrayLike", *, xp) -> "Array":
+    """Probability mass function for the Poisson distribution."""
+    x_plus = xp.abs(x)
+    d = xp.where(
+        xp.floor(x_plus) < x_plus,
+        0.0,
+        mean ** (x_plus) * xp.exp(-mean) / xp.factorial(x_plus),
+    )
+    return xp.where(mean < 0.0, xp.nan, xp.where(x < 0.0, 0.0, d))
+
+
+def _pois_cdf(x: "ArrayLike", mean: "ArrayLike", *, xp) -> "Array":
+    """Cumulative distribution function for the Poisson distribution."""
+    x_plus = xp.abs(x)
+    p = xp.gammauinc(xp.floor(x_plus + 1), mean) / xp.gamma(xp.floor(x_plus + 1))
+    return xp.where(x < 0.0, 0.0, p)
+
+
+def _t_pdf(x: "ArrayLike", df: "ArrayLike", *, xp) -> "Array":
+    """Probability density function for the standard Student's t distribution."""
+    x_inf = xp.abs(x) == float("inf")
+    x = xp.where(x_inf, xp.nan, x)
+    s = ((1 + x**2 / df) ** (-(df + 1) / 2)) / (xp.sqrt(df) * xp.beta(1 / 2, df / 2))
+    return xp.where(x_inf, 0.0, s)
+
+
+def _t_cdf(x: "ArrayLike", df: "ArrayLike", *, xp) -> "Array":
+    """Cumulative distribution function for the standard Student's t distribution."""
+    t = df / (x**2 + df)
+    ibeta = xp.betainc(df / 2.0, 0.5, t)
+    s = xp.where(x >= 0, 1 - 0.5 * ibeta, 0.5 * ibeta)
+    return s
+
+
+def _gev_cdf(s: "ArrayLike", xi: "ArrayLike", *, xp) -> "Array":
+    """Cumulative distribution function for the standard GEV distribution."""
+    zero_shape = xi == 0
+    xi = xp.where(zero_shape, xp.nan, xi)
+    general_case = ~zero_shape & (xi * s > -1)
+    cdf = xp.nan * s
+    cdf = xp.where(zero_shape, xp.exp(-xp.exp(-s)), cdf)  # Gumbel CDF
+    cdf = xp.where(
+        general_case,
+        xp.exp(-((1 + xi * xp.where(general_case, s, xp.nan)) ** (-1 / xi))),
+        cdf,
+    )  # General CDF
+    cdf = xp.where((xi > 0) & (s <= -1 / xi), 0, cdf)  # Lower bound CDF
+    cdf = xp.where((xi < 0) & (s >= 1 / xp.abs(xi)), 1, cdf)  # Upper bound CDF
+    return cdf
+
+
+def _gpd_cdf(x: "ArrayLike", shape: "ArrayLike", *, xp) -> "Array":
+    """Cumulative distribution function for the standard GPD distribution."""
+    # masks to handle the different cases
+    shape_0 = shape == 0
+    shape = xp.where(shape_0, xp.nan, shape)
+    shape_p = shape > 0
+    shape_n = ~(shape_0 | shape_p)
+    x_pos = x >= 0
+    x_gt_invxi = x > -1 / shape
+    x = xp.where(x_pos, x, xp.nan)
+    shape = xp.where(shape_n & x_gt_invxi, xp.nan, shape)
+
+    cdf = 0.0
+    cdf = xp.where(shape_0 & x_pos, 1 - xp.exp(-x), cdf)
+    cdf = xp.where(
+        (shape_n & x_pos) | (shape_p & x_pos),
+        1 - (1 + shape * x) ** (-1 / shape),
+        cdf,
+    )
+    cdf = xp.where(shape_n & x_gt_invxi, 1.0, cdf)
+    return cdf
+
+
+def _binom_pdf(k: "ArrayLike", n: "ArrayLike", prob: "ArrayLike", *, xp) -> "Array":
+    """Probability mass function for the binomial distribution."""
+    return xp.comb(n, k) * prob**k * (1 - prob) ** (n - k)
+
+
+def _binom_cdf(k: "ArrayLike", n: "ArrayLike", prob: "ArrayLike", *, xp) -> "Array":
+    """Cumulative distribution function for the binomial distribution."""
+    return xp.where(
+        k < 0, 0.0, xp.betainc(n - xp.minimum(k, n) + 1e-36, k + 1, 1 - prob)
+    )
+
+
+def _hypergeo_pdf(k, M, n, N, *, xp):
+    """
+    Calculate the PMF of the hypergeometric distribution.
+
+    We follow scipy.stats.hypergeom.pmf.
+
+    k: number of observed successes.
+    M: total population size.
+    n: number of success states in the population.
+    N: sample size (number of draws).
+    """
+    ind = (k >= xp.maximum(xp.asarray(0), N - M + n)) & (k <= xp.minimum(n, N))
+    return ind * xp.comb(n, k) * xp.comb(M - n, N - k) / xp.comb(M, N)
+
+
+def _hypergeo_cdf(k, M, n, N, *, xp):
+    """Cumulative distribution function for the hypergeometric distribution."""
+
+    def _inner(m, M, n, N):
+        return xp.sum(_hypergeo_pdf(m, M, n, N, xp=xp), axis=0)
+
+    # if k.size == 1:
+    #     m = xp.arange(k + 1)
+    #     M, n, N = xp.broadcast_arrays(M, n, N)
+    #     return _inner(m[:, None], M[None], n[None], N[None])
+    # else:
+    k, M, n, N = xp.broadcast_arrays(k, M, n, N)
+    _iter = zip(k.ravel(), M.ravel(), n.ravel(), N.ravel(), strict=True)
+    return xp.asarray(
+        [_inner(xp.arange(0, _args[0] + 1), *_args[1:]) for _args in _iter]
+    ).reshape(k.shape)
+
+
+def _negbinom_cdf(x: "ArrayLike", n: "ArrayLike", prob: "ArrayLike", *, xp) -> "Array":
+    """Cumulative distribution function for the negative binomial distribution."""
+    return xp.where(x < 0.0, 0.0, xp.betainc(n, xp.floor(x + 1), prob))
diff --git a/tests/test_crps_core_xp.py b/tests/test_crps_core_xp.py
index a982af8..0a519be 100644
--- a/tests/test_crps_core_xp.py
+++ b/tests/test_crps_core_xp.py
@@ -33,3 +33,53 @@ def test_core_ensemble_w_runs(backend, to_backend):
         crps.ensemble_w(obs_np, fct_np, w_np, "nrg", xp=get_namespace(obs_np, fct_np))
     )
     assert out == pytest.approx(ref, abs=1e-4)
+
+
+def _closed_ref(fn, *args):
+    return np.asarray(
+        fn(
+            *[np.asarray(a) for a in args],
+            xp=get_namespace(*[np.asarray(a) for a in args]),
+        )
+    )
+
+
+@pytest.mark.parametrize(
+    "dist,args",
+    [
+        ("normal", (0.3, 0.0, 1.0)),
+        ("logistic", (0.3, 0.0, 1.0)),
+        ("exponential", (0.8, 3.0)),
+        ("gamma", (0.5, 2.0, 1.5)),
+        ("poisson", (2.0, 3.0)),
+    ],
+)
+def test_core_closed_matches_numpy(dist, args, backend, to_backend):
+    from scoringrules.core import crps
+
+    if backend == "torch" and dist == "normal":
+        # _norm_cdf/_norm_pdf call xp.sqrt(2.0) / xp.sqrt(xp.pi) with python-float
+        # scalars; the torch array-API namespace does not coerce them to tensors
+        # (the legacy torch backend did). This is a namespace-layer gap, out of
+        # scope for the closed-form migration; see DONE_WITH_CONCERNS report.
+        pytest.skip("torch namespace: xp.sqrt rejects python-float scalar")
+
+    fn = getattr(crps, dist)
+    bargs = [to_backend(np.asarray(float(a))) for a in args]
+    out = np.asarray(fn(*bargs, xp=get_namespace(*bargs)))
+    ref = _closed_ref(fn, *args)
+    assert out == pytest.approx(ref, abs=1e-4)
+
+
+def test_core_beta_works_numpy_jax_blocks_torch(backend, to_backend):
+    from scoringrules.core import crps
+
+    args = (0.3, 0.7, 1.1)
+    bargs = [to_backend(np.asarray(float(a))) for a in args]
+    if backend == "torch":
+        with pytest.raises(NotImplementedError):
+            crps.beta(*bargs, xp=get_namespace(*bargs))
+    else:
+        out = np.asarray(crps.beta(*bargs, xp=get_namespace(*bargs)))
+        ref = _closed_ref(crps.beta, *args)
+        assert out == pytest.approx(ref, abs=1e-4)

From db9d5f5061e458a0e0e25c53b7a43ca9c2f8629e Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 02:18:37 +0200
Subject: [PATCH 17/26] Coerce python scalars in elementwise namespace ops so
 torch sqrt/log work

---
 scoringrules/backend/namespace.py | 54 ++++++++++++++++++++++++++++++-
 tests/test_crps_core_xp.py        |  7 ----
 tests/test_namespace.py           | 16 +++++++++
 3 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/scoringrules/backend/namespace.py b/scoringrules/backend/namespace.py
index 4e2f88a..0593c5c 100644
--- a/scoringrules/backend/namespace.py
+++ b/scoringrules/backend/namespace.py
@@ -8,6 +8,48 @@
 import numpy as np
 from array_api_compat import array_namespace, is_array_api_obj
 
+# Elementwise math functions that the core may call on Python-scalar constants
+# (e.g. ``xp.sqrt(2.0)``, ``xp.log(2)``). The array-API standard requires array
+# arguments and torch's namespace rejects bare Python floats, whereas the old
+# backend methods coerced them. We restore that leniency by wrapping these so a
+# Python-scalar first argument is converted to an array (a no-op for arrays).
+_SCALAR_COERCE_FUNCS = frozenset(
+    {
+        "sqrt",
+        "exp",
+        "expm1",
+        "log",
+        "log1p",
+        "log2",
+        "log10",
+        "abs",
+        "floor",
+        "ceil",
+        "round",
+        "trunc",
+        "sign",
+        "square",
+        "reciprocal",
+        "negative",
+        "positive",
+        "sin",
+        "cos",
+        "tan",
+        "asin",
+        "acos",
+        "atan",
+        "sinh",
+        "cosh",
+        "tanh",
+        "asinh",
+        "acosh",
+        "atanh",
+        "isnan",
+        "isinf",
+        "isfinite",
+    }
+)
+
 
 class ArrayAPINamespace:
     """A superset of an array-API namespace (bound to ``xp`` at call sites)."""
@@ -22,7 +64,17 @@ def __getattr__(self, name):
         # re-enter __getattr__("_xp") forever.
         if name == "_xp":
             raise AttributeError("ArrayAPINamespace._xp is not set")
-        return getattr(self._xp, name)
+        attr = getattr(self._xp, name)
+        if name in _SCALAR_COERCE_FUNCS and callable(attr):
+            xp = self._xp
+
+            def _scalar_tolerant(x, *args, **kwargs):
+                if isinstance(x, (bool, int, float, complex)):
+                    x = xp.asarray(x)
+                return attr(x, *args, **kwargs)
+
+            return _scalar_tolerant
+        return attr
 
     # --- linear algebra (thin delegations so call sites stay mechanical) ---
     def norm(self, x, axis=None):
diff --git a/tests/test_crps_core_xp.py b/tests/test_crps_core_xp.py
index 0a519be..7d0f537 100644
--- a/tests/test_crps_core_xp.py
+++ b/tests/test_crps_core_xp.py
@@ -57,13 +57,6 @@ def _closed_ref(fn, *args):
 def test_core_closed_matches_numpy(dist, args, backend, to_backend):
     from scoringrules.core import crps
 
-    if backend == "torch" and dist == "normal":
-        # _norm_cdf/_norm_pdf call xp.sqrt(2.0) / xp.sqrt(xp.pi) with python-float
-        # scalars; the torch array-API namespace does not coerce them to tensors
-        # (the legacy torch backend did). This is a namespace-layer gap, out of
-        # scope for the closed-form migration; see DONE_WITH_CONCERNS report.
-        pytest.skip("torch namespace: xp.sqrt rejects python-float scalar")
-
     fn = getattr(crps, dist)
     bargs = [to_backend(np.asarray(float(a))) for a in args]
     out = np.asarray(fn(*bargs, xp=get_namespace(*bargs)))
diff --git a/tests/test_namespace.py b/tests/test_namespace.py
index 0b1a381..08b519c 100644
--- a/tests/test_namespace.py
+++ b/tests/test_namespace.py
@@ -37,6 +37,22 @@ def test_gather_delegates_to_take_along_axis():
     assert out.ravel().tolist() == pytest.approx([30.0, 10.0, 20.0])
 
 
+def test_elementwise_math_coerces_python_scalars():
+    # array-API torch rejects xp.sqrt(2.0); the wrapper must coerce scalars so
+    # core code that does xp.sqrt(2.0) / xp.log(2) works on every backend.
+    x = np.asarray([1.0])  # numpy namespace; behaviour must be uniform
+    xp = get_namespace(x)
+    assert np.asarray(xp.sqrt(2.0)) == pytest.approx(np.sqrt(2.0))
+    assert np.asarray(xp.log(2.0)) == pytest.approx(np.log(2.0))
+
+
+def test_elementwise_math_coerces_scalars_torch():
+    torch = pytest.importorskip("torch")
+    xp = get_namespace(torch.empty(1))
+    assert float(xp.sqrt(2.0)) == pytest.approx(float(np.sqrt(2.0)))
+    assert float(xp.sqrt(xp.pi)) == pytest.approx(float(np.sqrt(np.pi)))
+
+
 def test_missing_xp_raises_attributeerror_not_recursion():
     from scoringrules.backend.namespace import ArrayAPINamespace
 

From 76e14a030288070b459018d30eb93d0767827b2b Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 02:27:57 +0200
Subject: [PATCH 18/26] Migrate CRPS ensemble-family public functions to
 array-api dispatch

---
 scoringrules/_crps.py         | 189 +++++++++++++++++++---------------
 scoringrules/core/utils_xp.py |  33 ++++++
 2 files changed, 138 insertions(+), 84 deletions(-)

diff --git a/scoringrules/_crps.py b/scoringrules/_crps.py
index 9d7a220..7d9f183 100644
--- a/scoringrules/_crps.py
+++ b/scoringrules/_crps.py
@@ -1,8 +1,8 @@
 import typing as tp
 
-from scoringrules.backend import backends
+from scoringrules.backend import backends, get_namespace
 from scoringrules.core import crps, stats
-from scoringrules.core.utils import (
+from scoringrules.core.utils_xp import (
     univariate_array_check,
     univariate_weight_check,
     uv_weighted_score_weights,
@@ -10,6 +10,7 @@
     univariate_sort_ens,
     apply_nan_policy_ens_uv,
 )
+from scoringrules._dispatch import use_numba, resolve_backend_arg
 
 if tp.TYPE_CHECKING:
     from scoringrules.core.typing import Array, ArrayLike, Backend, NanPolicy
@@ -122,37 +123,46 @@ def crps_ensemble(
     array([0.69605316, 0.32865417, 0.39048665])
     """
 
-    # check required input values
-    obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend)
+    resolve_backend_arg(backend)
+    if use_numba(backend, obs, fct):
+        # numba path: inputs are numpy-compatible; reuse the existing numpy-based
+        # helpers (gufuncs consume numpy arrays — no behaviour change).
+        from scoringrules.core.utils import (
+            apply_nan_policy_ens_uv as _nan,
+            univariate_array_check as _uac,
+            univariate_sort_ens as _sort,
+            univariate_weight_check as _wchk,
+        )
 
-    # apply nan policy on the raw forecasts/weights, before normalisation and
-    # sorting, so NaN positions in a user-supplied ens_w are still detectable.
-    # ens_w is returned aligned with the ensemble axis last.
-    obs, fct, ens_w = apply_nan_policy_ens_uv(
-        obs, fct, nan_policy, ens_w, estimator=estimator, m_axis=m_axis, backend=backend
-    )
+        obs, fct = _uac(obs, fct, m_axis, backend="numba")
+        obs, fct, ens_w = _nan(
+            obs,
+            fct,
+            nan_policy,
+            ens_w,
+            estimator=estimator,
+            m_axis=m_axis,
+            backend="numba",
+        )
+        fct, ens_w = _sort(fct, ens_w, -1, estimator, sorted_ensemble, backend="numba")
+        if ens_w is not None:
+            ens_w = _wchk(ens_w, fct, -1, backend="numba")
+        if ens_w is None:
+            return crps.estimator_gufuncs(estimator)(obs, fct)
+        return crps.estimator_gufuncs_w(estimator)(obs, fct, ens_w)
 
-    # sort ensemble (for "qd", "pwm", "int" estimators); the ensemble axis is
-    # already last on both fct and ens_w, so use -1 here
-    fct, ens_w = univariate_sort_ens(
-        fct, ens_w, -1, estimator, sorted_ensemble, backend=backend
+    xp = get_namespace(obs, fct)
+    obs, fct = univariate_array_check(obs, fct, m_axis, xp=xp)
+    obs, fct, ens_w = apply_nan_policy_ens_uv(
+        obs, fct, nan_policy, ens_w, estimator=estimator, m_axis=m_axis, xp=xp
     )
-
-    # check and normalize ensemble weights (optional)
+    fct, ens_w = univariate_sort_ens(fct, ens_w, -1, estimator, sorted_ensemble, xp=xp)
     if ens_w is not None:
-        ens_w = univariate_weight_check(ens_w, fct, -1, backend=backend)
-
-    # dispatch implementation
-    if backend == "numba":
-        if ens_w is None:
-            return crps.estimator_gufuncs(estimator)(obs, fct)
-        else:
-            return crps.estimator_gufuncs_w(estimator)(obs, fct, ens_w)
+        ens_w = univariate_weight_check(ens_w, fct, -1, xp=xp)
 
     if ens_w is None:
-        return crps.ensemble(obs, fct, estimator, backend=backend)
-    else:
-        return crps.ensemble_w(obs, fct, ens_w, estimator, backend=backend)
+        return crps.ensemble(obs, fct, estimator, xp=xp)
+    return crps.ensemble_w(obs, fct, ens_w, estimator, xp=xp)
 
 
 def twcrps_ensemble(
@@ -251,9 +261,9 @@ def twcrps_ensemble(
     >>> sr.twcrps_ensemble(obs, fct, v_func=v_func)
     array([0.69605316, 0.32865417, 0.39048665])
     """
-    obs, fct = uv_weighted_score_chain(
-        obs, fct, a=a, b=b, v_func=v_func, backend=backend
-    )
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, fct)
+    obs, fct = uv_weighted_score_chain(obs, fct, a=a, b=b, v_func=v_func, xp=xp)
     return crps_ensemble(
         obs,
         fct,
@@ -357,35 +367,40 @@ def owcrps_ensemble(
     array([0.91103733, 0.45212402, 0.35686667])
     """
 
-    # check input values
-    obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend)
+    resolve_backend_arg(backend)
+    if use_numba(backend, obs, fct):
+        from scoringrules.core.utils import (
+            apply_nan_policy_ens_uv as _nan,
+            univariate_array_check as _uac,
+            univariate_weight_check as _wchk,
+            uv_weighted_score_weights as _wts,
+        )
 
-    # compute outcome weights
-    obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend)
+        obs, fct = _uac(obs, fct, m_axis, backend="numba")
+        obs_w, fct_w = _wts(obs, fct, a, b, w_func, backend="numba")
+        obs, fct, ens_w = _nan(obs, fct, nan_policy, ens_w, backend="numba")
+        obs_w, fct_w, ens_w = _nan(
+            obs_w, fct_w, nan_policy, ens_w=ens_w, backend="numba"
+        )
+        if ens_w is not None:
+            ens_w = _wchk(ens_w, fct, m_axis, backend="numba")
+        if ens_w is None:
+            return crps.estimator_gufuncs("ownrg")(obs, fct, obs_w, fct_w)
+        return crps.estimator_gufuncs_w("ownrg")(obs, fct, obs_w, fct_w, ens_w)
 
-    # apply nan policy
-    obs, fct, ens_w = apply_nan_policy_ens_uv(
-        obs, fct, nan_policy, ens_w, backend=backend
-    )
+    xp = get_namespace(obs, fct)
+    obs, fct = univariate_array_check(obs, fct, m_axis, xp=xp)
+    obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, xp=xp)
+    obs, fct, ens_w = apply_nan_policy_ens_uv(obs, fct, nan_policy, ens_w, xp=xp)
     obs_w, fct_w, ens_w = apply_nan_policy_ens_uv(
-        obs_w, fct_w, nan_policy, ens_w=ens_w, backend=backend
+        obs_w, fct_w, nan_policy, ens_w=ens_w, xp=xp
     )
-
-    # check and normalize ensemble weights (optional)
     if ens_w is not None:
-        ens_w = univariate_weight_check(ens_w, fct, m_axis, backend=backend)
-
-    # dispatch to implementation
-    if backend == "numba":
-        if ens_w is None:
-            return crps.estimator_gufuncs("ownrg")(obs, fct, obs_w, fct_w)
-        else:
-            return crps.estimator_gufuncs_w("ownrg")(obs, fct, obs_w, fct_w, ens_w)
+        ens_w = univariate_weight_check(ens_w, fct, m_axis, xp=xp)
 
     if ens_w is None:
-        return crps.ow_ensemble(obs, fct, obs_w, fct_w, backend=backend)
-    else:
-        return crps.ow_ensemble_w(obs, fct, obs_w, fct_w, ens_w, backend=backend)
+        return crps.ow_ensemble(obs, fct, obs_w, fct_w, xp=xp)
+    return crps.ow_ensemble_w(obs, fct, obs_w, fct_w, ens_w, xp=xp)
 
 
 def vrcrps_ensemble(
@@ -477,35 +492,40 @@ def vrcrps_ensemble(
     array([0.90036433, 0.41515255, 0.41653833])
     """
 
-    # check input values
-    obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend)
+    resolve_backend_arg(backend)
+    if use_numba(backend, obs, fct):
+        from scoringrules.core.utils import (
+            apply_nan_policy_ens_uv as _nan,
+            univariate_array_check as _uac,
+            univariate_weight_check as _wchk,
+            uv_weighted_score_weights as _wts,
+        )
 
-    # compute outcome weights
-    obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, backend=backend)
+        obs, fct = _uac(obs, fct, m_axis, backend="numba")
+        obs_w, fct_w = _wts(obs, fct, a, b, w_func, backend="numba")
+        obs, fct, ens_w = _nan(obs, fct, nan_policy, ens_w, backend="numba")
+        obs_w, fct_w, ens_w = _nan(
+            obs_w, fct_w, nan_policy, ens_w=ens_w, backend="numba"
+        )
+        if ens_w is not None:
+            ens_w = _wchk(ens_w, fct, m_axis, backend="numba")
+        if ens_w is None:
+            return crps.estimator_gufuncs("vrnrg")(obs, fct, obs_w, fct_w)
+        return crps.estimator_gufuncs_w("vrnrg")(obs, fct, obs_w, fct_w, ens_w)
 
-    # apply nan policy
-    obs, fct, ens_w = apply_nan_policy_ens_uv(
-        obs, fct, nan_policy, ens_w, backend=backend
-    )
+    xp = get_namespace(obs, fct)
+    obs, fct = univariate_array_check(obs, fct, m_axis, xp=xp)
+    obs_w, fct_w = uv_weighted_score_weights(obs, fct, a, b, w_func, xp=xp)
+    obs, fct, ens_w = apply_nan_policy_ens_uv(obs, fct, nan_policy, ens_w, xp=xp)
     obs_w, fct_w, ens_w = apply_nan_policy_ens_uv(
-        obs_w, fct_w, nan_policy, ens_w=ens_w, backend=backend
+        obs_w, fct_w, nan_policy, ens_w=ens_w, xp=xp
     )
-
-    # check and normalize ensemble weights (optional)
     if ens_w is not None:
-        ens_w = univariate_weight_check(ens_w, fct, m_axis, backend=backend)
-
-    # dispatch to implementation
-    if backend == "numba":
-        if ens_w is None:
-            return crps.estimator_gufuncs("vrnrg")(obs, fct, obs_w, fct_w)
-        else:
-            return crps.estimator_gufuncs_w("vrnrg")(obs, fct, obs_w, fct_w, ens_w)
+        ens_w = univariate_weight_check(ens_w, fct, m_axis, xp=xp)
 
     if ens_w is None:
-        return crps.vr_ensemble(obs, fct, obs_w, fct_w, backend=backend)
-    else:
-        return crps.vr_ensemble_w(obs, fct, obs_w, fct_w, ens_w, backend=backend)
+        return crps.vr_ensemble(obs, fct, obs_w, fct_w, xp=xp)
+    return crps.vr_ensemble_w(obs, fct, obs_w, fct_w, ens_w, xp=xp)
 
 
 def crps_quantile(
@@ -561,20 +581,21 @@ def crps_quantile(
 
     # TODO: add example
     """
-    B = backends.active if backend is None else backends[backend]
-    obs, fct = univariate_array_check(obs, fct, m_axis, backend=backend)
-
-    alpha = B.asarray(alpha)
-    if B.any(alpha <= 0) or B.any(alpha >= 1):
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, fct)
+    obs, fct = univariate_array_check(obs, fct, m_axis, xp=xp)
+    alpha = xp.asarray(alpha)
+    if xp.any(alpha <= 0) or xp.any(alpha >= 1):
         raise ValueError("`alpha` contains entries that are not between 0 and 1.")
-
     if not fct.shape[-1] == alpha.shape[-1]:
         raise ValueError("Expected matching length of `fct` and `alpha` values.")
+    if use_numba(backend, obs, fct):
+        import numpy as np
 
-    if B.name == "numba":
-        return crps.quantile_pinball_gufunc(obs, fct, alpha)
-
-    return crps.quantile_pinball(obs, fct, alpha, backend=backend)
+        return crps.quantile_pinball_gufunc(
+            np.asarray(obs), np.asarray(fct), np.asarray(alpha)
+        )
+    return crps.quantile_pinball(obs, fct, alpha, xp=xp)
 
 
 def crps_beta(
diff --git a/scoringrules/core/utils_xp.py b/scoringrules/core/utils_xp.py
index 8f85437..f8e7e04 100644
--- a/scoringrules/core/utils_xp.py
+++ b/scoringrules/core/utils_xp.py
@@ -50,6 +50,39 @@ def univariate_sort_ens(
     return fct, ens_w
 
 
+def uv_weighted_score_weights(
+    obs, fct, a=float("-inf"), b=float("inf"), w_func=None, *, xp
+):
+    if w_func is None:
+
+        def w_func(x):
+            return ((a <= x) & (x <= b)) * 1.0
+
+    obs_w, fct_w = map(w_func, (obs, fct))
+    obs_w, fct_w = xp.asarray(obs_w), xp.asarray(fct_w)
+    if xp.any(obs_w < 0) or xp.any(fct_w < 0):
+        raise ValueError("`w_func` returns negative values")
+    return obs_w, fct_w
+
+
+def uv_weighted_score_chain(
+    obs, fct, a=float("-inf"), b=float("inf"), v_func=None, *, xp
+):
+    if v_func is None:
+        a, b, obs, fct = (
+            xp.asarray(a),
+            xp.asarray(b),
+            xp.asarray(obs),
+            xp.asarray(fct),
+        )
+
+        def v_func(x):
+            return xp.minimum(xp.maximum(x, a), b)
+
+    obs, fct = map(v_func, (obs, fct))
+    return obs, fct
+
+
 def apply_nan_policy_ens_uv(
     obs, fct, nan_policy="propagate", ens_w=None, estimator=None, m_axis=-1, *, xp
 ):

From def42326ecb62b5d46a9ed05aaa997ce84376b11 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 02:38:33 +0200
Subject: [PATCH 19/26] Migrate CRPS closed-form public wrappers to array-api
 dispatch

---
 scoringrules/_crps.py | 154 ++++++++++++++++++++++++++++--------------
 1 file changed, 104 insertions(+), 50 deletions(-)

diff --git a/scoringrules/_crps.py b/scoringrules/_crps.py
index 7d9f183..e78ea54 100644
--- a/scoringrules/_crps.py
+++ b/scoringrules/_crps.py
@@ -1,6 +1,6 @@
 import typing as tp
 
-from scoringrules.backend import backends, get_namespace
+from scoringrules.backend import get_namespace
 from scoringrules.core import crps, stats
 from scoringrules.core.utils_xp import (
     univariate_array_check,
@@ -658,7 +658,9 @@ def crps_beta(
     >>> sr.crps_beta(0.3, 0.7, 1.1)
     0.08501024366637236
     """
-    return crps.beta(obs, a, b, lower, upper, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, a, b, lower, upper)
+    return crps.beta(obs, a, b, lower, upper, xp=xp)
 
 
 def crps_binomial(
@@ -708,7 +710,9 @@ def crps_binomial(
     >>> sr.crps_binomial(4, 10, 0.5)
     0.5955772399902344
     """
-    return crps.binomial(obs, n, prob, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, n, prob)
+    return crps.binomial(obs, n, prob, xp=xp)
 
 
 def crps_exponential(
@@ -756,7 +760,9 @@ def crps_exponential(
     >>> sr.crps_exponential(np.array([0.8, 0.9]), np.array([3.0, 2.0]))
     array([0.36047864, 0.31529889])
     """
-    return crps.exponential(obs, rate, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, rate)
+    return crps.exponential(obs, rate, xp=xp)
 
 
 def crps_exponentialM(
@@ -818,7 +824,9 @@ def crps_exponentialM(
     >>> sr.crps_exponentialM(0.4, 0.2, 0.0, 1.0)
     0.19251207365702294
     """
-    return crps.exponentialM(obs, mass, location, scale, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, mass, location, scale)
+    return crps.exponentialM(obs, mass, location, scale, xp=xp)
 
 
 def crps_2pexponential(
@@ -873,7 +881,9 @@ def crps_2pexponential(
     >>> sr.crps_2pexponential(0.8, 3.0, 1.4, 0.0)
     array(1.18038524)
     """
-    return crps.twopexponential(obs, scale1, scale2, location, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, scale1, scale2, location)
+    return crps.twopexponential(obs, scale1, scale2, location, xp=xp)
 
 
 def crps_gamma(
@@ -943,7 +953,9 @@ def crps_gamma(
     if rate is None:
         rate = 1.0 / scale
 
-    return crps.gamma(obs, shape, rate, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, shape, rate, scale)
+    return crps.gamma(obs, shape, rate, xp=xp)
 
 
 def crps_csg0(
@@ -1019,7 +1031,9 @@ def crps_csg0(
     if rate is None:
         rate = 1.0 / scale
 
-    return crps.csg0(obs, shape, rate, shift, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, shape, rate, scale, shift)
+    return crps.csg0(obs, shape, rate, shift, xp=xp)
 
 
 def crps_gev(
@@ -1113,7 +1127,9 @@ def crps_gev(
     >>> sr.crps_gev(0.3, 0.1)
     0.2924712413052034
     """
-    return crps.gev(obs, shape, location, scale, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, shape, location, scale)
+    return crps.gev(obs, shape, location, scale, xp=xp)
 
 
 def crps_gpd(
@@ -1175,7 +1191,9 @@ def crps_gpd(
     >>> sr.crps_gpd(0.3, 0.9)
     0.6849331901197213
     """
-    return crps.gpd(obs, shape, location, scale, mass, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, shape, location, scale, mass)
+    return crps.gpd(obs, shape, location, scale, mass, xp=xp)
 
 
 def crps_gtclogistic(
@@ -1246,6 +1264,8 @@ def crps_gtclogistic(
     >>> sr.crps_gtclogistic(0.0, 0.1, 0.4, -1.0, 1.0, 0.1, 0.1)
     0.1658713056903939
     """
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, location, scale, lower, upper, lmass, umass)
     return crps.gtclogistic(
         obs,
         location,
@@ -1254,7 +1274,7 @@ def crps_gtclogistic(
         upper,
         lmass,
         umass,
-        backend=backend,
+        xp=xp,
     )
 
 
@@ -1296,9 +1316,9 @@ def crps_tlogistic(
     >>> sr.crps_tlogistic(0.0, 0.1, 0.4, -1.0, 1.0)
     0.12714830546327846
     """
-    return crps.gtclogistic(
-        obs, location, scale, lower, upper, 0.0, 0.0, backend=backend
-    )
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, location, scale, lower, upper)
+    return crps.gtclogistic(obs, location, scale, lower, upper, 0.0, 0.0, xp=xp)
 
 
 def crps_clogistic(
@@ -1341,6 +1361,8 @@ def crps_clogistic(
     """
     lmass = stats._logis_cdf((lower - location) / scale)
     umass = 1 - stats._logis_cdf((upper - location) / scale)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, location, scale, lower, upper)
     return crps.gtclogistic(
         obs,
         location,
@@ -1349,7 +1371,7 @@ def crps_clogistic(
         upper,
         lmass,
         umass,
-        backend=backend,
+        xp=xp,
     )
 
 
@@ -1396,6 +1418,8 @@ def crps_gtcnormal(
     >>> sr.crps_gtcnormal(0.0, 0.1, 0.4, -1.0, 1.0, 0.1, 0.1)
     0.1351100832878575
     """
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, location, scale, lower, upper, lmass, umass)
     return crps.gtcnormal(
         obs,
         location,
@@ -1404,7 +1428,7 @@ def crps_gtcnormal(
         upper,
         lmass,
         umass,
-        backend=backend,
+        xp=xp,
     )
 
 
@@ -1446,7 +1470,9 @@ def crps_tnormal(
     >>> sr.crps_tnormal(0.0, 0.1, 0.4, -1.0, 1.0)
     0.10070146718008832
     """
-    return crps.gtcnormal(obs, location, scale, lower, upper, 0.0, 0.0, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, location, scale, lower, upper)
+    return crps.gtcnormal(obs, location, scale, lower, upper, 0.0, 0.0, xp=xp)
 
 
 def crps_cnormal(
@@ -1489,6 +1515,8 @@ def crps_cnormal(
     """
     lmass = stats._norm_cdf((lower - location) / scale)
     umass = 1 - stats._norm_cdf((upper - location) / scale)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, location, scale, lower, upper)
     return crps.gtcnormal(
         obs,
         location,
@@ -1497,7 +1525,7 @@ def crps_cnormal(
         upper,
         lmass,
         umass,
-        backend=backend,
+        xp=xp,
     )
 
 
@@ -1575,6 +1603,8 @@ def crps_gtct(
     >>> sr.crps_gtct(0.0, 2.0, 0.1, 0.4, -1.0, 1.0, 0.1, 0.1)
     0.13997789333289662
     """
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, df, location, scale, lower, upper, lmass, umass)
     return crps.gtct(
         obs,
         df,
@@ -1584,7 +1614,7 @@ def crps_gtct(
         upper,
         lmass,
         umass,
-        backend=backend,
+        xp=xp,
     )
 
 
@@ -1629,6 +1659,8 @@ def crps_tt(
     >>> sr.crps_tt(0.0, 2.0, 0.1, 0.4, -1.0, 1.0)
     0.10323007471747117
     """
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, df, location, scale, lower, upper)
     return crps.gtct(
         obs,
         df,
@@ -1638,7 +1670,7 @@ def crps_tt(
         upper,
         0.0,
         0.0,
-        backend=backend,
+        xp=xp,
     )
 
 
@@ -1685,6 +1717,8 @@ def crps_ct(
     """
     lmass = stats._t_cdf((lower - location) / scale, df)
     umass = 1 - stats._t_cdf((upper - location) / scale, df)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, df, location, scale, lower, upper)
     return crps.gtct(
         obs,
         df,
@@ -1694,7 +1728,7 @@ def crps_ct(
         upper,
         lmass,
         umass,
-        backend=backend,
+        xp=xp,
     )
 
 
@@ -1749,7 +1783,9 @@ def crps_hypergeometric(
     >>> sr.crps_hypergeometric(5, 7, 13, 12)
     0.44697415547610597
     """
-    return crps.hypergeometric(obs, m, n, k, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, m, n, k)
+    return crps.hypergeometric(obs, m, n, k, xp=xp)
 
 
 def crps_laplace(
@@ -1799,7 +1835,9 @@ def crps_laplace(
     >>> sr.crps_laplace(0.3, 0.1, 0.2)
     0.12357588823428847
     """
-    return crps.laplace(obs, location, scale, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, location, scale)
+    return crps.laplace(obs, location, scale, xp=xp)
 
 
 def crps_logistic(
@@ -1846,7 +1884,9 @@ def crps_logistic(
     >>> sr.crps_logistic(0.0, 0.4, 0.1)
     0.3036299855835619
     """
-    return crps.logistic(obs, mu, sigma, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, mu, sigma)
+    return crps.logistic(obs, mu, sigma, xp=xp)
 
 
 def crps_loglaplace(
@@ -1906,7 +1946,9 @@ def crps_loglaplace(
     >>> sr.crps_loglaplace(3.0, 0.1, 0.9)
     1.162020513653791
     """
-    return crps.loglaplace(obs, locationlog, scalelog, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, locationlog, scalelog)
+    return crps.loglaplace(obs, locationlog, scalelog, xp=xp)
 
 
 def crps_loglogistic(
@@ -1966,7 +2008,9 @@ def crps_loglogistic(
     >>> sr.crps_loglogistic(3.0, 0.1, 0.9)
     1.1329527730161177
     """
-    return crps.loglogistic(obs, mulog, sigmalog, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, mulog, sigmalog)
+    return crps.loglogistic(obs, mulog, sigmalog, xp=xp)
 
 
 def crps_lognormal(
@@ -2016,7 +2060,9 @@ def crps_lognormal(
     >>> sr.crps_lognormal(0.1, 0.4, 0.0)
     1.3918246976412703
     """
-    return crps.lognormal(obs, mulog, sigmalog, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, mulog, sigmalog)
+    return crps.lognormal(obs, mulog, sigmalog, xp=xp)
 
 
 def crps_mixnorm(
@@ -2071,21 +2117,22 @@ def crps_mixnorm(
     >>> sr.crps_mixnorm(0.0, [0.1, -0.3, 1.0], [0.4, 2.1, 0.7], [0.1, 0.2, 0.7])
     0.46806866729387275
     """
-    B = backends.active if backend is None else backends[backend]
-    obs, m, s = map(B.asarray, (obs, m, s))
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, m, s, w)
+    obs, m, s = map(xp.asarray, (obs, m, s))
 
     if w is None:
         M: int = m.shape[m_axis]
-        w = B.zeros(m.shape) + 1 / M
+        w = xp.zeros(m.shape) + 1 / M
     else:
-        w = B.asarray(w)
+        w = xp.asarray(w)
 
     if m_axis != -1:
-        m = B.moveaxis(m, m_axis, -1)
-        s = B.moveaxis(s, m_axis, -1)
-        w = B.moveaxis(w, m_axis, -1)
+        m = xp.moveaxis(m, m_axis, -1)
+        s = xp.moveaxis(s, m_axis, -1)
+        w = xp.moveaxis(w, m_axis, -1)
 
-    return crps.mixnorm(obs, m, s, w, backend=backend)
+    return crps.mixnorm(obs, m, s, w, xp=xp)
 
 
 def crps_negbinom(
@@ -2148,7 +2195,9 @@ def crps_negbinom(
     if prob is None:
         prob = n / (n + mu)
 
-    return crps.negbinom(obs, n, prob, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, n, prob, mu)
+    return crps.negbinom(obs, n, prob, xp=xp)
 
 
 def crps_normal(
@@ -2194,7 +2243,9 @@ def crps_normal(
     >>> sr.crps_normal(0.0, 0.1, 0.4)
     0.10339992515976162
     """
-    return crps.normal(obs, mu, sigma, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, mu, sigma)
+    return crps.normal(obs, mu, sigma, xp=xp)
 
 
 def crps_2pnormal(
@@ -2247,24 +2298,21 @@ def crps_2pnormal(
     >>> sr.crps_2pnormal(0.0, 0.4, 2.0, 0.1)
     0.7243199144002115
     """
-    B = backends.active if backend is None else backends[backend]
-    obs, scale1, scale2, location = map(B.asarray, (obs, scale1, scale2, location))
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, scale1, scale2, location)
+    obs, scale1, scale2, location = map(xp.asarray, (obs, scale1, scale2, location))
     lower = float("-inf")
     upper = 0.0
     lmass = 0.0
     umass = scale2 / (scale1 + scale2)
-    z = B.minimum(B.asarray(0.0), B.asarray(obs - location)) / scale1
-    s1 = scale1 * crps.gtcnormal(
-        z, 0.0, 1.0, lower, upper, lmass, umass, backend=backend
-    )
+    z = xp.minimum(xp.asarray(0.0), xp.asarray(obs - location)) / scale1
+    s1 = scale1 * crps.gtcnormal(z, 0.0, 1.0, lower, upper, lmass, umass, xp=xp)
     lower = 0.0
     upper = float("inf")
     lmass = scale1 / (scale1 + scale2)
     umass = 0.0
-    z = B.maximum(B.asarray(0.0), B.asarray(obs - location)) / scale2
-    s2 = scale2 * crps.gtcnormal(
-        z, 0.0, 1.0, lower, upper, lmass, umass, backend=backend
-    )
+    z = xp.maximum(xp.asarray(0.0), xp.asarray(obs - location)) / scale2
+    s2 = scale2 * crps.gtcnormal(z, 0.0, 1.0, lower, upper, lmass, umass, xp=xp)
     return s1 + s2
 
 
@@ -2310,7 +2358,9 @@ def crps_poisson(
     >>> sr.crps_poisson(1, 2)
     0.4991650450203817
     """
-    return crps.poisson(obs, mean, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, mean)
+    return crps.poisson(obs, mean, xp=xp)
 
 
 def crps_t(
@@ -2365,7 +2415,9 @@ def crps_t(
     >>> sr.crps_t(0.0, 0.1, 0.4, 0.1)
     0.07687151141732129
     """
-    return crps.t(obs, df, location, scale, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, df, location, scale)
+    return crps.t(obs, df, location, scale, xp=xp)
 
 
 def crps_uniform(
@@ -2422,7 +2474,9 @@ def crps_uniform(
     >>> sr.crps_uniform(0.4, 0.0, 1.0, 0.0, 0.0)
     0.09333333333333332
     """
-    return crps.uniform(obs, min, max, lmass, umass, backend=backend)
+    resolve_backend_arg(backend)
+    xp = get_namespace(obs, min, max, lmass, umass)
+    return crps.uniform(obs, min, max, lmass, umass, xp=xp)
 
 
 __all__ = [

From 2bfb2834206120425fad02dd9b85e61939d0aa97 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 02:38:34 +0200
Subject: [PATCH 20/26] Fix comb to return 0 for k outside [0, n], matching
 scipy.special.comb

---
 scoringrules/backend/extensions.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/scoringrules/backend/extensions.py b/scoringrules/backend/extensions.py
index 589ea8c..e8b75c7 100644
--- a/scoringrules/backend/extensions.py
+++ b/scoringrules/backend/extensions.py
@@ -161,8 +161,16 @@ def comb(xp, n, k):
     # to 9.0 instead of 10.0. Rounding the exact-in-float ratio is robust on
     # numpy/jax/torch. comb is integer-valued, so this is not differentiable
     # (matching the old floor-division form).
+    n = xp.asarray(n)
+    k = xp.asarray(k)
     ratio = factorial(xp, n) / (factorial(xp, k) * factorial(xp, n - k))
-    return xp.round(ratio)
+    result = xp.round(ratio)
+    # ``comb`` is 0 outside ``0 <= k <= n`` (matches scipy.special.comb, the
+    # behaviour the old backends relied on). Without this guard, factorial of a
+    # negative argument yields inf/nan and propagates through masked terms (e.g.
+    # the hypergeometric PMF, where ``0 * inf`` becomes nan).
+    valid = (k >= 0) & (k <= n)
+    return xp.where(valid, result, xp.asarray(0.0))
 
 
 # --- non-standard helpers ---

From 1617a91b276293381326547ae8434931435a7d54 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 02:47:13 +0200
Subject: [PATCH 21/26] Compute censored-distribution tail masses via xp so
 clogistic/cnormal/ct honour the input framework

---
 scoringrules/_crps.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/scoringrules/_crps.py b/scoringrules/_crps.py
index e78ea54..eee9b59 100644
--- a/scoringrules/_crps.py
+++ b/scoringrules/_crps.py
@@ -1,7 +1,7 @@
 import typing as tp
 
 from scoringrules.backend import get_namespace
-from scoringrules.core import crps, stats
+from scoringrules.core import crps, stats_xp
 from scoringrules.core.utils_xp import (
     univariate_array_check,
     univariate_weight_check,
@@ -1359,10 +1359,10 @@ def crps_clogistic(
     >>> sr.crps_clogistic(0.0, 0.1, 0.4, -1.0, 1.0)
     0.15805632276434345
     """
-    lmass = stats._logis_cdf((lower - location) / scale)
-    umass = 1 - stats._logis_cdf((upper - location) / scale)
     resolve_backend_arg(backend)
     xp = get_namespace(obs, location, scale, lower, upper)
+    lmass = stats_xp._logis_cdf((lower - location) / scale, xp=xp)
+    umass = 1 - stats_xp._logis_cdf((upper - location) / scale, xp=xp)
     return crps.gtclogistic(
         obs,
         location,
@@ -1513,10 +1513,10 @@ def crps_cnormal(
     >>> sr.crps_cnormal(0.0, 0.1, 0.4, -1.0, 1.0)
     0.10338851213123085
     """
-    lmass = stats._norm_cdf((lower - location) / scale)
-    umass = 1 - stats._norm_cdf((upper - location) / scale)
     resolve_backend_arg(backend)
     xp = get_namespace(obs, location, scale, lower, upper)
+    lmass = stats_xp._norm_cdf((lower - location) / scale, xp=xp)
+    umass = 1 - stats_xp._norm_cdf((upper - location) / scale, xp=xp)
     return crps.gtcnormal(
         obs,
         location,
@@ -1715,10 +1715,10 @@ def crps_ct(
     >>> sr.crps_ct(0.0, 2.0, 0.1, 0.4, -1.0, 1.0)
     0.12672580744453948
     """
-    lmass = stats._t_cdf((lower - location) / scale, df)
-    umass = 1 - stats._t_cdf((upper - location) / scale, df)
     resolve_backend_arg(backend)
     xp = get_namespace(obs, df, location, scale, lower, upper)
+    lmass = stats_xp._t_cdf((lower - location) / scale, df, xp=xp)
+    umass = 1 - stats_xp._t_cdf((upper - location) / scale, df, xp=xp)
     return crps.gtct(
         obs,
         df,

From e574a9dead14946a97e9ad408daefae7dca6d933 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 03:08:45 +0200
Subject: [PATCH 22/26] Exercise CRPS tests with native arrays and assert
 backend inference

---
 tests/test_crps.py | 1418 ++++++++++++++++++++++++++++----------------
 1 file changed, 908 insertions(+), 510 deletions(-)

diff --git a/tests/test_crps.py b/tests/test_crps.py
index 8a46dfa..0742580 100644
--- a/tests/test_crps.py
+++ b/tests/test_crps.py
@@ -2,6 +2,7 @@
 import pytest
 import scipy.stats as st
 import scoringrules as sr
+from .conftest import assert_inferred
 
 ENSEMBLE_SIZE = 11
 N = 5
@@ -9,74 +10,106 @@
 ESTIMATORS = ["nrg", "fair", "pwm", "int", "qd", "akr", "akr_circperm"]
 
 
+def assert_close(result, expected, backend, *, rtol=1e-5, atol=1e-8):
+    """Compare a (possibly framework-native) result to an expected numpy/scalar value.
+
+    jax runs in float32 in the test environment (``jax_enable_x64`` is off), so its
+    tolerances are loosened. torch runs in float64 and stays strict.
+    """
+    arr = np.asarray(result)
+    if backend == "jax":
+        rtol = max(rtol, 1e-4)
+        atol = max(atol, 1e-5)
+    assert np.allclose(
+        arr, np.asarray(expected), rtol=rtol, atol=atol
+    ), f"[{backend}] {arr} != {expected}"
+
+
 @pytest.mark.parametrize("estimator", ESTIMATORS)
-def test_crps_ensemble(estimator, backend):
+def test_crps_ensemble(estimator, backend, to_backend, backend_kwargs):
     """Test general behavior of scoringrules.crps_ensemble."""
 
-    kwargs = {"estimator": estimator, "backend": backend}
+    kwargs = {"estimator": estimator, **backend_kwargs}
 
     # test data
-    obs = np.random.randn(N)
-    fct = np.random.randn(N, ENSEMBLE_SIZE)
+    obs0 = np.random.randn(N)
+    fct0 = np.random.randn(N, ENSEMBLE_SIZE)
+    obs = to_backend(obs0)
+    fct = to_backend(fct0)
 
-    # test exceptions: indefined estimator
+    # test exceptions: undefined estimator
     with pytest.raises(ValueError):
-        est = "undefined_estimator"
-        sr.crps_ensemble(obs, fct, estimator=est, backend=backend)
+        sr.crps_ensemble(obs, fct, estimator="undefined_estimator", **backend_kwargs)
 
     # test shapes: default case
     res = sr.crps_ensemble(obs, fct, **kwargs)
+    assert_inferred(res, backend)
     res = np.asarray(res)
     res_mean = np.mean(res)
     assert res.shape == (N,)
     # test shapes: with extra dimension
-    res = sr.crps_ensemble(obs[None], fct[None, :], **kwargs)
+    res = sr.crps_ensemble(to_backend(obs0[None]), to_backend(fct0[None, :]), **kwargs)
+    assert_inferred(res, backend)
     res = np.asarray(res)
     assert res.shape == (1, N)
-    assert np.mean(res) == res_mean
+    if backend == "jax":
+        assert np.allclose(np.mean(res), res_mean, rtol=1e-5)
+    else:
+        assert np.mean(res) == res_mean
     # test shapes: with non-default ensemble axis
-    res = sr.crps_ensemble(obs[..., None], fct[..., None], m_axis=-2, **kwargs)
+    res = sr.crps_ensemble(
+        to_backend(obs0[..., None]), to_backend(fct0[..., None]), m_axis=-2, **kwargs
+    )
+    assert_inferred(res, backend)
     res = np.asarray(res)
     assert res.shape == (N, 1)
-    assert np.mean(res) == res_mean
+    if backend == "jax":
+        assert np.allclose(np.mean(res), res_mean, rtol=1e-5)
+    else:
+        assert np.mean(res) == res_mean
 
     # non-negative values
     if estimator not in ["akr", "akr_circperm"]:
         res = sr.crps_ensemble(obs, fct, **kwargs)
+        assert_inferred(res, backend)
         res = np.asarray(res)
         assert not np.any(res < 0.0)
 
     # approx zero when perfect forecast
-    perfect_fct = obs[..., None] + np.random.randn(N, ENSEMBLE_SIZE) * 0.00001
-    res = sr.crps_ensemble(obs, perfect_fct, **kwargs)
+    perfect_fct = obs0[..., None] + np.random.randn(N, ENSEMBLE_SIZE) * 0.00001
+    res = sr.crps_ensemble(obs, to_backend(perfect_fct), **kwargs)
+    assert_inferred(res, backend)
     res = np.asarray(res)
     assert not np.any(res - 0.0 > 0.0001)
 
 
 @pytest.mark.parametrize("estimator", ESTIMATORS)
-def test_crps_ensemble_nan_policy(estimator, backend):
+def test_crps_ensemble_nan_policy(estimator, backend, to_backend, backend_kwargs):
     """Test behavior of scoringrules.crps_ensemble with NaN values."""
 
-    kwargs = {"estimator": estimator, "backend": backend}
+    kwargs = {"estimator": estimator, **backend_kwargs}
 
     # test data
-    obs = np.random.randn(N)
-    fct = np.random.randn(N, ENSEMBLE_SIZE)
+    obs0 = np.random.randn(N)
+    fct0 = np.random.randn(N, ENSEMBLE_SIZE)
 
-    fct_nan = fct.copy()
-    fct_nan[0, [0, 3, 6]] = np.nan
-    fct_nan[2, [5]] = np.nan
-    nan_positions = np.isnan(fct_nan).any(axis=1)
+    fct_nan0 = fct0.copy()
+    fct_nan0[0, [0, 3, 6]] = np.nan
+    fct_nan0[2, [5]] = np.nan
+    nan_positions = np.isnan(fct_nan0).any(axis=1)
+
+    obs = to_backend(obs0)
+    fct_nan = to_backend(fct_nan0)
 
     # default nan policy (propagate)
     res = sr.crps_ensemble(obs, fct_nan, **kwargs)
-    res = np.asarray(res)
-    assert np.all(np.isnan(res[nan_positions]))
+    assert_inferred(res, backend)
+    assert np.all(np.isnan(np.asarray(res)[nan_positions]))
 
     # 'propagate' nan policy
     res = sr.crps_ensemble(obs, fct_nan, nan_policy="propagate", **kwargs)
-    res = np.asarray(res)
-    assert np.all(np.isnan(res[nan_positions]))
+    assert_inferred(res, backend)
+    assert np.all(np.isnan(np.asarray(res)[nan_positions]))
 
     # 'raise' nan policy
     with pytest.raises(ValueError):
@@ -90,105 +123,129 @@ def test_crps_ensemble_nan_policy(estimator, backend):
 
     # 'omit' nan policy: no nans in results
     res = sr.crps_ensemble(obs, fct_nan, nan_policy="omit", **kwargs)
-    res = np.asarray(res)
-    assert not np.any(np.isnan(res))
+    assert_inferred(res, backend)
+    assert not np.any(np.isnan(np.asarray(res)))
 
     # 'omit' nan policy: equivalence with clean ensemble
-    for i in range(fct.shape[0]):
-        fct_clean = fct_nan[i, ~np.isnan(fct_nan[i])]
-        res_clean = sr.crps_ensemble(obs[i : i + 1], fct_clean[None, :], **kwargs)
+    rtol = 1e-4 if backend == "jax" else 1e-5
+    for i in range(fct0.shape[0]):
+        fct_clean = fct_nan0[i, ~np.isnan(fct_nan0[i])]
+        res_clean = sr.crps_ensemble(
+            to_backend(obs0[i : i + 1]), to_backend(fct_clean[None, :]), **kwargs
+        )
         res = sr.crps_ensemble(
-            obs[i : i + 1], fct_nan[i : i + 1], nan_policy="omit", **kwargs
+            to_backend(obs0[i : i + 1]),
+            to_backend(fct_nan0[i : i + 1]),
+            nan_policy="omit",
+            **kwargs,
         )
-        assert np.allclose(res, res_clean)
+        assert np.allclose(np.asarray(res), np.asarray(res_clean), rtol=rtol)
 
 
 @pytest.mark.parametrize("estimator", ESTIMATORS)
-def test_crps_ensemble_w_ens(estimator, backend):
+def test_crps_ensemble_w_ens(estimator, backend, to_backend, backend_kwargs):
     """Test behavior of scoringrules.crps_ensemble with ensemble weights."""
-    kwargs = {"estimator": estimator, "backend": backend}
+    kwargs = {"estimator": estimator, **backend_kwargs}
 
     # test data
-    obs = np.random.randn(N)
-    mu = obs + np.random.randn(N) * 0.3
+    obs0 = np.random.randn(N)
+    mu = obs0 + np.random.randn(N) * 0.3
     sigma = abs(np.random.randn(N)) * 0.5
-    fct = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None]
-    uniform_ens_w = np.ones(fct.shape)
-    non_uniform_ens_w = np.random.rand(*fct.shape)
+    fct0 = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None]
+    obs = to_backend(obs0)
+    fct = to_backend(fct0)
+    uniform_ens_w = to_backend(np.ones(fct0.shape))
+    non_uniform_ens_w = to_backend(np.random.rand(*fct0.shape))
 
     # default
     res = sr.crps_ensemble(obs, fct, **kwargs)
+    assert_inferred(res, backend)
 
     # equivalence for uniform weights
     res_uniform_weights = sr.crps_ensemble(obs, fct, ens_w=uniform_ens_w, **kwargs)
-    assert np.allclose(res_uniform_weights, res, atol=1e-6)
+    assert_inferred(res_uniform_weights, backend)
+    atol = 1e-4 if backend == "jax" else 1e-6
+    assert np.allclose(np.asarray(res_uniform_weights), np.asarray(res), atol=atol)
 
     # non-equivalence for non-uniform weights
     res_non_uniform_weights = sr.crps_ensemble(
         obs, fct, ens_w=non_uniform_ens_w, **kwargs
     )
-    assert not np.allclose(res_non_uniform_weights, res, atol=1e-6)
+    assert_inferred(res_non_uniform_weights, backend)
+    assert not np.allclose(
+        np.asarray(res_non_uniform_weights), np.asarray(res), atol=1e-6
+    )
 
     # estimator equivalence with weights
-    w = np.abs(np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None])
-    res_nrg = sr.crps_ensemble(obs, fct, ens_w=w, estimator="nrg", backend=backend)
-    res_qd = sr.crps_ensemble(obs, fct, ens_w=w, estimator="qd", backend=backend)
+    w = to_backend(np.abs(np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None]))
+    res_nrg = sr.crps_ensemble(obs, fct, ens_w=w, estimator="nrg", **backend_kwargs)
+    res_qd = sr.crps_ensemble(obs, fct, ens_w=w, estimator="qd", **backend_kwargs)
+    assert_inferred(res_nrg, backend)
+    assert_inferred(res_qd, backend)
     if backend in ["torch", "jax"]:
-        assert np.allclose(res_nrg, res_qd, rtol=1e-03)
+        assert np.allclose(np.asarray(res_nrg), np.asarray(res_qd), rtol=1e-03)
     else:
-        assert np.allclose(res_nrg, res_qd)
-
-    # correctness against a known value (qd estimator with integer weights)
-    obs_known = -0.6042506
-    fct_known = np.array(
-        [
-            1.7812118,
-            0.5863797,
-            0.7038174,
-            -0.7743998,
-            -0.2751647,
-            1.1863249,
-            1.2990966,
-            -0.3242982,
-            -0.5968781,
-            0.9064937,
-        ]
-    )
-    res_known = sr.crps_ensemble(
-        obs_known, fct_known, ens_w=np.arange(10), estimator="qd"
-    )
-    assert np.isclose(res_known, 0.4923673)
+        assert np.allclose(np.asarray(res_nrg), np.asarray(res_qd))
+
+    # correctness against a known value (qd estimator with integer weights);
+    # exercised once on the pure-python/numpy path
+    if backend == "numpy":
+        obs_known = -0.6042506
+        fct_known = np.array(
+            [
+                1.7812118,
+                0.5863797,
+                0.7038174,
+                -0.7743998,
+                -0.2751647,
+                1.1863249,
+                1.2990966,
+                -0.3242982,
+                -0.5968781,
+                0.9064937,
+            ]
+        )
+        res_known = sr.crps_ensemble(
+            obs_known, fct_known, ens_w=np.arange(10), estimator="qd"
+        )
+        assert np.isclose(res_known, 0.4923673)
 
 
 @pytest.mark.parametrize("estimator", ESTIMATORS)
-def test_crps_ensemble_w_ens_nan_policy(estimator, backend):
+def test_crps_ensemble_w_ens_nan_policy(estimator, backend, to_backend, backend_kwargs):
     """Test behavior of scoringrules.crps_ensemble with ensemble weights and NaN values."""
 
-    kwargs = {"estimator": estimator, "backend": backend}
+    kwargs = {"estimator": estimator, **backend_kwargs}
 
     # test data
-    obs = np.random.randn(N)
-    mu = obs + np.random.randn(N) * 0.3
+    obs0 = np.random.randn(N)
+    mu = obs0 + np.random.randn(N) * 0.3
     sigma = abs(np.random.randn(N)) * 0.5
-    fct = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None]
-    uniform_ens_w = np.ones(fct.shape)
-    non_uniform_ens_w = np.random.rand(*fct.shape)
-    fct_nan = fct.copy()
-    fct_nan[0, [0, 3, 6]] = np.nan
-    fct_nan[2, [5]] = np.nan
-    nan_positions = np.isnan(fct_nan).any(axis=1)
+    fct0 = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None]
+    uniform_ens_w0 = np.ones(fct0.shape)
+    non_uniform_ens_w0 = np.random.rand(*fct0.shape)
+    fct_nan0 = fct0.copy()
+    fct_nan0[0, [0, 3, 6]] = np.nan
+    fct_nan0[2, [5]] = np.nan
+    nan_positions = np.isnan(fct_nan0).any(axis=1)
+
+    obs = to_backend(obs0)
+    fct = to_backend(fct0)
+    fct_nan = to_backend(fct_nan0)
+    uniform_ens_w = to_backend(uniform_ens_w0)
+    non_uniform_ens_w = to_backend(non_uniform_ens_w0)
 
     # default nan policy (propagate): ensembles with NaN members return NaN
     res = sr.crps_ensemble(obs, fct_nan, ens_w=uniform_ens_w, **kwargs)
-    res = np.asarray(res)
-    assert np.all(np.isnan(res[nan_positions]))
+    assert_inferred(res, backend)
+    assert np.all(np.isnan(np.asarray(res)[nan_positions]))
 
     # 'propagate' nan policy: ensembles with NaN members return NaN
     res = sr.crps_ensemble(
         obs, fct_nan, ens_w=uniform_ens_w, nan_policy="propagate", **kwargs
     )
-    res = np.asarray(res)
-    assert np.all(np.isnan(res[nan_positions]))
+    assert_inferred(res, backend)
+    assert np.all(np.isnan(np.asarray(res)[nan_positions]))
 
     # 'raise' nan policy: error if NaN is encountered
     with pytest.raises(ValueError):
@@ -208,8 +265,8 @@ def test_crps_ensemble_w_ens_nan_policy(estimator, backend):
     res = sr.crps_ensemble(
         obs, fct_nan, ens_w=uniform_ens_w, nan_policy="omit", **kwargs
     )
-    res = np.asarray(res)
-    assert not np.any(np.isnan(res))
+    assert_inferred(res, backend)
+    assert not np.any(np.isnan(np.asarray(res)))
 
     # 'omit' nan policy: non-equivalence if non-uniform weights are used
     res = sr.crps_ensemble(
@@ -218,45 +275,60 @@ def test_crps_ensemble_w_ens_nan_policy(estimator, backend):
     res_nans = sr.crps_ensemble(
         obs, fct_nan, ens_w=non_uniform_ens_w, nan_policy="omit", **kwargs
     )
+    assert_inferred(res, backend)
+    assert_inferred(res_nans, backend)
     res, res_nans = np.asarray(res), np.asarray(res_nans)
     assert not np.any(np.isnan(res_nans))
     assert not np.allclose(res[nan_positions], res_nans[nan_positions])
     assert np.allclose(res[~nan_positions], res_nans[~nan_positions])
 
     # 'omit' nan policy: equivalence with clean ensemble
-    for i in range(fct.shape[0]):
-        fct_clean = fct_nan[i, ~np.isnan(fct_nan[i])]
-        uniform_ens_w_clean = uniform_ens_w[i, ~np.isnan(fct_nan[i])]
+    rtol = 1e-4 if backend == "jax" else 1e-5
+    for i in range(fct0.shape[0]):
+        valid = ~np.isnan(fct_nan0[i])
+        fct_clean = fct_nan0[i, valid]
+        uniform_ens_w_clean = uniform_ens_w0[i, valid]
         res_clean = sr.crps_ensemble(
-            obs[i], fct_clean, ens_w=uniform_ens_w_clean, nan_policy="omit", **kwargs
+            to_backend(obs0[i]),
+            to_backend(fct_clean),
+            ens_w=to_backend(uniform_ens_w_clean),
+            nan_policy="omit",
+            **kwargs,
         )
         res = sr.crps_ensemble(
-            obs[i], fct_nan[i], ens_w=uniform_ens_w[i], nan_policy="omit", **kwargs
+            to_backend(obs0[i]),
+            to_backend(fct_nan0[i]),
+            ens_w=to_backend(uniform_ens_w0[i]),
+            nan_policy="omit",
+            **kwargs,
         )
-        assert np.isclose(res, res_clean)
+        assert np.allclose(np.asarray(res), np.asarray(res_clean), rtol=rtol)
 
 
 @pytest.mark.parametrize("estimator", ESTIMATORS)
-def test_crps_ensemble_nan_weights(estimator, backend):
+def test_crps_ensemble_nan_weights(estimator, backend, to_backend, backend_kwargs):
     """Test crps_ensemble when the ensemble weights (ens_w) contain NaN.
 
     A member is invalid if its forecast value OR its weight is NaN:
     'propagate' -> NaN, 'raise' -> error, 'omit' -> zero weight (dropped).
     """
-    kwargs = {"estimator": estimator, "backend": backend}
+    kwargs = {"estimator": estimator, **backend_kwargs}
 
-    obs = np.random.randn(N)
-    fct = np.random.randn(N, ENSEMBLE_SIZE)
-    ens_w = np.random.rand(N, ENSEMBLE_SIZE)
-    ens_w[0, [0, 3, 6]] = np.nan
-    ens_w[2, [5]] = np.nan
-    nan_positions = np.isnan(ens_w).any(axis=1)
+    obs0 = np.random.randn(N)
+    fct0 = np.random.randn(N, ENSEMBLE_SIZE)
+    ens_w0 = np.random.rand(N, ENSEMBLE_SIZE)
+    ens_w0[0, [0, 3, 6]] = np.nan
+    ens_w0[2, [5]] = np.nan
+    nan_positions = np.isnan(ens_w0).any(axis=1)
+
+    obs = to_backend(obs0)
+    fct = to_backend(fct0)
+    ens_w = to_backend(ens_w0)
 
     # 'propagate': a NaN weight yields NaN output
-    res = np.asarray(
-        sr.crps_ensemble(obs, fct, ens_w=ens_w, nan_policy="propagate", **kwargs)
-    )
-    assert np.all(np.isnan(res[nan_positions]))
+    res = sr.crps_ensemble(obs, fct, ens_w=ens_w, nan_policy="propagate", **kwargs)
+    assert_inferred(res, backend)
+    assert np.all(np.isnan(np.asarray(res)[nan_positions]))
 
     # 'raise': error if a weight is NaN
     with pytest.raises(ValueError):
@@ -269,42 +341,52 @@ def test_crps_ensemble_nan_weights(estimator, backend):
         return
 
     # 'omit': NaN-weighted members get zero weight (equivalent to dropping them)
-    res = np.asarray(
-        sr.crps_ensemble(obs, fct, ens_w=ens_w, nan_policy="omit", **kwargs)
-    )
+    res = sr.crps_ensemble(obs, fct, ens_w=ens_w, nan_policy="omit", **kwargs)
+    assert_inferred(res, backend)
+    res = np.asarray(res)
     assert not np.any(np.isnan(res))
-    for i in range(fct.shape[0]):
-        valid = ~np.isnan(ens_w[i])
+    rtol = 1e-4 if backend == "jax" else 1e-5
+    for i in range(fct0.shape[0]):
+        valid = ~np.isnan(ens_w0[i])
         res_clean = sr.crps_ensemble(
-            obs[i], fct[i, valid], ens_w=ens_w[i, valid], nan_policy="omit", **kwargs
-        )
-        assert np.isclose(res[i], res_clean)
-
-    # the same result is obtained with the ensemble on a non-default axis
-    res_axis = np.asarray(
-        sr.crps_ensemble(
-            obs[..., None],
-            fct[..., None],
-            ens_w=ens_w[..., None],
-            m_axis=-2,
+            to_backend(obs0[i]),
+            to_backend(fct0[i, valid]),
+            ens_w=to_backend(ens_w0[i, valid]),
             nan_policy="omit",
             **kwargs,
         )
+        assert np.allclose(res[i], np.asarray(res_clean), rtol=rtol)
+
+    # the same result is obtained with the ensemble on a non-default axis
+    res_axis = sr.crps_ensemble(
+        to_backend(obs0[..., None]),
+        to_backend(fct0[..., None]),
+        ens_w=to_backend(ens_w0[..., None]),
+        m_axis=-2,
+        nan_policy="omit",
+        **kwargs,
     )
-    assert np.allclose(res_axis.ravel(), res)
+    assert_inferred(res_axis, backend)
+    assert np.allclose(np.asarray(res_axis).ravel(), res, rtol=rtol)
 
 
-def test_crps_ensemble_correctness(backend):
-    obs = np.random.randn(N)
-    mu = obs + np.random.randn(N) * 0.3
+def test_crps_ensemble_correctness(backend, to_backend, backend_kwargs):
+    obs0 = np.random.randn(N)
+    mu = obs0 + np.random.randn(N) * 0.3
     sigma = abs(np.random.randn(N)) * 0.5
-    fct = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None]
+    fct0 = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None]
+    obs = to_backend(obs0)
+    fct = to_backend(fct0)
 
     # test equivalence of different estimators
-    res_nrg = sr.crps_ensemble(obs, fct, estimator="nrg", backend=backend)
-    res_qd = sr.crps_ensemble(obs, fct, estimator="qd", backend=backend)
-    res_fair = sr.crps_ensemble(obs, fct, estimator="fair", backend=backend)
-    res_pwm = sr.crps_ensemble(obs, fct, estimator="pwm", backend=backend)
+    res_nrg = sr.crps_ensemble(obs, fct, estimator="nrg", **backend_kwargs)
+    res_qd = sr.crps_ensemble(obs, fct, estimator="qd", **backend_kwargs)
+    res_fair = sr.crps_ensemble(obs, fct, estimator="fair", **backend_kwargs)
+    res_pwm = sr.crps_ensemble(obs, fct, estimator="pwm", **backend_kwargs)
+    for r in (res_nrg, res_qd, res_fair, res_pwm):
+        assert_inferred(r, backend)
+    res_nrg, res_qd = np.asarray(res_nrg), np.asarray(res_qd)
+    res_fair, res_pwm = np.asarray(res_fair), np.asarray(res_pwm)
     if backend in ["torch", "jax"]:
         assert np.allclose(res_nrg, res_qd, rtol=1e-03)
         assert np.allclose(res_fair, res_pwm, rtol=1e-03)
@@ -312,282 +394,381 @@ def test_crps_ensemble_correctness(backend):
         assert np.allclose(res_nrg, res_qd)
         assert np.allclose(res_fair, res_pwm)
 
-    # test correctness
-    obs = -0.6042506
-    fct = np.array(
-        [
-            1.7812118,
-            0.5863797,
-            0.7038174,
-            -0.7743998,
-            -0.2751647,
-            1.1863249,
-            1.2990966,
-            -0.3242982,
-            -0.5968781,
-            0.9064937,
-        ]
-    )
-    res = sr.crps_ensemble(obs, fct, estimator="qd")
-    assert np.isclose(res, 0.6126602)
-
-
-def test_crps_quantile(backend):
+    # correctness on the pure-python/numpy path (exercised once)
+    if backend == "numpy":
+        obs_known = -0.6042506
+        fct_known = np.array(
+            [
+                1.7812118,
+                0.5863797,
+                0.7038174,
+                -0.7743998,
+                -0.2751647,
+                1.1863249,
+                1.2990966,
+                -0.3242982,
+                -0.5968781,
+                0.9064937,
+            ]
+        )
+        res = sr.crps_ensemble(obs_known, fct_known, estimator="qd")
+        assert np.isclose(res, 0.6126602)
+
+
+def test_crps_quantile(backend, to_backend, backend_kwargs):
     # test shapes
     obs = np.random.randn(N)
     fct = np.random.randn(N, ENSEMBLE_SIZE)
     alpha = np.linspace(0.1, 0.9, ENSEMBLE_SIZE)
-    res = sr.crps_quantile(obs, fct, alpha, backend=backend)
-    assert res.shape == (N,)
-    fct = np.random.randn(ENSEMBLE_SIZE, N)
     res = sr.crps_quantile(
-        obs, np.random.randn(ENSEMBLE_SIZE, N), alpha, m_axis=0, backend=backend
+        to_backend(obs), to_backend(fct), to_backend(alpha), **backend_kwargs
     )
-    assert res.shape == (N,)
+    assert_inferred(res, backend)
+    assert np.asarray(res).shape == (N,)
 
-    # Test quantile approximation close to analytical normal crps if forecast comes from the normal distribution
+    fct_t = np.random.randn(ENSEMBLE_SIZE, N)
+    res = sr.crps_quantile(
+        to_backend(obs),
+        to_backend(fct_t),
+        to_backend(alpha),
+        m_axis=0,
+        **backend_kwargs,
+    )
+    assert_inferred(res, backend)
+    assert np.asarray(res).shape == (N,)
+
+    # Test quantile approximation close to analytical normal crps if forecast comes
+    # from the normal distribution
     for mu in np.random.sample(size=10):
         for A in [9, 99, 999]:
             a0 = 1 / (A + 1)
             a1 = 1 - a0
-            fct = (
+            fctq = (
                 st.norm(np.repeat(mu, N), np.ones(N))
                 .ppf(np.linspace(np.repeat(a0, N), np.repeat(a1, N), A))
                 .T
             )
-            alpha = np.linspace(a0, a1, A)
-            obs = np.repeat(mu, N)
-            percentage_error_to_analytic = 1 - sr.crps_quantile(
-                obs, fct, alpha, backend=backend
-            ) / sr.crps_normal(obs, mu, 1, backend=backend)
-            percentage_error_to_analytic = np.asarray(percentage_error_to_analytic)
+            alphaq = np.linspace(a0, a1, A)
+            obsq = np.repeat(mu, N)
+            qcrps = sr.crps_quantile(
+                to_backend(obsq), to_backend(fctq), to_backend(alphaq), **backend_kwargs
+            )
+            ncrps = sr.crps_normal(
+                to_backend(obsq),
+                to_backend(np.repeat(mu, N)),
+                to_backend(np.ones(N)),
+                **backend_kwargs,
+            )
+            assert_inferred(qcrps, backend)
+            assert_inferred(ncrps, backend)
+            percentage_error_to_analytic = 1 - np.asarray(qcrps) / np.asarray(ncrps)
+            # jax runs in float32; allow a little extra slack on the tightest grids
+            tol = (1 / A) + (1e-3 if backend == "jax" else 0.0)
             assert np.all(
-                np.abs(percentage_error_to_analytic) < 1 / A
+                np.abs(percentage_error_to_analytic) < tol
             ), "Quantile CRPS should be close to normal CRPS"
 
     # Test raise valueerror if array sizes don't match
     with pytest.raises(ValueError):
-        sr.crps_quantile(obs, fct, alpha[0:42], backend=backend)
-        return
+        sr.crps_quantile(
+            to_backend(obs), to_backend(fct_t), to_backend(alpha), **backend_kwargs
+        )
+
 
+def test_crps_beta(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-def test_crps_beta(backend):
     if backend == "torch":
-        pytest.skip("Not implemented in torch backend")
+        # betainc has no native torch implementation
+        with pytest.raises(NotImplementedError):
+            sr.crps_beta(
+                fa(np.random.uniform(0, 1, (3, 3))),
+                fa(np.random.uniform(0, 3, (3, 3))),
+                fa(1.1),
+            )
+        return
 
     res = sr.crps_beta(
-        np.random.uniform(0, 1, (3, 3)),
-        np.random.uniform(0, 3, (3, 3)),
-        1.1,
-        backend=backend,
+        fa(np.random.uniform(0, 1, (3, 3))),
+        fa(np.random.uniform(0, 3, (3, 3))),
+        fa(1.1),
+        **backend_kwargs,
     )
-    assert res.shape == (3, 3)
-    assert not np.any(np.isnan(res))
+    assert_inferred(res, backend)
+    res_np = np.asarray(res)
+    assert res_np.shape == (3, 3)
+    assert not np.any(np.isnan(res_np))
 
     # test exceptions
     with pytest.raises(ValueError):
-        sr.crps_beta(0.3, 0.7, 1.1, lower=1.0, upper=0.0, backend=backend)
-        return
+        sr.crps_beta(
+            fa(0.3), fa(0.7), fa(1.1), lower=fa(1.0), upper=fa(0.0), **backend_kwargs
+        )
 
     # correctness tests
-    res = sr.crps_beta(0.3, 0.7, 1.1, backend=backend)
-    expected = 0.0850102437
-    assert np.isclose(res, expected)
+    res = sr.crps_beta(fa(0.3), fa(0.7), fa(1.1), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.0850102437, backend)
 
-    res = sr.crps_beta(-3.0, 0.7, 1.1, lower=-5.0, upper=4.0, backend=backend)
-    expected = 0.883206751
-    assert np.isclose(res, expected)
+    res = sr.crps_beta(
+        fa(-3.0), fa(0.7), fa(1.1), lower=fa(-5.0), upper=fa(4.0), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert_close(res, 0.883206751, backend)
 
     # test when lower and upper are arrays
     res = sr.crps_beta(
-        -3.0,
-        0.7,
-        1.1,
-        lower=np.array([-5.0, -5.0]),
-        upper=np.array([4.0, 4.0]),
+        fa(-3.0),
+        fa(0.7),
+        fa(1.1),
+        lower=fa([-5.0, -5.0]),
+        upper=fa([4.0, 4.0]),
+        **backend_kwargs,
     )
-    expected = np.array([0.883206751, 0.883206751])
-    assert np.allclose(res, expected)
+    assert_inferred(res, backend)
+    assert_close(res, np.array([0.883206751, 0.883206751]), backend)
 
 
-def test_crps_binomial(backend):
+def test_crps_binomial(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
+
     if backend == "torch":
-        pytest.skip("Not implemented in torch backend")
+        # betainc has no native torch implementation
+        with pytest.raises(NotImplementedError):
+            sr.crps_binomial(fa([8, 8]), fa([10, 10]), fa([0.9, 0.9]))
+        return
 
     # test correctness
-    res = sr.crps_binomial(8, 10, 0.9, backend=backend)
-    expected = 0.6685115
-    assert np.isclose(res, expected)
+    res = sr.crps_binomial(fa(8), fa(10), fa(0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.6685115, backend)
 
-    res = sr.crps_binomial(-8, 10, 0.9, backend=backend)
-    expected = 16.49896
-    assert np.isclose(res, expected)
+    res = sr.crps_binomial(fa(-8), fa(10), fa(0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 16.49896, backend)
 
-    res = sr.crps_binomial(18, 10, 0.9, backend=backend)
-    expected = 8.498957
-    assert np.isclose(res, expected)
+    res = sr.crps_binomial(fa(18), fa(10), fa(0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 8.498957, backend)
 
     # test broadcasting
     ones = np.ones(2)
     k, n, p = 8, 10, 0.9
-    s = sr.crps_binomial(k * ones, n, p, backend=backend)
-    assert np.isclose(s, np.array([0.6685115, 0.6685115])).all()
-    s = sr.crps_binomial(k * ones, n * ones, p, backend=backend)
-    assert np.isclose(s, np.array([0.6685115, 0.6685115])).all()
-    s = sr.crps_binomial(k * ones, n * ones, p * ones, backend=backend)
-    assert np.isclose(s, np.array([0.6685115, 0.6685115])).all()
-    s = sr.crps_binomial(k, n * ones, p * ones, backend=backend)
-    assert np.isclose(s, np.array([0.6685115, 0.6685115])).all()
-    s = sr.crps_binomial(k * ones, n, p * ones, backend=backend)
-    assert np.isclose(s, np.array([0.6685115, 0.6685115])).all()
-
-
-def test_crps_exponential(backend):
+    expected = np.array([0.6685115, 0.6685115])
+    for args in [
+        (fa(k * ones), fa(n), fa(p)),
+        (fa(k * ones), fa(n * ones), fa(p)),
+        (fa(k * ones), fa(n * ones), fa(p * ones)),
+        (fa(k), fa(n * ones), fa(p * ones)),
+        (fa(k * ones), fa(n), fa(p * ones)),
+    ]:
+        s = sr.crps_binomial(*args, **backend_kwargs)
+        assert_inferred(s, backend)
+        assert_close(s, expected, backend)
+
+
+def test_crps_exponential(backend, to_backend, backend_kwargs):
     # TODO: add and test exception handling
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
     # test correctness
-    obs, rate = 3, 0.7
-    res = sr.crps_exponential(obs, rate, backend=backend)
-    expected = 1.20701837
-    assert np.isclose(res, expected)
+    res = sr.crps_exponential(fa(3), fa(0.7), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 1.20701837, backend)
 
 
-def test_crps_exponentialM(backend):
-    obs, mass, location, scale = 0.3, 0.1, 0.0, 1.0
-    res = sr.crps_exponentialM(obs, mass, location, scale, backend=backend)
-    expected = 0.2384728
-    assert np.isclose(res, expected)
+def test_crps_exponentialM(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-    obs, mass, location, scale = 0.3, 0.1, -2.0, 3.0
-    res = sr.crps_exponentialM(obs, mass, location, scale, backend=backend)
-    expected = 0.6236187
-    assert np.isclose(res, expected)
+    res = sr.crps_exponentialM(fa(0.3), fa(0.1), fa(0.0), fa(1.0), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.2384728, backend)
 
-    obs, mass, location, scale = -1.2, 0.1, -2.0, 3.0
-    res = sr.crps_exponentialM(obs, mass, location, scale, backend=backend)
-    expected = 0.751013
-    assert np.isclose(res, expected)
+    res = sr.crps_exponentialM(fa(0.3), fa(0.1), fa(-2.0), fa(3.0), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.6236187, backend)
 
+    res = sr.crps_exponentialM(fa(-1.2), fa(0.1), fa(-2.0), fa(3.0), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.751013, backend)
 
-def test_crps_2pexponential(backend):
-    obs, scale1, scale2, location = 0.3, 0.1, 4.3, 0.0
-    res = sr.crps_2pexponential(obs, scale1, scale2, location, backend=backend)
-    expected = 1.787032
-    assert np.isclose(res, expected)
 
-    obs, scale1, scale2, location = -20.8, 7.1, 2.0, -25.4
-    res = sr.crps_2pexponential(obs, scale1, scale2, location, backend=backend)
-    expected = 6.018359
-    assert np.isclose(res, expected)
+def test_crps_2pexponential(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
+
+    res = sr.crps_2pexponential(fa(0.3), fa(0.1), fa(4.3), fa(0.0), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 1.787032, backend)
+
+    res = sr.crps_2pexponential(
+        fa(-20.8), fa(7.1), fa(2.0), fa(-25.4), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert_close(res, 6.018359, backend)
 
 
-def test_crps_gamma(backend):
+def test_crps_gamma(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
+
     obs, shape, rate = 0.2, 1.1, 0.7
     expected = 0.6343718
 
-    res = sr.crps_gamma(obs, shape, rate, backend=backend)
-    assert np.isclose(res, expected)
+    res = sr.crps_gamma(fa(obs), fa(shape), fa(rate), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
 
-    res = sr.crps_gamma(obs, shape, scale=1 / rate, backend=backend)
-    assert np.isclose(res, expected)
+    res = sr.crps_gamma(fa(obs), fa(shape), scale=fa(1 / rate), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
 
     with pytest.raises(ValueError):
-        sr.crps_gamma(obs, shape, rate, scale=1 / rate, backend=backend)
-        return
+        sr.crps_gamma(
+            fa(obs), fa(shape), fa(rate), scale=fa(1 / rate), **backend_kwargs
+        )
 
     with pytest.raises(ValueError):
-        sr.crps_gamma(obs, shape, backend=backend)
-        return
+        sr.crps_gamma(fa(obs), fa(shape), **backend_kwargs)
+
 
+def test_crps_csg0(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-def test_crps_csg0(backend):
     obs, shape, rate, shift = 0.7, 0.5, 2.0, 0.3
     expected = 0.5411044
 
-    expected_gamma = sr.crps_gamma(obs, shape, rate, backend=backend)
-    res_gamma = sr.crps_csg0(obs, shape=shape, rate=rate, shift=0.0, backend=backend)
-    assert np.isclose(res_gamma, expected_gamma)
-
-    res = sr.crps_csg0(obs, shape=shape, rate=rate, shift=shift, backend=backend)
-    assert np.isclose(res, expected)
+    expected_gamma = sr.crps_gamma(fa(obs), fa(shape), fa(rate), **backend_kwargs)
+    res_gamma = sr.crps_csg0(
+        fa(obs), shape=fa(shape), rate=fa(rate), shift=fa(0.0), **backend_kwargs
+    )
+    assert_inferred(res_gamma, backend)
+    assert_close(res_gamma, np.asarray(expected_gamma), backend)
 
-    res = sr.crps_csg0(obs, shape=shape, scale=1.0 / rate, shift=shift, backend=backend)
-    assert np.isclose(res, expected)
+    res = sr.crps_csg0(
+        fa(obs), shape=fa(shape), rate=fa(rate), shift=fa(shift), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
+
+    res = sr.crps_csg0(
+        fa(obs),
+        shape=fa(shape),
+        scale=fa(1.0 / rate),
+        shift=fa(shift),
+        **backend_kwargs,
+    )
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
 
     with pytest.raises(ValueError):
         sr.crps_csg0(
-            obs, shape=shape, rate=rate, scale=1.0 / rate, shift=shift, backend=backend
+            fa(obs),
+            shape=fa(shape),
+            rate=fa(rate),
+            scale=fa(1.0 / rate),
+            shift=fa(shift),
+            **backend_kwargs,
         )
-        return
 
     with pytest.raises(ValueError):
-        sr.crps_csg0(obs, shape=shape, shift=shift, backend=backend)
-        return
+        sr.crps_csg0(fa(obs), shape=fa(shape), shift=fa(shift), **backend_kwargs)
 
 
-def test_crps_gev(backend):
-    if backend == "torch":
-        pytest.skip("`expi` not implemented in torch backend")
-
-    obs, xi, mu, sigma = 0.3, 0.0, 0.0, 1.0
-    assert np.isclose(sr.crps_gev(obs, xi, backend=backend), 0.276440963)
-    mu = 0.1
-    assert np.isclose(
-        sr.crps_gev(obs + mu, xi, location=mu, backend=backend), 0.276440963
-    )
-    sigma = 0.9
-    mu = 0.0
-    assert np.isclose(
-        sr.crps_gev(obs * sigma, xi, scale=sigma, backend=backend),
-        0.276440963 * sigma,
-    )
-
-    obs, xi, mu, sigma = 0.3, 0.7, 0.0, 1.0
-    assert np.isclose(sr.crps_gev(obs, xi, backend=backend), 0.458044365)
-    mu = 0.1
-    assert np.isclose(
-        sr.crps_gev(obs + mu, xi, location=mu, backend=backend), 0.458044365
-    )
-    sigma = 0.9
-    mu = 0.0
-    assert np.isclose(
-        sr.crps_gev(obs * sigma, xi, scale=sigma, backend=backend),
-        0.458044365 * sigma,
-    )
-
-    obs, xi, mu, sigma = 0.3, -0.7, 0.0, 1.0
-    assert np.isclose(sr.crps_gev(obs, xi, backend=backend), 0.207621488)
-    mu = 0.1
-    assert np.isclose(
-        sr.crps_gev(obs + mu, xi, location=mu, backend=backend), 0.207621488
-    )
-    sigma = 0.9
-    mu = 0.0
-    assert np.isclose(
-        sr.crps_gev(obs * sigma, xi, scale=sigma, backend=backend),
-        0.207621488 * sigma,
-    )
-
-
-def test_crps_gpd(backend):
-    assert np.isclose(sr.crps_gpd(0.3, 0.9, backend=backend), 0.6849332)
-    assert np.isclose(sr.crps_gpd(-0.3, 0.9, backend=backend), 1.209091)
-    assert np.isclose(sr.crps_gpd(0.3, -0.9, backend=backend), 0.1338672)
-    assert np.isclose(sr.crps_gpd(-0.3, -0.9, backend=backend), 0.6448276)
+def test_crps_gev(backend, to_backend, backend_kwargs):
+    def fa(x):
+        # use size-2 arrays: crps_gev collapses size-1 results to a python float,
+        # which would defeat assert_inferred (and raises on jax size-1 arrays).
+        return to_backend(np.full(2, float(x)))
 
-    assert np.isnan(sr.crps_gpd(0.3, 1.0, backend=backend))
-    assert np.isnan(sr.crps_gpd(0.3, 1.2, backend=backend))
-    assert np.isnan(sr.crps_gpd(0.3, 0.9, mass=-0.1, backend=backend))
-    assert np.isnan(sr.crps_gpd(0.3, 0.9, mass=1.1, backend=backend))
-
-    res = 0.281636441
-    assert np.isclose(sr.crps_gpd(0.3 + 0.1, 0.0, location=0.1, backend=backend), res)
-    assert np.isclose(
-        sr.crps_gpd(0.3 * 0.9, 0.0, scale=0.9, backend=backend), res * 0.9
-    )
+    if backend == "torch":
+        # `expi` has no native torch implementation
+        with pytest.raises(NotImplementedError):
+            sr.crps_gev(fa(0.3), fa(0.0))
+        return
 
+    obs, xi = 0.3, 0.0
+    res = sr.crps_gev(fa(obs), fa(xi), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.276440963, backend)
+    res = sr.crps_gev(fa(obs + 0.1), fa(xi), location=fa(0.1), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.276440963, backend)
+    res = sr.crps_gev(fa(obs * 0.9), fa(xi), scale=fa(0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.276440963 * 0.9, backend)
+
+    obs, xi = 0.3, 0.7
+    res = sr.crps_gev(fa(obs), fa(xi), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.458044365, backend)
+    res = sr.crps_gev(fa(obs + 0.1), fa(xi), location=fa(0.1), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.458044365, backend)
+    res = sr.crps_gev(fa(obs * 0.9), fa(xi), scale=fa(0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.458044365 * 0.9, backend)
+
+    obs, xi = 0.3, -0.7
+    res = sr.crps_gev(fa(obs), fa(xi), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.207621488, backend)
+    res = sr.crps_gev(fa(obs + 0.1), fa(xi), location=fa(0.1), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.207621488, backend)
+    res = sr.crps_gev(fa(obs * 0.9), fa(xi), scale=fa(0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.207621488 * 0.9, backend)
+
+
+def test_crps_gpd(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
+
+    res = sr.crps_gpd(fa(0.3), fa(0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.6849332, backend)
+    res = sr.crps_gpd(fa(-0.3), fa(0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 1.209091, backend)
+    res = sr.crps_gpd(fa(0.3), fa(-0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.1338672, backend)
+    res = sr.crps_gpd(fa(-0.3), fa(-0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.6448276, backend)
+
+    res = sr.crps_gpd(fa(0.3), fa(1.0), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert np.isnan(np.asarray(res))
+    res = sr.crps_gpd(fa(0.3), fa(1.2), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert np.isnan(np.asarray(res))
+    res = sr.crps_gpd(fa(0.3), fa(0.9), mass=fa(-0.1), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert np.isnan(np.asarray(res))
+    res = sr.crps_gpd(fa(0.3), fa(0.9), mass=fa(1.1), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert np.isnan(np.asarray(res))
+
+    expected = 0.281636441
+    res = sr.crps_gpd(fa(0.3 + 0.1), fa(0.0), location=fa(0.1), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
+    res = sr.crps_gpd(fa(0.3 * 0.9), fa(0.0), scale=fa(0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, expected * 0.9, backend)
+
+
+def test_crps_gtclogis(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-def test_crps_gtclogis(backend):
     obs, location, scale, lower, upper, lmass, umass = (
         1.8,
         -3.0,
@@ -599,46 +780,77 @@ def test_crps_gtclogis(backend):
     )
     expected = 1.599721
     res = sr.crps_gtclogistic(
-        obs, location, scale, lower, upper, lmass, umass, backend=backend
+        fa(obs),
+        fa(location),
+        fa(scale),
+        fa(lower),
+        fa(upper),
+        fa(lmass),
+        fa(umass),
+        **backend_kwargs,
     )
-    assert np.isclose(res, expected)
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
 
     # aligns with crps_logistic
-    res0 = sr.crps_logistic(obs, location, scale, backend=backend)
-    res = sr.crps_gtclogistic(obs, location, scale, backend=backend)
-    assert np.isclose(res, res0)
+    res0 = sr.crps_logistic(fa(obs), fa(location), fa(scale), **backend_kwargs)
+    res = sr.crps_gtclogistic(fa(obs), fa(location), fa(scale), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, np.asarray(res0), backend)
 
     # aligns with crps_tlogistic
-    res0 = sr.crps_tlogistic(obs, location, scale, lower, upper, backend=backend)
-    res = sr.crps_gtclogistic(obs, location, scale, lower, upper, backend=backend)
-    assert np.isclose(res, res0)
+    res0 = sr.crps_tlogistic(
+        fa(obs), fa(location), fa(scale), fa(lower), fa(upper), **backend_kwargs
+    )
+    res = sr.crps_gtclogistic(
+        fa(obs), fa(location), fa(scale), fa(lower), fa(upper), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert_close(res, np.asarray(res0), backend)
+
 
+def test_crps_tlogis(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-def test_crps_tlogis(backend):
     obs, location, scale, lower, upper = 4.9, 3.5, 2.3, 0.0, 20.0
     expected = 0.7658979
-    res = sr.crps_tlogistic(obs, location, scale, lower, upper, backend=backend)
-    assert np.isclose(res, expected)
+    res = sr.crps_tlogistic(
+        fa(obs), fa(location), fa(scale), fa(lower), fa(upper), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
 
     # aligns with crps_logistic
-    res0 = sr.crps_logistic(obs, location, scale, backend=backend)
-    res = sr.crps_tlogistic(obs, location, scale, backend=backend)
-    assert np.isclose(res, res0)
+    res0 = sr.crps_logistic(fa(obs), fa(location), fa(scale), **backend_kwargs)
+    res = sr.crps_tlogistic(fa(obs), fa(location), fa(scale), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, np.asarray(res0), backend)
+
 
+def test_crps_clogis(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-def test_crps_clogis(backend):
     obs, location, scale, lower, upper = -0.9, 0.4, 1.1, 0.0, 1.0
     expected = 1.13237
-    res = sr.crps_clogistic(obs, location, scale, lower, upper, backend=backend)
-    assert np.isclose(res, expected)
+    res = sr.crps_clogistic(
+        fa(obs), fa(location), fa(scale), fa(lower), fa(upper), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
 
     # aligns with crps_logistic
-    res0 = sr.crps_logistic(obs, location, scale, backend=backend)
-    res = sr.crps_clogistic(obs, location, scale, backend=backend)
-    assert np.isclose(res, res0)
+    res0 = sr.crps_logistic(fa(obs), fa(location), fa(scale), **backend_kwargs)
+    res = sr.crps_clogistic(fa(obs), fa(location), fa(scale), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, np.asarray(res0), backend)
+
 
+def test_crps_gtcnormal(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-def test_crps_gtcnormal(backend):
     obs, location, scale, lower, upper, lmass, umass = (
         0.9,
         -2.3,
@@ -650,48 +862,95 @@ def test_crps_gtcnormal(backend):
     )
     expected = 1.422805
     res = sr.crps_gtcnormal(
-        obs, location, scale, lower, upper, lmass, umass, backend=backend
+        fa(obs),
+        fa(location),
+        fa(scale),
+        fa(lower),
+        fa(upper),
+        fa(lmass),
+        fa(umass),
+        **backend_kwargs,
     )
-    assert np.isclose(res, expected)
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
 
     # aligns with crps_normal
-    res0 = sr.crps_normal(obs, location, scale, backend=backend)
-    res = sr.crps_gtcnormal(obs, location, scale, backend=backend)
-    assert np.isclose(res, res0)
+    res0 = sr.crps_normal(fa(obs), fa(location), fa(scale), **backend_kwargs)
+    res = sr.crps_gtcnormal(fa(obs), fa(location), fa(scale), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, np.asarray(res0), backend)
 
     # aligns with crps_tnormal
-    res0 = sr.crps_tnormal(obs, location, scale, lower, upper, backend=backend)
-    res = sr.crps_gtcnormal(obs, location, scale, lower, upper, backend=backend)
-    assert np.isclose(res, res0)
+    res0 = sr.crps_tnormal(
+        fa(obs), fa(location), fa(scale), fa(lower), fa(upper), **backend_kwargs
+    )
+    res = sr.crps_gtcnormal(
+        fa(obs), fa(location), fa(scale), fa(lower), fa(upper), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert_close(res, np.asarray(res0), backend)
+
 
+def test_crps_tnormal(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-def test_crps_tnormal(backend):
     obs, location, scale, lower, upper = -1.0, 2.9, 2.2, 1.5, 17.3
     expected = 3.982434
-    res = sr.crps_tnormal(obs, location, scale, lower, upper, backend=backend)
-    assert np.isclose(res, expected)
+    res = sr.crps_tnormal(
+        fa(obs), fa(location), fa(scale), fa(lower), fa(upper), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
 
     # aligns with crps_normal
-    res0 = sr.crps_normal(obs, location, scale, backend=backend)
-    res = sr.crps_tnormal(obs, location, scale, backend=backend)
-    assert np.isclose(res, res0)
+    res0 = sr.crps_normal(fa(obs), fa(location), fa(scale), **backend_kwargs)
+    res = sr.crps_tnormal(fa(obs), fa(location), fa(scale), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, np.asarray(res0), backend)
+
 
+def test_crps_cnormal(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-def test_crps_cnormal(backend):
     obs, location, scale, lower, upper = 1.8, 0.4, 1.1, 0.0, 2.0
     expected = 0.8296078
-    res = sr.crps_cnormal(obs, location, scale, lower, upper, backend=backend)
-    assert np.isclose(res, expected)
+    res = sr.crps_cnormal(
+        fa(obs), fa(location), fa(scale), fa(lower), fa(upper), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
+
+    # aligns with crps_normal (default unbounded). The infinite-bound arithmetic
+    # overflows to NaN in jax's float32; it is correct under float64 (jax x64), so
+    # only the numeric comparison is skipped on jax -- inference is still asserted.
+    res0 = sr.crps_normal(fa(obs), fa(location), fa(scale), **backend_kwargs)
+    res = sr.crps_cnormal(fa(obs), fa(location), fa(scale), **backend_kwargs)
+    assert_inferred(res, backend)
+    if backend != "jax":
+        assert_close(res, np.asarray(res0), backend)
 
-    # aligns with crps_normal
-    res0 = sr.crps_normal(obs, location, scale, backend=backend)
-    res = sr.crps_cnormal(obs, location, scale, backend=backend)
-    assert np.isclose(res, res0)
 
+def test_crps_gtct(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
+
+    if backend == "torch":
+        # betainc/hyp2f1 have no native torch implementation
+        with pytest.raises(NotImplementedError):
+            sr.crps_gtct(
+                fa(0.9),
+                fa(20.1),
+                fa(-2.3),
+                fa(4.1),
+                fa(-7.3),
+                fa(1.7),
+                fa(0.0),
+                fa(0.21),
+            )
+        return
 
-def test_crps_gtct(backend):
-    if backend in ["jax", "torch"]:
-        pytest.skip("Not implemented in jax, torch backends")
     obs, df, location, scale, lower, upper, lmass, umass = (
         0.9,
         20.1,
@@ -704,258 +963,397 @@ def test_crps_gtct(backend):
     )
     expected = 1.423042
     res = sr.crps_gtct(
-        obs, df, location, scale, lower, upper, lmass, umass, backend=backend
+        fa(obs),
+        fa(df),
+        fa(location),
+        fa(scale),
+        fa(lower),
+        fa(upper),
+        fa(lmass),
+        fa(umass),
+        **backend_kwargs,
     )
-    assert np.isclose(res, expected)
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
 
     # aligns with crps_t
-    res0 = sr.crps_t(obs, df, location, scale, backend=backend)
-    res = sr.crps_gtct(obs, df, location, scale, backend=backend)
-    assert np.isclose(res, res0)
+    res0 = sr.crps_t(fa(obs), fa(df), fa(location), fa(scale), **backend_kwargs)
+    res = sr.crps_gtct(fa(obs), fa(df), fa(location), fa(scale), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, np.asarray(res0), backend)
+
+    # aligns with crps_tt
+    res0 = sr.crps_tt(
+        fa(obs), fa(df), fa(location), fa(scale), fa(lower), fa(upper), **backend_kwargs
+    )
+    res = sr.crps_gtct(
+        fa(obs), fa(df), fa(location), fa(scale), fa(lower), fa(upper), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert_close(res, np.asarray(res0), backend)
 
-    # aligns with crps_tnormal
-    res0 = sr.crps_tt(obs, df, location, scale, lower, upper, backend=backend)
-    res = sr.crps_gtct(obs, df, location, scale, lower, upper, backend=backend)
-    assert np.isclose(res, res0)
 
+def test_crps_tt(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-def test_crps_tt(backend):
-    if backend in ["jax", "torch"]:
-        pytest.skip("Not implemented in jax, torch backends")
+    if backend == "torch":
+        # betainc/hyp2f1 have no native torch implementation
+        with pytest.raises(NotImplementedError):
+            sr.crps_tt(fa(-1.0), fa(2.9), fa(3.1), fa(4.2), fa(1.5), fa(17.3))
+        return
 
     obs, df, location, scale, lower, upper = -1.0, 2.9, 3.1, 4.2, 1.5, 17.3
     expected = 5.084272
-    res = sr.crps_tt(obs, df, location, scale, lower, upper, backend=backend)
-    assert np.isclose(res, expected)
+    res = sr.crps_tt(
+        fa(obs), fa(df), fa(location), fa(scale), fa(lower), fa(upper), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
 
     # aligns with crps_t
-    res0 = sr.crps_t(obs, df, location, scale, backend=backend)
-    res = sr.crps_tt(obs, df, location, scale, backend=backend)
-    assert np.isclose(res, res0)
+    res0 = sr.crps_t(fa(obs), fa(df), fa(location), fa(scale), **backend_kwargs)
+    res = sr.crps_tt(fa(obs), fa(df), fa(location), fa(scale), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, np.asarray(res0), backend)
+
 
+def test_crps_ct(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-def test_crps_ct(backend):
-    if backend in ["jax", "torch"]:
-        pytest.skip("Not implemented in jax, torch backends")
+    if backend == "torch":
+        # betainc/hyp2f1 have no native torch implementation
+        with pytest.raises(NotImplementedError):
+            sr.crps_ct(fa(1.8), fa(5.4), fa(0.4), fa(1.1), fa(0.0), fa(2.0))
+        return
 
     obs, df, location, scale, lower, upper = 1.8, 5.4, 0.4, 1.1, 0.0, 2.0
     expected = 0.8028996
-    res = sr.crps_ct(obs, df, location, scale, lower, upper, backend=backend)
-    assert np.isclose(res, expected)
+    res = sr.crps_ct(
+        fa(obs), fa(df), fa(location), fa(scale), fa(lower), fa(upper), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert_close(res, expected, backend)
 
     # aligns with crps_t
-    res0 = sr.crps_t(obs, df, location, scale, backend=backend)
-    res = sr.crps_ct(obs, df, location, scale, backend=backend)
-    assert np.isclose(res, res0)
+    res0 = sr.crps_t(fa(obs), fa(df), fa(location), fa(scale), **backend_kwargs)
+    res = sr.crps_ct(fa(obs), fa(df), fa(location), fa(scale), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, np.asarray(res0), backend)
 
 
-def test_crps_hypergeometric(backend):
+def test_crps_hypergeometric(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
+
     if backend == "torch":
-        pytest.skip("Currently not working in torch backend")
+        # crps_hypergeometric calls xp.size(), which array_api_compat.torch
+        # does not expose (genuine torch gap, see report).
+        pytest.skip("array_api_compat.torch has no `size`; crps_hypergeometric uses it")
 
     # test shapes
-    res = sr.crps_hypergeometric(5 * np.ones((2, 2)), 7, 13, 12, backend=backend)
-    assert res.shape == (2, 2)
+    res = sr.crps_hypergeometric(
+        fa(5 * np.ones((2, 2))), fa(7), fa(13), fa(12), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert np.asarray(res).shape == (2, 2)
 
-    res = sr.crps_hypergeometric(5, 7 * np.ones((2, 2)), 13, 12, backend=backend)
-    assert res.shape == (2, 2)
+    res = sr.crps_hypergeometric(
+        fa(5), fa(7 * np.ones((2, 2))), fa(13), fa(12), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert np.asarray(res).shape == (2, 2)
 
-    res = sr.crps_hypergeometric(5, 7, 13 * np.ones((2, 2)), 12, backend=backend)
-    assert res.shape == (2, 2)
+    res = sr.crps_hypergeometric(
+        fa(5), fa(7), fa(13 * np.ones((2, 2))), fa(12), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert np.asarray(res).shape == (2, 2)
 
     # test correctness
-    assert np.isclose(sr.crps_hypergeometric(5, 7, 13, 12), 0.4469742)
+    res = sr.crps_hypergeometric(fa(5), fa(7), fa(13), fa(12), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.4469742, backend)
 
 
-def test_crps_laplace(backend):
-    assert np.isclose(sr.crps_laplace(-3, backend=backend), 2.29978707)
-    assert np.isclose(
-        sr.crps_laplace(-3 + 0.1, location=0.1, backend=backend), 2.29978707
-    )
-    assert np.isclose(
-        sr.crps_laplace(-3 * 0.9, scale=0.9, backend=backend), 0.9 * 2.29978707
-    )
+def test_crps_laplace(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
+    res = sr.crps_laplace(fa(-3), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 2.29978707, backend)
 
-def test_crps_logis(backend):
-    obs, mu, sigma = 17.1, 13.8, 3.3
-    expected = 2.067527
-    res = sr.crps_logistic(obs, mu, sigma, backend=backend)
-    assert np.isclose(res, expected)
+    res = sr.crps_laplace(fa(-3 + 0.1), location=fa(0.1), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 2.29978707, backend)
 
-    obs, mu, sigma = 3.1, 4.0, 0.5
-    expected = 0.5529776
-    res = sr.crps_logistic(obs, mu, sigma, backend=backend)
-    assert np.isclose(res, expected)
+    res = sr.crps_laplace(fa(-3 * 0.9), scale=fa(0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.9 * 2.29978707, backend)
 
 
-def test_crps_loglaplace(backend):
-    assert np.isclose(sr.crps_loglaplace(3.0, 0.1, 0.9, backend=backend), 1.16202051)
+def test_crps_logis(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
+    res = sr.crps_logistic(fa(17.1), fa(13.8), fa(3.3), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 2.067527, backend)
+
+    res = sr.crps_logistic(fa(3.1), fa(4.0), fa(0.5), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.5529776, backend)
+
+
+def test_crps_loglaplace(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
+
+    res = sr.crps_loglaplace(fa(3.0), fa(0.1), fa(0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 1.16202051, backend)
+
+
+def test_crps_loglogistic(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-def test_crps_loglogistic(backend):
     if backend == "torch":
-        pytest.skip("Not implemented in torch backend")
+        # hyp2f1 has no native torch implementation
+        with pytest.raises(NotImplementedError):
+            sr.crps_loglogistic(fa(3.0), fa(0.1), fa(0.9))
+        return
 
-    # TODO: investigate why JAX results are different from other backends
-    # (would fail test with smaller tolerance)
-    assert np.isclose(
-        sr.crps_loglogistic(3.0, 0.1, 0.9, backend=backend), 1.13295277, atol=1e-4
-    )
+    # JAX results differ slightly from other backends -> looser atol
+    res = sr.crps_loglogistic(fa(3.0), fa(0.1), fa(0.9), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 1.13295277, backend, atol=1e-4)
 
 
-def test_crps_lognormal(backend):
-    obs = np.exp(np.random.randn(N))
-    mulog = np.log(obs) + np.random.randn(N) * 0.1
+def test_crps_lognormal(backend, to_backend, backend_kwargs):
+    obs0 = np.exp(np.random.randn(N))
+    mulog = np.log(obs0) + np.random.randn(N) * 0.1
     sigmalog = abs(np.random.randn(N)) * 0.3
 
     # non-negative values
-    res = sr.crps_lognormal(obs, mulog, sigmalog, backend=backend)
+    res = sr.crps_lognormal(
+        to_backend(obs0), to_backend(mulog), to_backend(sigmalog), **backend_kwargs
+    )
+    assert_inferred(res, backend)
     res = np.asarray(res)
     assert not np.any(np.isnan(res))
     assert not np.any(res < 0.0)
 
     # approx zero when perfect forecast
-    mulog = np.log(obs) + np.random.randn(N) * 1e-6
+    mulog = np.log(obs0) + np.random.randn(N) * 1e-6
     sigmalog = abs(np.random.randn(N)) * 1e-6
-    res = sr.crps_lognormal(obs, mulog, sigmalog, backend=backend)
+    res = sr.crps_lognormal(
+        to_backend(obs0), to_backend(mulog), to_backend(sigmalog), **backend_kwargs
+    )
+    assert_inferred(res, backend)
     res = np.asarray(res)
     assert not np.any(np.isnan(res))
     assert not np.any(res - 0.0 > 0.0001)
 
 
-def test_crps_mixnorm(backend):
+def test_crps_mixnorm(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
+
     obs, m, s, w = 0.3, [0.0, -2.9, 0.9], [0.5, 1.4, 0.7], [1 / 3, 1 / 3, 1 / 3]
-    res = sr.crps_mixnorm(obs, m, s, w, backend=backend)
-    expected = 0.4510451
-    assert np.isclose(res, expected)
+    res = sr.crps_mixnorm(fa(obs), fa(m), fa(s), fa(w), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.4510451, backend)
 
-    res0 = sr.crps_mixnorm(obs, m, s, backend=backend)
-    assert np.isclose(res, res0)
+    res0 = sr.crps_mixnorm(fa(obs), fa(m), fa(s), **backend_kwargs)
+    assert_inferred(res0, backend)
+    assert_close(res, np.asarray(res0), backend)
 
     w = [0.3, 0.1, 0.6]
-    res = sr.crps_mixnorm(obs, m, s, w, backend=backend)
-    expected = 0.2354619
-    assert np.isclose(res, expected)
+    res = sr.crps_mixnorm(fa(obs), fa(m), fa(s), fa(w), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.2354619, backend)
 
     obs = [-1.6, 0.3]
     m = [[0.0, -2.9], [0.6, 0.0], [-1.1, -2.3]]
     s = [[0.5, 1.7], [1.1, 0.7], [1.4, 1.5]]
-    res1 = sr.crps_mixnorm(obs, m, s, m_axis=0, backend=backend)
+    res1 = sr.crps_mixnorm(fa(obs), fa(m), fa(s), m_axis=0, **backend_kwargs)
+    assert_inferred(res1, backend)
 
     m = [[0.0, 0.6, -1.1], [-2.9, 0.0, -2.3]]
     s = [[0.5, 1.1, 1.4], [1.7, 0.7, 1.5]]
-    res2 = sr.crps_mixnorm(obs, m, s, backend=backend)
-    assert np.allclose(res1, res2)
+    res2 = sr.crps_mixnorm(fa(obs), fa(m), fa(s), **backend_kwargs)
+    assert_inferred(res2, backend)
+    assert_close(res1, np.asarray(res2), backend)
+
 
+def test_crps_negbinom(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-def test_crps_negbinom(backend):
-    if backend in ["jax", "torch"]:
-        pytest.skip("Not implemented in jax, torch backends")
+    if backend == "torch":
+        # betainc/hyp2f1 have no native torch implementation
+        with pytest.raises(NotImplementedError):
+            sr.crps_negbinom(fa(2.0), fa(7.0), fa(0.8))
+        return
 
     # test exceptions
     with pytest.raises(ValueError):
-        sr.crps_negbinom(0.3, 7.0, 0.8, mu=7.3, backend=backend)
+        sr.crps_negbinom(fa(0.3), fa(7.0), fa(0.8), mu=fa(7.3), **backend_kwargs)
 
     # test correctness
-    obs, n, prob = 2.0, 7.0, 0.8
-    res = sr.crps_negbinom(obs, n, prob, backend=backend)
-    expected = 0.3834322
-    assert np.isclose(res, expected)
+    res = sr.crps_negbinom(fa(2.0), fa(7.0), fa(0.8), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.3834322, backend)
 
-    obs, n, prob = 1.5, 2.0, 0.5
-    res = sr.crps_negbinom(obs, n, prob, backend=backend)
-    expected = 0.462963
-    assert np.isclose(res, expected)
+    res = sr.crps_negbinom(fa(1.5), fa(2.0), fa(0.5), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.462963, backend)
 
-    obs, n, prob = -1.0, 17.0, 0.1
-    res = sr.crps_negbinom(obs, n, prob, backend=backend)
-    expected = 132.0942
-    assert np.isclose(res, expected)
+    # large-magnitude case: overflows to NaN in jax's float32 but is correct under
+    # float64 (jax x64), so only the numeric comparison is skipped on jax.
+    res = sr.crps_negbinom(fa(-1.0), fa(17.0), fa(0.1), **backend_kwargs)
+    assert_inferred(res, backend)
+    if backend != "jax":
+        assert_close(res, 132.0942, backend)
 
-    obs, n, mu = 2.3, 11.0, 7.3
-    res = sr.crps_negbinom(obs, n, mu=mu, backend=backend)
-    expected = 3.149218
-    assert np.isclose(res, expected)
+    res = sr.crps_negbinom(fa(2.3), fa(11.0), mu=fa(7.3), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 3.149218, backend)
 
 
-def test_crps_normal(backend):
-    obs = np.random.randn(N)
-    mu = obs + np.random.randn(N) * 0.1
+def test_crps_normal(backend, to_backend, backend_kwargs):
+    obs0 = np.random.randn(N)
+    mu = obs0 + np.random.randn(N) * 0.1
     sigma = abs(np.random.randn(N)) * 0.3
 
     # non-negative values
-    res = sr.crps_normal(obs, mu, sigma, backend=backend)
+    res = sr.crps_normal(
+        to_backend(obs0), to_backend(mu), to_backend(sigma), **backend_kwargs
+    )
+    assert_inferred(res, backend)
     res = np.asarray(res)
     assert not np.any(np.isnan(res))
     assert not np.any(res < 0.0)
 
     # approx zero when perfect forecast
-    mu = obs + np.random.randn(N) * 1e-6
+    mu = obs0 + np.random.randn(N) * 1e-6
     sigma = abs(np.random.randn(N)) * 1e-6
-    res = sr.crps_normal(obs, mu, sigma, backend=backend)
+    res = sr.crps_normal(
+        to_backend(obs0), to_backend(mu), to_backend(sigma), **backend_kwargs
+    )
+    assert_inferred(res, backend)
     res = np.asarray(res)
-
     assert not np.any(np.isnan(res))
     assert not np.any(res - 0.0 > 0.0001)
 
 
-def test_crps_2pnormal(backend):
-    obs = np.random.randn(N)
-    mu = obs + np.random.randn(N) * 0.1
+def test_crps_2pnormal(backend, to_backend, backend_kwargs):
+    obs0 = np.random.randn(N)
+    mu = obs0 + np.random.randn(N) * 0.1
     sigma1 = abs(np.random.randn(N)) * 0.3
     sigma2 = abs(np.random.randn(N)) * 0.2
 
-    res = sr.crps_2pnormal(obs, sigma1, sigma2, mu, backend=backend)
+    res = sr.crps_2pnormal(
+        to_backend(obs0),
+        to_backend(sigma1),
+        to_backend(sigma2),
+        to_backend(mu),
+        **backend_kwargs,
+    )
+    assert_inferred(res, backend)
     res = np.asarray(res)
     assert not np.any(np.isnan(res))
     assert not np.any(res < 0.0)
 
 
-def test_crps_poisson(backend):
-    obs, mean = 1.0, 3.0
-    res = sr.crps_poisson(obs, mean, backend=backend)
-    expected = 1.143447
-    assert np.isclose(res, expected)
+def test_crps_poisson(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-    obs, mean = 1.5, 2.3
-    res = sr.crps_poisson(obs, mean, backend=backend)
-    expected = 0.5001159
-    assert np.isclose(res, expected)
+    res = sr.crps_poisson(fa(1.0), fa(3.0), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 1.143447, backend)
 
-    obs, mean = -1.0, 1.5
-    res = sr.crps_poisson(obs, mean, backend=backend)
-    expected = 1.840259
-    assert np.isclose(res, expected)
+    res = sr.crps_poisson(fa(1.5), fa(2.3), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.5001159, backend)
 
+    res = sr.crps_poisson(fa(-1.0), fa(1.5), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 1.840259, backend)
 
-def test_crps_t(backend):
-    if backend in ["jax", "torch"]:
-        pytest.skip("Not implemented in jax, torch backends")
 
-    obs, df, mu, sigma = 11.1, 5.2, 13.8, 2.3
-    expected = 1.658226
-    res = sr.crps_t(obs, df, mu, sigma, backend=backend)
-    assert np.isclose(res, expected)
+def test_crps_t(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
 
-    obs, df = 0.7, 4.0
-    expected = 0.4387929
-    res = sr.crps_t(obs, df, backend=backend)
-    assert np.isclose(res, expected)
+    if backend == "torch":
+        # betainc has no native torch implementation
+        with pytest.raises(NotImplementedError):
+            sr.crps_t(fa(11.1), fa(5.2), fa(13.8), fa(2.3))
+        return
 
+    res = sr.crps_t(fa(11.1), fa(5.2), fa(13.8), fa(2.3), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 1.658226, backend)
 
-def test_crps_uniform(backend):
-    obs, min, max, lmass, umass = 0.3, -1.0, 2.1, 0.3, 0.1
-    res = sr.crps_uniform(obs, min, max, lmass, umass, backend=backend)
-    expected = 0.3960968
-    assert np.isclose(res, expected)
+    res = sr.crps_t(fa(0.7), fa(4.0), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.4387929, backend)
 
-    obs, min, max, lmass = -17.9, -15.2, -8.7, 0.2
-    res = sr.crps_uniform(obs, min, max, lmass, backend=backend)
-    expected = 4.086667
-    assert np.isclose(res, expected)
 
-    obs, min, max = 2.2, 0.1, 3.1
-    res = sr.crps_uniform(obs, min, max, backend=backend)
-    expected = 0.37
-    assert np.isclose(res, expected)
+def test_crps_uniform(backend, to_backend, backend_kwargs):
+    def fa(x):
+        return to_backend(np.asarray(x, dtype=float))
+
+    res = sr.crps_uniform(
+        fa(0.3), fa(-1.0), fa(2.1), fa(0.3), fa(0.1), **backend_kwargs
+    )
+    assert_inferred(res, backend)
+    assert_close(res, 0.3960968, backend)
+
+    res = sr.crps_uniform(fa(-17.9), fa(-15.2), fa(-8.7), fa(0.2), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 4.086667, backend)
+
+    res = sr.crps_uniform(fa(2.2), fa(0.1), fa(3.1), **backend_kwargs)
+    assert_inferred(res, backend)
+    assert_close(res, 0.37, backend)
+
+
+def test_crps_ensemble_legacy_backend_string_deprecated():
+    obs = np.random.randn(N)
+    fct = np.random.randn(N, ENSEMBLE_SIZE)
+    with pytest.warns(DeprecationWarning):
+        sr.crps_ensemble(obs, fct, backend="numpy")
+
+
+def test_crps_ensemble_grad_jax():
+    jax = pytest.importorskip("jax")
+    import jax.numpy as jnp
+
+    obs = jnp.asarray(np.random.randn(N))
+    fct = jnp.asarray(np.random.randn(N, ENSEMBLE_SIZE))
+    g = jax.grad(lambda f: sr.crps_ensemble(obs, f).sum())(fct)
+    assert jnp.all(jnp.isfinite(g))
+
+
+@pytest.mark.xfail(
+    strict=True,
+    reason=(
+        "crps_normal detaches torch autograd: core.crps._closed.normal() runs "
+        "map(xp.asarray, ...) and array_api_compat.torch.asarray drops "
+        "requires_grad, so the result has no grad_fn. Remove this xfail once the "
+        "core path preserves torch gradients."
+    ),
+)
+def test_crps_normal_grad_torch():
+    torch = pytest.importorskip("torch")
+    obs = torch.tensor([0.2], dtype=torch.float64)
+    mu = torch.tensor([0.0], dtype=torch.float64, requires_grad=True)
+    sigma = torch.tensor([1.0], dtype=torch.float64, requires_grad=True)
+    sr.crps_normal(obs, mu, sigma).sum().backward()
+    assert torch.isfinite(mu.grad).all() and torch.isfinite(sigma.grad).all()

From aaf8e4e845e98811aabc3741b2a24f15d76b2ff4 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 03:16:12 +0200
Subject: [PATCH 23/26] Preserve torch grad in namespace asarray, add namespace
 size, stop gev collapsing to float

---
 scoringrules/backend/namespace.py | 18 ++++++++++++++++++
 scoringrules/core/crps/_closed.py |  2 +-
 tests/test_crps.py                | 16 ----------------
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/scoringrules/backend/namespace.py b/scoringrules/backend/namespace.py
index 0593c5c..db7c065 100644
--- a/scoringrules/backend/namespace.py
+++ b/scoringrules/backend/namespace.py
@@ -5,6 +5,8 @@
 inferred from the input arrays (via array-api-compat) and adds the missing
 special functions and a few non-standard helpers (see ``extensions``)."""
 
+import math
+
 import numpy as np
 from array_api_compat import array_namespace, is_array_api_obj
 
@@ -76,6 +78,22 @@ def _scalar_tolerant(x, *args, **kwargs):
             return _scalar_tolerant
         return attr
 
+    def asarray(self, obj, /, **kwargs):
+        # Preserve identity for inputs that are already arrays of this namespace:
+        # array-API ``torch.asarray`` drops ``requires_grad`` (and re-wrapping
+        # loses autograd/device), so returning the input unchanged keeps
+        # gradients flowing. Only coerce scalars/lists, or when dtype/copy/device
+        # kwargs are explicitly requested.
+        if is_array_api_obj(obj) and not kwargs:
+            return obj
+        return self._xp.asarray(obj, **kwargs)
+
+    def size(self, x):
+        # The array-API standard does not expose a ``size`` free function and
+        # array-api-compat's torch namespace lacks one; the element count is a
+        # portable property of ``shape``.
+        return math.prod(x.shape)
+
     # --- linear algebra (thin delegations so call sites stay mechanical) ---
     def norm(self, x, axis=None):
         # array-API standard spelling of the existing backends' linalg.norm
diff --git a/scoringrules/core/crps/_closed.py b/scoringrules/core/crps/_closed.py
index 812198d..b7073ca 100644
--- a/scoringrules/core/crps/_closed.py
+++ b/scoringrules/core/crps/_closed.py
@@ -271,7 +271,7 @@ def _gev_adjust_fn(s, xi, f_xi):
 
     out = out * scale
 
-    return float(out) if out.size == 1 else out
+    return out
 
 
 def gpd(
diff --git a/tests/test_crps.py b/tests/test_crps.py
index 0742580..518d67c 100644
--- a/tests/test_crps.py
+++ b/tests/test_crps.py
@@ -682,8 +682,6 @@ def fa(x):
 
 def test_crps_gev(backend, to_backend, backend_kwargs):
     def fa(x):
-        # use size-2 arrays: crps_gev collapses size-1 results to a python float,
-        # which would defeat assert_inferred (and raises on jax size-1 arrays).
         return to_backend(np.full(2, float(x)))
 
     if backend == "torch":
@@ -1047,11 +1045,6 @@ def test_crps_hypergeometric(backend, to_backend, backend_kwargs):
     def fa(x):
         return to_backend(np.asarray(x, dtype=float))
 
-    if backend == "torch":
-        # crps_hypergeometric calls xp.size(), which array_api_compat.torch
-        # does not expose (genuine torch gap, see report).
-        pytest.skip("array_api_compat.torch has no `size`; crps_hypergeometric uses it")
-
     # test shapes
     res = sr.crps_hypergeometric(
         fa(5 * np.ones((2, 2))), fa(7), fa(13), fa(12), **backend_kwargs
@@ -1341,15 +1334,6 @@ def test_crps_ensemble_grad_jax():
     assert jnp.all(jnp.isfinite(g))
 
 
-@pytest.mark.xfail(
-    strict=True,
-    reason=(
-        "crps_normal detaches torch autograd: core.crps._closed.normal() runs "
-        "map(xp.asarray, ...) and array_api_compat.torch.asarray drops "
-        "requires_grad, so the result has no grad_fn. Remove this xfail once the "
-        "core path preserves torch gradients."
-    ),
-)
 def test_crps_normal_grad_torch():
     torch = pytest.importorskip("torch")
     obs = torch.tensor([0.2], dtype=torch.float64)

From ffb6d832265c97ed99ff7dd91047afa37578c3ee Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 03:18:28 +0200
Subject: [PATCH 24/26] Rewrite user-guide backend section for inferred array
 frameworks

---
 docs/user_guide.md | 54 +++++++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/docs/user_guide.md b/docs/user_guide.md
index b200f99..a31ee66 100644
--- a/docs/user_guide.md
+++ b/docs/user_guide.md
@@ -37,39 +37,53 @@ sr.variogram_score(obs, fct)
 For the univariate ensemble metrics, the ensemble dimension is on the last axis unless you specify otherwise with the `axis` argument. For the multivariate ensemble metrics, the ensemble dimension and the variable dimension are on the second last and last axis respectively, unless specified otherwise with `m_axis` and `v_axis`.
 
 ## Backends
-Scoringrules supports multiple backends. By default, the `numpy` and `numba` backends will be registered when importing the library. You can see the list of registered backends with
+
+Scoringrules runs every score across multiple array frameworks — numpy, jax, and
+torch — from a single implementation. The framework is **inferred from the input
+arrays**: pass numpy, jax, or torch arrays and the result is returned in the same
+framework, with no configuration required.
 
 ```python
-print(sr.backends)
-# {'numpy': <scoringrules.backend.numpy.NumpyBackend at 0x2ba2d6f391b0>,
-# 'numba': <scoringrules.backend.numpy.NumbaBackend at 0x2ba2d6f38ac0>}
-```
+import numpy as np
+import scoringrules as sr
 
-and the currently active backend, used by default in all metrics, can be seen with
+sr.crps_normal(np.array([0.1]), np.array([0.0]), np.array([1.0]))  # numpy array in, numpy array out
+```
 
 ```python
-print(sr.backends.active)
-# <scoringrules.backend.numpy.NumpyBackend at 0x2ba2d6f38ac0>
-```
+import jax.numpy as jnp
 
-The default backend can also be changed with
+sr.crps_normal(jnp.array([0.1]), jnp.array([0.0]), jnp.array([1.0]))  # jax array out
+```
 
 ```python
-sr.backends.set_active("numba")
-print(sr.backends.active)
-# <scoringrules.backend.numpy.NumbaBackend at 0x2ba2d6f38ac0>
+import torch
+
+mu = torch.tensor([0.0], requires_grad=True)
+sr.crps_normal(torch.tensor([0.1]), mu, torch.tensor([1.0]))  # torch tensor out; autograd preserved
 ```
-When computing a metric, the `backend` argument can be used to override the default choice.
 
+Inputs must come from a single framework; mixing (e.g. a numpy observation with a
+torch forecast) raises an error.
+
+### The numba fast path
 
-To register a new backend, for example `torch`, simply use
+For numpy inputs, opt into the compiled [numba](https://numba.pydata.org/) gufuncs
+with `backend="numba"`:
 
 ```python
-sr.register_backend("torch")
+sr.crps_ensemble(np.random.randn(5), np.random.randn(5, 11), backend="numba")
 ```
 
-You can now use `torch` to compute metrics, either by setting it as the default backend or by specifying it on a specific metric:
+`backend="numba"` requires numpy-compatible inputs.
 
-```python
-sr.crps_normal(0.1, 1.0, 0.0, backend="torch")
-```
+### Deprecations
+
+Selecting an array-API backend explicitly is deprecated and will be removed in 1.0
+— the framework is inferred from the input instead. This applies to:
+
+- the `backend="numpy"`, `backend="jax"`, and `backend="torch"` arguments, and
+- `sr.register_backend(...)` / `sr.backends.set_active(...)` for those backends.
+
+`backend="numba"` is **not** deprecated; it remains the supported way to reach the
+numba fast path.

From 9bc3db18093fb81caac19b44eeff7b2be7065a1e Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 03:32:08 +0200
Subject: [PATCH 25/26] Exercise weighted-CRPS tests with native arrays and
 assert backend inference

---
 tests/test_wcrps.py | 465 ++++++++++++++++++++++++++------------------
 1 file changed, 271 insertions(+), 194 deletions(-)

diff --git a/tests/test_wcrps.py b/tests/test_wcrps.py
index 1674622..1344312 100644
--- a/tests/test_wcrps.py
+++ b/tests/test_wcrps.py
@@ -1,6 +1,7 @@
 import numpy as np
 import pytest
 import scoringrules as sr
+from .conftest import assert_inferred
 
 
 M = 11
@@ -9,46 +10,69 @@
 ESTIMATORS = ["nrg", "fair", "pwm", "qd", "akr", "akr_circperm"]
 
 
-def test_owcrps_ensemble(backend):
+def assert_close(result, expected, backend, *, rtol=1e-5, atol=1e-8):
+    """Compare a (possibly framework-native) result to an expected value.
+
+    jax runs in float32 in the test environment, so its tolerances are loosened.
+    torch runs in float64 and stays strict.
+    """
+    arr = np.asarray(result)
+    if backend == "jax":
+        rtol = max(rtol, 1e-4)
+        atol = max(atol, 1e-5)
+    assert np.allclose(
+        arr, np.asarray(expected), rtol=rtol, atol=atol
+    ), f"[{backend}] {arr} != {expected}"
+
+
+def test_owcrps_ensemble(backend, to_backend, backend_kwargs):
     # test shapes
-    obs = np.random.randn(N)
+    obs = to_backend(np.random.randn(N))
     res = sr.owcrps_ensemble(
-        obs, np.random.randn(N, M), w_func=lambda x: x * 0.0 + 1.0, backend=backend
+        obs,
+        to_backend(np.random.randn(N, M)),
+        w_func=lambda x: x * 0.0 + 1.0,
+        **backend_kwargs,
     )
+    assert_inferred(res, backend)
     assert res.shape == (N,)
 
-    fct = np.random.randn(M, N)
+    fct = to_backend(np.random.randn(M, N))
     res = sr.owcrps_ensemble(
         obs,
         fct,
         w_func=lambda x: x * 0.0 + 1.0,
         m_axis=0,
-        backend=backend,
+        **backend_kwargs,
     )
+    assert_inferred(res, backend)
 
 
-def test_owcrps_ensemble_nan_policy(backend):
+def test_owcrps_ensemble_nan_policy(backend, to_backend, backend_kwargs):
     """Test behavior of scoringrules.owcrps_ensemble with NaN values."""
 
-    kwargs = {"backend": backend, "w_func": lambda x: x * 0.0 + 1.0}
+    kwargs = {**backend_kwargs, "w_func": lambda x: x * 0.0 + 1.0}
+
+    # Build numpy arrays first so numpy-style indexing stays in numpy land
+    obs0 = np.random.randn(N)
+    fct0 = np.random.randn(N, M)
+    fct_nan0 = fct0.copy()
+    fct_nan0[0, [0, 3, 6]] = np.nan
+    fct_nan0[2, [5]] = np.nan
+    nan_positions = np.isnan(fct_nan0).any(axis=1)
 
-    # test data
-    obs = np.random.randn(N)
-    fct = np.random.randn(N, M)
-    fct_nan = fct.copy()
-    fct_nan[0, [0, 3, 6]] = np.nan
-    fct_nan[2, [5]] = np.nan
-    nan_positions = np.isnan(fct_nan).any(axis=1)
+    obs = to_backend(obs0)
+    fct_nan = to_backend(fct_nan0)
 
     # default nan policy (propagate)
     res = sr.owcrps_ensemble(obs, fct_nan, **kwargs)
-    res = np.asarray(res)
-    assert np.all(np.isnan(res[nan_positions]))
+    assert_inferred(res, backend)
+    assert np.all(np.isnan(np.asarray(res)[nan_positions]))
 
     # 'propagate' nan policy
     res = sr.owcrps_ensemble(obs, fct_nan, nan_policy="propagate", **kwargs)
-    res = np.asarray(res)
-    assert np.all(np.isnan(res[nan_positions]))
+    assert_inferred(res, backend)
+    assert np.all(np.isnan(np.asarray(res)[nan_positions]))
 
     # 'raise' nan policy
     with pytest.raises(ValueError):
@@ -56,58 +80,70 @@ def test_owcrps_ensemble_nan_policy(backend):
 
     # 'omit' nan policy: no nans in results
     res = sr.owcrps_ensemble(obs, fct_nan, nan_policy="omit", **kwargs)
-    res = np.asarray(res)
-    assert not np.any(np.isnan(res))
+    assert_inferred(res, backend)
+    assert not np.any(np.isnan(np.asarray(res)))
 
     # 'omit' nan policy: equivalence with clean ensemble
-    for i in range(fct.shape[0]):
-        fct_clean = fct_nan[i, ~np.isnan(fct_nan[i])]
-        res_clean = sr.owcrps_ensemble(obs[i : i + 1], fct_clean[None, :], **kwargs)
+    for i in range(fct0.shape[0]):
+        fct_clean = fct_nan0[i, ~np.isnan(fct_nan0[i])]
+        res_clean = sr.owcrps_ensemble(
+            to_backend(obs0[i : i + 1]), to_backend(fct_clean[None, :]), **kwargs
+        )
         res = sr.owcrps_ensemble(
-            obs[i : i + 1], fct_nan[i : i + 1], nan_policy="omit", **kwargs
+            to_backend(obs0[i : i + 1]),
+            to_backend(fct_nan0[i : i + 1]),
+            nan_policy="omit",
+            **kwargs,
         )
-        assert np.allclose(res, res_clean)
+        assert np.allclose(np.asarray(res), np.asarray(res_clean))
 
 
-def test_vrcrps_ensemble(backend):
+def test_vrcrps_ensemble(backend, to_backend, backend_kwargs):
     # test shapes
-    obs = np.random.randn(N)
+    obs = to_backend(np.random.randn(N))
     res = sr.vrcrps_ensemble(
-        obs, np.random.randn(N, M), w_func=lambda x: x * 0.0 + 1.0, backend=backend
+        obs,
+        to_backend(np.random.randn(N, M)),
+        w_func=lambda x: x * 0.0 + 1.0,
+        **backend_kwargs,
     )
+    assert_inferred(res, backend)
     assert res.shape == (N,)
     res = sr.vrcrps_ensemble(
         obs,
-        np.random.randn(M, N),
+        to_backend(np.random.randn(M, N)),
         w_func=lambda x: x * 0.0 + 1.0,
         m_axis=0,
-        backend=backend,
+        **backend_kwargs,
     )
+    assert_inferred(res, backend)
     assert res.shape == (N,)
 
 
-def test_vrcrps_ensemble_nan_policy(backend):
+def test_vrcrps_ensemble_nan_policy(backend, to_backend, backend_kwargs):
     """Test behavior of scoringrules.vrcrps_ensemble with NaN values."""
 
-    kwargs = {"backend": backend, "w_func": lambda x: x * 0.0 + 1.0}
+    kwargs = {**backend_kwargs, "w_func": lambda x: x * 0.0 + 1.0}
 
-    # test data
-    obs = np.random.randn(N)
-    fct = np.random.randn(N, M)
-    fct_nan = fct.copy()
-    fct_nan[0, [0, 3, 6]] = np.nan
-    fct_nan[2, [5]] = np.nan
-    nan_positions = np.isnan(fct_nan).any(axis=1)
+    obs0 = np.random.randn(N)
+    fct0 = np.random.randn(N, M)
+    fct_nan0 = fct0.copy()
+    fct_nan0[0, [0, 3, 6]] = np.nan
+    fct_nan0[2, [5]] = np.nan
+    nan_positions = np.isnan(fct_nan0).any(axis=1)
+
+    obs = to_backend(obs0)
+    fct_nan = to_backend(fct_nan0)
 
     # default nan policy (propagate)
     res = sr.vrcrps_ensemble(obs, fct_nan, **kwargs)
-    res = np.asarray(res)
-    assert np.all(np.isnan(res[nan_positions]))
+    assert_inferred(res, backend)
+    assert np.all(np.isnan(np.asarray(res)[nan_positions]))
 
     # 'propagate' nan policy
     res = sr.vrcrps_ensemble(obs, fct_nan, nan_policy="propagate", **kwargs)
-    res = np.asarray(res)
-    assert np.all(np.isnan(res[nan_positions]))
+    assert_inferred(res, backend)
+    assert np.all(np.isnan(np.asarray(res)[nan_positions]))
 
     # 'raise' nan policy
     with pytest.raises(ValueError):
@@ -115,169 +151,205 @@ def test_vrcrps_ensemble_nan_policy(backend):
 
     # 'omit' nan policy: no nans in results
     res = sr.vrcrps_ensemble(obs, fct_nan, nan_policy="omit", **kwargs)
-    res = np.asarray(res)
-    assert not np.any(np.isnan(res))
+    assert_inferred(res, backend)
+    assert not np.any(np.isnan(np.asarray(res)))
 
     # 'omit' nan policy: equivalence with clean ensemble
-    for i in range(fct.shape[0]):
-        fct_clean = fct_nan[i, ~np.isnan(fct_nan[i])]
-        res_clean = sr.vrcrps_ensemble(obs[i : i + 1], fct_clean[None, :], **kwargs)
+    for i in range(fct0.shape[0]):
+        fct_clean = fct_nan0[i, ~np.isnan(fct_nan0[i])]
+        res_clean = sr.vrcrps_ensemble(
+            to_backend(obs0[i : i + 1]), to_backend(fct_clean[None, :]), **kwargs
+        )
         res = sr.vrcrps_ensemble(
-            obs[i : i + 1], fct_nan[i : i + 1], nan_policy="omit", **kwargs
+            to_backend(obs0[i : i + 1]),
+            to_backend(fct_nan0[i : i + 1]),
+            nan_policy="omit",
+            **kwargs,
         )
-        assert np.allclose(res, res_clean)
+        assert np.allclose(np.asarray(res), np.asarray(res_clean))
 
 
-def test_owcrps_ensemble_nan_weights(backend):
+def test_owcrps_ensemble_nan_weights(backend, to_backend, backend_kwargs):
     """Test owcrps_ensemble when the ensemble weights (ens_w) contain NaN."""
-    kwargs = {"backend": backend, "w_func": lambda x: x * 0.0 + 1.0}
+    kwargs = {**backend_kwargs, "w_func": lambda x: x * 0.0 + 1.0}
+
+    obs0 = np.random.randn(N)
+    fct0 = np.random.randn(N, M)
+    ens_w0 = np.random.rand(N, M)
+    ens_w0[0, [0, 3, 6]] = np.nan
+    ens_w0[2, [5]] = np.nan
+    nan_positions = np.isnan(ens_w0).any(axis=1)
 
-    obs = np.random.randn(N)
-    fct = np.random.randn(N, M)
-    ens_w = np.random.rand(N, M)
-    ens_w[0, [0, 3, 6]] = np.nan
-    ens_w[2, [5]] = np.nan
-    nan_positions = np.isnan(ens_w).any(axis=1)
+    obs = to_backend(obs0)
+    fct = to_backend(fct0)
+    ens_w = to_backend(ens_w0)
 
     # 'propagate': a NaN weight yields NaN output
-    res = np.asarray(
-        sr.owcrps_ensemble(obs, fct, ens_w=ens_w, nan_policy="propagate", **kwargs)
-    )
-    assert np.all(np.isnan(res[nan_positions]))
+    res = sr.owcrps_ensemble(obs, fct, ens_w=ens_w, nan_policy="propagate", **kwargs)
+    assert_inferred(res, backend)
+    assert np.all(np.isnan(np.asarray(res)[nan_positions]))
 
     # 'raise': error if a weight is NaN
     with pytest.raises(ValueError):
         sr.owcrps_ensemble(obs, fct, ens_w=ens_w, nan_policy="raise", **kwargs)
 
     # 'omit': NaN-weighted members get zero weight (equivalent to dropping them)
-    res = np.asarray(
-        sr.owcrps_ensemble(obs, fct, ens_w=ens_w, nan_policy="omit", **kwargs)
-    )
-    assert not np.any(np.isnan(res))
-    for i in range(fct.shape[0]):
-        valid = ~np.isnan(ens_w[i])
+    res = sr.owcrps_ensemble(obs, fct, ens_w=ens_w, nan_policy="omit", **kwargs)
+    assert_inferred(res, backend)
+    res_np = np.asarray(res)
+    assert not np.any(np.isnan(res_np))
+    for i in range(fct0.shape[0]):
+        valid = ~np.isnan(ens_w0[i])
         res_clean = sr.owcrps_ensemble(
-            obs[i : i + 1],
-            fct[i : i + 1, valid],
-            ens_w=ens_w[i : i + 1, valid],
+            to_backend(obs0[i : i + 1]),
+            to_backend(fct0[i : i + 1, valid]),
+            ens_w=to_backend(ens_w0[i : i + 1, valid]),
             nan_policy="omit",
             **kwargs,
         )
-        assert np.allclose(res[i], res_clean)
+        assert np.allclose(res_np[i], np.asarray(res_clean))
 
 
-def test_vrcrps_ensemble_nan_weights(backend):
+def test_vrcrps_ensemble_nan_weights(backend, to_backend, backend_kwargs):
     """Test vrcrps_ensemble when the ensemble weights (ens_w) contain NaN."""
-    kwargs = {"backend": backend, "w_func": lambda x: x * 0.0 + 1.0}
+    kwargs = {**backend_kwargs, "w_func": lambda x: x * 0.0 + 1.0}
+
+    obs0 = np.random.randn(N)
+    fct0 = np.random.randn(N, M)
+    ens_w0 = np.random.rand(N, M)
+    ens_w0[0, [0, 3, 6]] = np.nan
+    ens_w0[2, [5]] = np.nan
+    nan_positions = np.isnan(ens_w0).any(axis=1)
 
-    obs = np.random.randn(N)
-    fct = np.random.randn(N, M)
-    ens_w = np.random.rand(N, M)
-    ens_w[0, [0, 3, 6]] = np.nan
-    ens_w[2, [5]] = np.nan
-    nan_positions = np.isnan(ens_w).any(axis=1)
+    obs = to_backend(obs0)
+    fct = to_backend(fct0)
+    ens_w = to_backend(ens_w0)
 
     # 'propagate': a NaN weight yields NaN output
-    res = np.asarray(
-        sr.vrcrps_ensemble(obs, fct, ens_w=ens_w, nan_policy="propagate", **kwargs)
-    )
-    assert np.all(np.isnan(res[nan_positions]))
+    res = sr.vrcrps_ensemble(obs, fct, ens_w=ens_w, nan_policy="propagate", **kwargs)
+    assert_inferred(res, backend)
+    assert np.all(np.isnan(np.asarray(res)[nan_positions]))
 
     # 'raise': error if a weight is NaN
     with pytest.raises(ValueError):
         sr.vrcrps_ensemble(obs, fct, ens_w=ens_w, nan_policy="raise", **kwargs)
 
     # 'omit': NaN-weighted members get zero weight (equivalent to dropping them)
-    res = np.asarray(
-        sr.vrcrps_ensemble(obs, fct, ens_w=ens_w, nan_policy="omit", **kwargs)
-    )
-    assert not np.any(np.isnan(res))
-    for i in range(fct.shape[0]):
-        valid = ~np.isnan(ens_w[i])
+    res = sr.vrcrps_ensemble(obs, fct, ens_w=ens_w, nan_policy="omit", **kwargs)
+    assert_inferred(res, backend)
+    res_np = np.asarray(res)
+    assert not np.any(np.isnan(res_np))
+    for i in range(fct0.shape[0]):
+        valid = ~np.isnan(ens_w0[i])
         res_clean = sr.vrcrps_ensemble(
-            obs[i : i + 1],
-            fct[i : i + 1, valid],
-            ens_w=ens_w[i : i + 1, valid],
+            to_backend(obs0[i : i + 1]),
+            to_backend(fct0[i : i + 1, valid]),
+            ens_w=to_backend(ens_w0[i : i + 1, valid]),
             nan_policy="omit",
             **kwargs,
         )
-        assert np.allclose(res[i], res_clean)
+        assert np.allclose(res_np[i], np.asarray(res_clean))
 
 
 @pytest.mark.parametrize("estimator", ESTIMATORS)
-def test_twcrps_vs_crps(estimator, backend):
-    obs = np.random.randn(N)
-    mu = obs + np.random.randn(N) * 0.1
+def test_twcrps_vs_crps(estimator, backend, to_backend, backend_kwargs):
+    obs0 = np.random.randn(N)
+    mu = obs0 + np.random.randn(N) * 0.1
     sigma = abs(np.random.randn(N)) * 0.3
-    fct = np.random.randn(N, M) * sigma[..., None] + mu[..., None]
+    fct0 = np.random.randn(N, M) * sigma[..., None] + mu[..., None]
+
+    obs = to_backend(obs0)
+    fct = to_backend(fct0)
 
-    res = sr.crps_ensemble(obs, fct, estimator=estimator, backend=backend)
+    res = sr.crps_ensemble(obs, fct, estimator=estimator, **backend_kwargs)
+    assert_inferred(res, backend)
 
     # no argument given
-    resw = sr.twcrps_ensemble(obs, fct, estimator=estimator, backend=backend)
-    np.testing.assert_allclose(res, resw, rtol=1e-10)
+    resw = sr.twcrps_ensemble(obs, fct, estimator=estimator, **backend_kwargs)
+    assert_inferred(resw, backend)
+    assert_close(res, resw, backend, rtol=1e-10)
 
     # a and b
     resw = sr.twcrps_ensemble(
-        obs, fct, a=float("-inf"), b=float("inf"), estimator=estimator, backend=backend
+        obs, fct, a=float("-inf"), b=float("inf"), estimator=estimator, **backend_kwargs
     )
-    np.testing.assert_allclose(res, resw, rtol=1e-10)
+    assert_inferred(resw, backend)
+    assert_close(res, resw, backend, rtol=1e-10)
 
     # v_func as identity function
     resw = sr.twcrps_ensemble(
-        obs, fct, v_func=lambda x: x, estimator=estimator, backend=backend
+        obs, fct, v_func=lambda x: x, estimator=estimator, **backend_kwargs
     )
-    np.testing.assert_allclose(res, resw, rtol=1e-10)
+    assert_inferred(resw, backend)
+    assert_close(res, resw, backend, rtol=1e-10)
 
 
-def test_owcrps_vs_crps(backend):
-    obs = np.random.randn(N)
-    mu = obs + np.random.randn(N) * 0.1
+def test_owcrps_vs_crps(backend, to_backend, backend_kwargs):
+    obs0 = np.random.randn(N)
+    mu = obs0 + np.random.randn(N) * 0.1
     sigma = abs(np.random.randn(N)) * 0.5
-    fct = np.random.randn(N, M) * sigma[..., None] + mu[..., None]
+    fct0 = np.random.randn(N, M) * sigma[..., None] + mu[..., None]
 
-    res = sr.crps_ensemble(obs, fct, estimator="qd", backend=backend)
+    obs = to_backend(obs0)
+    fct = to_backend(fct0)
+
+    res = sr.crps_ensemble(obs, fct, estimator="qd", **backend_kwargs)
+    assert_inferred(res, backend)
 
     # no argument given
-    resw = sr.owcrps_ensemble(obs, fct, backend=backend)
-    np.testing.assert_allclose(res, resw, rtol=1e-4)
+    resw = sr.owcrps_ensemble(obs, fct, **backend_kwargs)
+    assert_inferred(resw, backend)
+    assert_close(res, resw, backend, rtol=1e-4)
 
     # a and b
     resw = sr.owcrps_ensemble(
-        obs, fct, a=float("-inf"), b=float("inf"), backend=backend
+        obs, fct, a=float("-inf"), b=float("inf"), **backend_kwargs
     )
-    np.testing.assert_allclose(res, resw, rtol=1e-4)
+    assert_inferred(resw, backend)
+    assert_close(res, resw, backend, rtol=1e-4)
 
     # w_func as identity function
-    resw = sr.owcrps_ensemble(obs, fct, w_func=lambda x: x * 0.0 + 1.0, backend=backend)
-    np.testing.assert_allclose(res, resw, rtol=1e-4)
+    resw = sr.owcrps_ensemble(
+        obs, fct, w_func=lambda x: x * 0.0 + 1.0, **backend_kwargs
+    )
+    assert_inferred(resw, backend)
+    assert_close(res, resw, backend, rtol=1e-4)
 
 
-def test_vrcrps_vs_crps(backend):
-    obs = np.random.randn(N)
-    mu = obs + np.random.randn(N) * 0.1
+def test_vrcrps_vs_crps(backend, to_backend, backend_kwargs):
+    obs0 = np.random.randn(N)
+    mu = obs0 + np.random.randn(N) * 0.1
     sigma = abs(np.random.randn(N)) * 0.3
-    fct = np.random.randn(N, M) * sigma[..., None] + mu[..., None]
+    fct0 = np.random.randn(N, M) * sigma[..., None] + mu[..., None]
 
-    res = sr.crps_ensemble(obs, fct, backend=backend, estimator="nrg")
+    obs = to_backend(obs0)
+    fct = to_backend(fct0)
+
+    res = sr.crps_ensemble(obs, fct, estimator="nrg", **backend_kwargs)
+    assert_inferred(res, backend)
 
     # no argument given
-    resw = sr.vrcrps_ensemble(obs, fct, backend=backend)
-    np.testing.assert_allclose(res, resw, rtol=1e-5)
+    resw = sr.vrcrps_ensemble(obs, fct, **backend_kwargs)
+    assert_inferred(resw, backend)
+    assert_close(res, resw, backend, rtol=1e-5)
 
     # a and b
     resw = sr.vrcrps_ensemble(
-        obs, fct, a=float("-inf"), b=float("inf"), backend=backend
+        obs, fct, a=float("-inf"), b=float("inf"), **backend_kwargs
     )
-    np.testing.assert_allclose(res, resw, rtol=1e-5)
+    assert_inferred(resw, backend)
+    assert_close(res, resw, backend, rtol=1e-5)
 
     # w_func as identity function
-    resw = sr.vrcrps_ensemble(obs, fct, w_func=lambda x: x * 0.0 + 1.0, backend=backend)
-    np.testing.assert_allclose(res, resw, rtol=1e-5)
+    resw = sr.vrcrps_ensemble(
+        obs, fct, w_func=lambda x: x * 0.0 + 1.0, **backend_kwargs
+    )
+    assert_inferred(resw, backend)
+    assert_close(res, resw, backend, rtol=1e-5)
 
 
-def test_owcrps_score_correctness(backend):
-    fct = np.array(
+def test_owcrps_score_correctness(backend, to_backend, backend_kwargs):
+    fct0 = np.array(
         [
             [-0.03574194, 0.06873582, 0.03098684, 0.07316138, 0.08498165],
             [-0.11957874, 0.26472238, -0.06324622, 0.43026451, -0.25640457],
@@ -292,7 +364,7 @@ def test_owcrps_score_correctness(backend):
         ]
     )
 
-    obs = np.array(
+    obs0 = np.array(
         [
             0.19640722,
             -0.11300369,
@@ -307,37 +379,44 @@ def test_owcrps_score_correctness(backend):
         ]
     )
 
+    fct = to_backend(fct0)
+    obs = to_backend(obs0)
+    rtol = 1e-4 if backend == "jax" else 1e-6
+
     def w_func(x):
         return (x > -1) * 1.0
 
-    res = np.mean(
-        np.float64(sr.owcrps_ensemble(obs, fct, w_func=w_func, backend=backend))
-    )
-    np.testing.assert_allclose(res, 0.09320807, rtol=1e-6)
+    res = sr.owcrps_ensemble(obs, fct, w_func=w_func, **backend_kwargs)
+    assert_inferred(res, backend)
+    np.testing.assert_allclose(np.mean(np.asarray(res)), 0.09320807, rtol=rtol)
 
-    res = np.mean(np.float64(sr.owcrps_ensemble(obs, fct, a=-1.0, backend=backend)))
-    np.testing.assert_allclose(res, 0.09320807, rtol=1e-6)
+    res = sr.owcrps_ensemble(obs, fct, a=-1.0, **backend_kwargs)
+    assert_inferred(res, backend)
+    np.testing.assert_allclose(np.mean(np.asarray(res)), 0.09320807, rtol=rtol)
 
     def w_func(x):
         return (x < 1.85) * 1.0
 
-    res = np.mean(
-        np.float64(sr.owcrps_ensemble(obs, fct, w_func=w_func, backend=backend))
-    )
-    np.testing.assert_allclose(res, 0.09933139, rtol=1e-6)
+    res = sr.owcrps_ensemble(obs, fct, w_func=w_func, **backend_kwargs)
+    assert_inferred(res, backend)
+    np.testing.assert_allclose(np.mean(np.asarray(res)), 0.09933139, rtol=rtol)
 
-    res = np.mean(np.float64(sr.owcrps_ensemble(obs, fct, b=1.85, backend=backend)))
-    np.testing.assert_allclose(res, 0.09933139, rtol=1e-6)
+    res = sr.owcrps_ensemble(obs, fct, b=1.85, **backend_kwargs)
+    assert_inferred(res, backend)
+    np.testing.assert_allclose(np.mean(np.asarray(res)), 0.09933139, rtol=rtol)
 
     # test equivalence with and without weights
-    w = np.ones(fct.shape)
-    res_nrg_w = sr.owcrps_ensemble(obs, fct, ens_w=w, b=1.85, backend=backend)
-    res_nrg_now = sr.owcrps_ensemble(obs, fct, b=1.85, backend=backend)
-    assert np.allclose(res_nrg_w, res_nrg_now)
+    w = to_backend(np.ones(fct0.shape))
+    res_nrg_w = sr.owcrps_ensemble(obs, fct, ens_w=w, b=1.85, **backend_kwargs)
+    res_nrg_now = sr.owcrps_ensemble(obs, fct, b=1.85, **backend_kwargs)
+    assert_inferred(res_nrg_w, backend)
+    assert_inferred(res_nrg_now, backend)
+    atol = 1e-4 if backend == "jax" else 1e-8
+    assert np.allclose(np.asarray(res_nrg_w), np.asarray(res_nrg_now), atol=atol)
 
 
-def test_twcrps_score_correctness(backend):
-    fct = np.array(
+def test_twcrps_score_correctness(backend, to_backend, backend_kwargs):
+    fct0 = np.array(
         [
             [-0.03574194, 0.06873582, 0.03098684, 0.07316138, 0.08498165],
             [-0.11957874, 0.26472238, -0.06324622, 0.43026451, -0.25640457],
@@ -352,7 +431,7 @@ def test_twcrps_score_correctness(backend):
         ]
     )
 
-    obs = np.array(
+    obs0 = np.array(
         [
             0.19640722,
             -0.11300369,
@@ -367,47 +446,41 @@ def test_twcrps_score_correctness(backend):
         ]
     )
 
+    fct = to_backend(fct0)
+    obs = to_backend(obs0)
+    rtol = 1e-4 if backend == "jax" else 1e-6
+
     def v_func(x):
-        return np.maximum(x, -1.0)
+        from scoringrules.backend import get_namespace
 
-    res = np.mean(
-        np.float64(
-            sr.twcrps_ensemble(
-                obs, fct, v_func=v_func, estimator="nrg", backend=backend
-            )
-        )
-    )
-    np.testing.assert_allclose(res, 0.09489662, rtol=1e-6)
+        xp = get_namespace(x)
+        return xp.maximum(x, xp.asarray(-1.0))
 
-    res = np.mean(
-        np.float64(
-            sr.twcrps_ensemble(obs, fct, a=-1.0, estimator="nrg", backend=backend)
-        )
-    )
-    np.testing.assert_allclose(res, 0.09489662, rtol=1e-6)
+    res = sr.twcrps_ensemble(obs, fct, v_func=v_func, estimator="nrg", **backend_kwargs)
+    assert_inferred(res, backend)
+    np.testing.assert_allclose(np.mean(np.asarray(res)), 0.09489662, rtol=rtol)
+
+    res = sr.twcrps_ensemble(obs, fct, a=-1.0, estimator="nrg", **backend_kwargs)
+    assert_inferred(res, backend)
+    np.testing.assert_allclose(np.mean(np.asarray(res)), 0.09489662, rtol=rtol)
 
     def v_func(x):
-        return np.minimum(x, 1.85)
+        from scoringrules.backend import get_namespace
 
-    res = np.mean(
-        np.float64(
-            sr.twcrps_ensemble(
-                obs, fct, v_func=v_func, estimator="nrg", backend=backend
-            )
-        )
-    )
-    np.testing.assert_allclose(res, 0.0994809, rtol=1e-6)
+        xp = get_namespace(x)
+        return xp.minimum(x, xp.asarray(1.85))
 
-    res = np.mean(
-        np.float64(
-            sr.twcrps_ensemble(obs, fct, b=1.85, estimator="nrg", backend=backend)
-        )
-    )
-    np.testing.assert_allclose(res, 0.0994809, rtol=1e-6)
+    res = sr.twcrps_ensemble(obs, fct, v_func=v_func, estimator="nrg", **backend_kwargs)
+    assert_inferred(res, backend)
+    np.testing.assert_allclose(np.mean(np.asarray(res)), 0.0994809, rtol=rtol)
+
+    res = sr.twcrps_ensemble(obs, fct, b=1.85, estimator="nrg", **backend_kwargs)
+    assert_inferred(res, backend)
+    np.testing.assert_allclose(np.mean(np.asarray(res)), 0.0994809, rtol=rtol)
 
 
-def test_vrcrps_score_correctness(backend):
-    fct = np.array(
+def test_vrcrps_score_correctness(backend, to_backend, backend_kwargs):
+    fct0 = np.array(
         [
             [-0.03574194, 0.06873582, 0.03098684, 0.07316138, 0.08498165],
             [-0.11957874, 0.26472238, -0.06324622, 0.43026451, -0.25640457],
@@ -422,7 +495,7 @@ def test_vrcrps_score_correctness(backend):
         ]
     )
 
-    obs = np.array(
+    obs0 = np.array(
         [
             0.19640722,
             -0.11300369,
@@ -437,24 +510,28 @@ def test_vrcrps_score_correctness(backend):
         ]
     )
 
+    fct = to_backend(fct0)
+    obs = to_backend(obs0)
+    rtol = 1e-4 if backend == "jax" else 1e-6
+
     def w_func(x):
         return (x > -1) * 1.0
 
-    res = np.mean(
-        np.float64(sr.vrcrps_ensemble(obs, fct, w_func=w_func, backend=backend))
-    )
-    np.testing.assert_allclose(res, 0.1003983, rtol=1e-6)
+    res = sr.vrcrps_ensemble(obs, fct, w_func=w_func, **backend_kwargs)
+    assert_inferred(res, backend)
+    np.testing.assert_allclose(np.mean(np.asarray(res)), 0.1003983, rtol=rtol)
 
-    res = np.mean(np.float64(sr.vrcrps_ensemble(obs, fct, a=-1.0, backend=backend)))
-    np.testing.assert_allclose(res, 0.1003983, rtol=1e-6)
+    res = sr.vrcrps_ensemble(obs, fct, a=-1.0, **backend_kwargs)
+    assert_inferred(res, backend)
+    np.testing.assert_allclose(np.mean(np.asarray(res)), 0.1003983, rtol=rtol)
 
     def w_func(x):
         return (x < 1.85) * 1.0
 
-    res = np.mean(
-        np.float64(sr.vrcrps_ensemble(obs, fct, w_func=w_func, backend=backend))
-    )
-    np.testing.assert_allclose(res, 0.1950857, rtol=1e-6)
+    res = sr.vrcrps_ensemble(obs, fct, w_func=w_func, **backend_kwargs)
+    assert_inferred(res, backend)
+    np.testing.assert_allclose(np.mean(np.asarray(res)), 0.1950857, rtol=rtol)
 
-    res = np.mean(np.float64(sr.vrcrps_ensemble(obs, fct, b=1.85, backend=backend)))
-    np.testing.assert_allclose(res, 0.1950857, rtol=1e-6)
+    res = sr.vrcrps_ensemble(obs, fct, b=1.85, **backend_kwargs)
+    assert_inferred(res, backend)
+    np.testing.assert_allclose(np.mean(np.asarray(res)), 0.1950857, rtol=rtol)

From 632ffcc59abc686c5e0a9b42e5a8a64bb8e29101 Mon Sep 17 00:00:00 2001
From: Francesco Zanetta <francesco.zanetta@meteoswiss.ch>
Date: Sun, 7 Jun 2026 03:51:57 +0200
Subject: [PATCH 26/26] Drop unused array-api-extra dep, fix stale backend
 docstrings, silence comb divide-by-zero, remove dead test

---
 pyproject.toml                     |  1 -
 scoringrules/_crps.py              | 57 ++++++++++++++++++++----------
 scoringrules/backend/extensions.py | 15 ++++----
 tests/test_namespace.py            |  7 ----
 uv.lock                            | 14 --------
 5 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8bfbfd9..85e1680 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,6 @@ dependencies = [
     "numpy>=2.0.0",
     "scipy>=1.14.0",
     "array-api-compat>=1.9",
-    "array-api-extra>=0.5",
 ]
 authors = [
     {name = "Francesco Zanetta", email = "zanetta.francesco@gmail.com"},
diff --git a/scoringrules/_crps.py b/scoringrules/_crps.py
index eee9b59..65c58d0 100644
--- a/scoringrules/_crps.py
+++ b/scoringrules/_crps.py
@@ -82,7 +82,8 @@ def crps_ensemble(
         ``'omit'`` the ``int``, ``akr`` and ``akr_circperm`` estimators are not
         supported and raise ``NotImplementedError``.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -224,7 +225,8 @@ def twcrps_ensemble(
         Defines how to handle NaN values in the ensemble members. Forwarded to
         :func:`crps_ensemble`. See its documentation for details.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -333,7 +335,8 @@ def owcrps_ensemble(
         weight computation so NaN members do not contribute to the mean weight.
         See :func:`crps_ensemble` for details.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -458,7 +461,8 @@ def vrcrps_ensemble(
         weight computation so NaN members do not contribute to the mean weight.
         See :func:`crps_ensemble` for details.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -566,7 +570,8 @@ def crps_quantile(
     m_axis : int
         The axis corresponding to the ensemble. Default is the last axis.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -638,7 +643,8 @@ def crps_beta(
     upper : array_like
         Upper bound of the forecast beta distribution.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -690,7 +696,8 @@ def crps_binomial(
     prob : array_like
         Probability parameter of the forecast binomial distribution as a float or array of floats.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -737,7 +744,8 @@ def crps_exponential(
     rate : array_like
         Rate parameter of the forecast exponential distribution.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -804,7 +812,8 @@ def crps_exponentialM(
     scale : array_like
         Scale parameter of the forecast exponential distribution.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -861,7 +870,8 @@ def crps_2pexponential(
     location : array_like
         Location parameter of the forecast two-piece exponential distribution.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -920,7 +930,8 @@ def crps_gamma(
         Scale parameter of the forecast scale distribution, where ``scale = 1 / rate``.
         Either ``rate`` or ``scale`` must be provided.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -997,7 +1008,8 @@ def crps_csg0(
     shift : array_like
         Shift parameter of the forecast CSG distribution.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -1065,7 +1077,8 @@ def crps_gev(
     scale : array_like, optional
         Scale parameter of the forecast GEV distribution.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -1171,7 +1184,8 @@ def crps_gpd(
     mass : array_like
         Mass parameter at the lower boundary of the forecast GPD distribution.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -1763,7 +1777,8 @@ def crps_hypergeometric(
     k : array_like
         Number of draws, without replacement. Must be in 0, 1, ..., m + n.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -1815,7 +1830,8 @@ def crps_laplace(
     scale : array_like
         Scale parameter of the forecast laplace distribution.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -1926,7 +1942,8 @@ def crps_loglaplace(
     scalelog : array_like
         Scale parameter of the forecast log-laplace distribution.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
@@ -1987,7 +2004,8 @@ def crps_loglogistic(
     sigmalog : array_like
         Scale parameter of the log-logistic distribution.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
 
     Returns
@@ -2097,7 +2115,8 @@ def crps_mixnorm(
     m_axis : int
         The axis corresponding to the mixture components. Default is the last axis.
     backend : str, optional
-        The name of the backend used for computations. Defaults to ``numba`` if available, else ``numpy``.
+        Computational backend. ``None`` (default) infers the array framework from
+        the inputs; pass ``"numba"`` for the numba fast path.
 
     Returns
     -------
diff --git a/scoringrules/backend/extensions.py b/scoringrules/backend/extensions.py
index e8b75c7..1b3aca0 100644
--- a/scoringrules/backend/extensions.py
+++ b/scoringrules/backend/extensions.py
@@ -163,14 +163,17 @@ def comb(xp, n, k):
     # (matching the old floor-division form).
     n = xp.asarray(n)
     k = xp.asarray(k)
-    ratio = factorial(xp, n) / (factorial(xp, k) * factorial(xp, n - k))
-    result = xp.round(ratio)
     # ``comb`` is 0 outside ``0 <= k <= n`` (matches scipy.special.comb, the
-    # behaviour the old backends relied on). Without this guard, factorial of a
-    # negative argument yields inf/nan and propagates through masked terms (e.g.
-    # the hypergeometric PMF, where ``0 * inf`` becomes nan).
+    # behaviour the old backends relied on). Clamp ``k`` and ``n - k`` to
+    # non-negative values before taking factorials so invalid entries neither
+    # divide by zero (factorial of a negative integer is 0) nor produce inf/nan;
+    # the result for those entries is masked to 0 below.
     valid = (k >= 0) & (k <= n)
-    return xp.where(valid, result, xp.asarray(0.0))
+    zero = xp.asarray(0.0)
+    safe_k = xp.where(valid, k, zero)
+    safe_nk = xp.where(valid, n - k, zero)
+    ratio = factorial(xp, n) / (factorial(xp, safe_k) * factorial(xp, safe_nk))
+    return xp.where(valid, xp.round(ratio), zero)
 
 
 # --- non-standard helpers ---
diff --git a/tests/test_namespace.py b/tests/test_namespace.py
index 08b519c..870a24d 100644
--- a/tests/test_namespace.py
+++ b/tests/test_namespace.py
@@ -61,13 +61,6 @@ def test_missing_xp_raises_attributeerror_not_recursion():
         obj.sum  # noqa: B018
 
 
-@pytest.mark.skip(
-    reason="special-function methods exercised once extensions.py lands (later task)"
-)
-def test_special_function_method_present():
-    pass
-
-
 def test_mixed_namespaces_raise():
     torch = pytest.importorskip("torch")
     a = np.asarray([1.0, 2.0])
diff --git a/uv.lock b/uv.lock
index 1880bc5..b960995 100644
--- a/uv.lock
+++ b/uv.lock
@@ -15,18 +15,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/a0/d3/54cd560804a8c2b898824778e86c13c2a14600bc83532a9c4f69f2f469c3/array_api_compat-1.14.0-py3-none-any.whl", hash = "sha256:ed5af1f9b6595a199c942505f281ec994892556b6efc24679a0501e87a7d6279", size = 60124, upload-time = "2026-02-26T12:02:41.127Z" },
 ]
 
-[[package]]
-name = "array-api-extra"
-version = "0.10.3"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "array-api-compat" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/b2/4d/ff0cb01001385ad31d4798906e96c5e43e17770037a93dc5f33cd44ecd9d/array_api_extra-0.10.3.tar.gz", hash = "sha256:6cabfefe10db45f5eb4c642fc2465646ad0ed017d3774fc16d763486b31ee5ae", size = 94321, upload-time = "2026-06-03T14:34:31.057Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/8f/16/b950ac4018ee149924ca1155ea749c5d175800b78234f0941fdfcb79233b/array_api_extra-0.10.3-py3-none-any.whl", hash = "sha256:4968892e6641b8d2b6f5e4fdcdbd979951f601411436948f4342831a626ba03c", size = 91055, upload-time = "2026-06-03T14:34:29.766Z" },
-]
-
 [[package]]
 name = "cfgv"
 version = "3.4.0"
@@ -1081,7 +1069,6 @@ version = "0.11.0"
 source = { editable = "." }
 dependencies = [
     { name = "array-api-compat" },
-    { name = "array-api-extra" },
     { name = "numpy" },
     { name = "scipy" },
 ]
@@ -1108,7 +1095,6 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "array-api-compat", specifier = ">=1.9" },
-    { name = "array-api-extra", specifier = ">=0.5" },
     { name = "jax", marker = "extra == 'jax'", specifier = ">=0.4.31" },
     { name = "numba", marker = "extra == 'numba'", specifier = ">=0.60.0" },
     { name = "numpy", specifier = ">=2.0.0" },