From 3d66fdfb552bae8ed75c83202793e6e389111087 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 25 Feb 2026 22:35:14 -0700 Subject: [PATCH 1/2] Tweak strategies so we don't reject so many examples. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` ┌─────────────────────────────────────┬────────┬────────┐ │ Metric │ Before │ After │ ├─────────────────────────────────────┼────────┼────────┤ │ array.dtype.kind='f' │ 1.10% │ 8.49% │ ├─────────────────────────────────────┼────────┼────────┤ │ array.dtype.kind='i' │ 3.02% │ 5.26% │ ├─────────────────────────────────────┼────────┼────────┤ │ array.dtype.kind='u' │ 2.06% │ 10.36% │ ├─────────────────────────────────────┼────────┼────────┤ │ Invalid (line 101, not_overflowing) │ 27.57% │ 21.73% │ ├─────────────────────────────────────┼────────┼────────┤ │ Total invalid examples │ 429 │ 289 │ ├─────────────────────────────────────┼────────┼────────┤ │ all-NaN groups │ 1.10% │ 7.64% │ └─────────────────────────────────────┴────────┴────────┘ ``` --- tests/strategies.py | 78 +++++++++++++++++++++++++++++++++++++--- tests/test_properties.py | 15 +++++++- 2 files changed, 87 insertions(+), 6 deletions(-) diff --git a/tests/strategies.py b/tests/strategies.py index ea9c2ed73..ccee424d8 100644 --- a/tests/strategies.py +++ b/tests/strategies.py @@ -9,6 +9,7 @@ import hypothesis.strategies as st import numpy as np import sparse +from hypothesis import assume from . import ALL_FUNCS, SCIPY_STATS_FUNCS @@ -81,16 +82,16 @@ def insert_nans(draw: st.DrawFn, array: np.ndarray) -> np.ndarray: numeric_dtypes = ( - npst.integer_dtypes(endianness="=") + npst.floating_dtypes(endianness="=", sizes=(32, 64)) + | npst.integer_dtypes(endianness="=") | npst.unsigned_integer_dtypes(endianness="=") - | npst.floating_dtypes(endianness="=", sizes=(32, 64)) # TODO: add complex here not in supported_dtypes ) numeric_like_dtypes = ( - npst.boolean_dtypes() - | numeric_dtypes + numeric_dtypes | npst.datetime64_dtypes(endianness="=") | npst.timedelta64_dtypes(endianness="=") + | npst.boolean_dtypes() ) supported_dtypes = ( numeric_like_dtypes @@ -120,7 +121,74 @@ def numpy_arrays(draw: st.DrawFn, *, dtype) -> np.ndarray: numeric_arrays = numpy_arrays(dtype=numeric_dtypes) -numeric_like_arrays = numpy_arrays(dtype=numeric_like_dtypes) + + +@st.composite +def non_overflowing_float_arrays(draw: st.DrawFn) -> np.ndarray[Any, Any]: + """Generate float arrays that satisfy not_overflowing_array by construction. + + Bounds element magnitudes to 2^(nmant+1) / array.size so that sums + cannot overflow the mantissa, avoiding rejection by the assume() filter. + """ + dtype = draw(npst.floating_dtypes(endianness="=", sizes=(32, 64))) + shape = draw(npst.array_shapes()) + size = int(np.prod(shape)) + info = np.finfo(dtype) + limit = float(2 ** (info.nmant + 1)) + # Cast to target dtype so the bound is exactly representable (required by hypothesis) + max_val = float(dtype.type(limit / max(size, 1))) + array = draw( + npst.arrays( + dtype=st.just(dtype), + shape=st.just(shape), + elements={"min_value": -max_val, "max_value": max_val, "allow_subnormal": False}, + ) + ) + array = insert_nans(draw, array) + return array + + +@st.composite +def non_overflowing_int_arrays(draw: st.DrawFn) -> np.ndarray[Any, Any]: + """Generate integer arrays that satisfy not_overflowing_array by construction. + + Bounds elements so that summing the entire array cannot overflow the dtype. + """ + dtype = draw(npst.integer_dtypes(endianness="=") | npst.unsigned_integer_dtypes(endianness="=")) + shape = draw(npst.array_shapes()) + size = max(int(np.prod(shape)), 1) + imax = int(np.iinfo(dtype).max) + imin = int(np.iinfo(dtype).min) + + # Strict <: largest int strictly less than imax / size + max_val = imax // size + if imax % size == 0: + max_val -= 1 + + # Strict >: smallest int strictly greater than imin / size + if imin < 0: + min_val = -((-imin) // size) + if (-imin) % size == 0: + min_val += 1 + else: + # unsigned: imin == 0, need array > 0 + min_val = 1 + + assume(min_val <= max_val) + + array = draw( + npst.arrays( + dtype=st.just(dtype), + shape=st.just(shape), + elements={"min_value": min_val, "max_value": max_val}, + ) + ) + return array + + +numeric_like_arrays = ( + non_overflowing_float_arrays() | non_overflowing_int_arrays() | numpy_arrays(dtype=numeric_like_dtypes) +) all_arrays = numeric_like_arrays | cftime_arrays() diff --git a/tests/test_properties.py b/tests/test_properties.py index 74440b0d8..3f81d646b 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -14,7 +14,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np -from hypothesis import assume, given, note, settings +from hypothesis import assume, event, given, note, settings import flox from flox.core import groupby_reduce @@ -127,6 +127,19 @@ def test_groupby_reduce(data, array, func: str) -> None: shape=st.just((array.shape[-1],)), ) ) + event(f"{array.dtype.kind=!r}") + event(f"{by.dtype.kind=!r}") + + # Add some all-NaN groups + if data.draw(st.sampled_from([True, True, True, True, False])) and array.dtype.kind == "f": + groups = pd.unique(by.ravel()) + toset = data.draw(st.lists(st.sampled_from(groups), min_size=1, max_size=len(groups))) + note("Setting all-NaN groups") + event("all-NaN groups") + mask = np.zeros(by.shape, dtype=bool) + for g in toset: + mask |= by == g + array[..., mask] = np.nan if func in BLOCKWISE_FUNCS and isinstance(array, dask.array.Array): array = array.rechunk({axis: -1}) assert len(np.unique(by)) == 1 From 0eed3bd0a5254dfd80587c010f10cdc82e11fb77 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 26 Feb 2026 10:30:16 -0700 Subject: [PATCH 2/2] Relax floating-point tolerances in property tests - Add float32-aware tolerance (rtol=1e-5) in test_groupby_reduce_numpy_vs_other since chunked vs unchunked var/std on float32 with large values legitimately differs beyond the default rtol=1e-7. - Relax test_scans_against_numpy tolerance from rtol=1e-13 to rtol=1e-12 to handle cumsum accumulation order differences with large float64 values. Co-Authored-By: Claude Opus 4.6 --- tests/test_properties.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_properties.py b/tests/test_properties.py index 3f81d646b..ad2a78438 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -229,7 +229,8 @@ def test_groupby_reduce_numpy_vs_other(data, array, func: str) -> None: result_other, *_ = groupby_reduce(array, by, **kwargs) result_numpy, *_ = groupby_reduce(numpy_array, by, **kwargs) assert isinstance(result_other, type(array)) - assert_equal(result_other, result_numpy) + tolerance = {"rtol": 1e-5} if array.dtype == np.float32 else None + assert_equal(result_other, result_numpy, tolerance) @given( @@ -270,7 +271,7 @@ def test_scans_against_numpy(data, array: dask.array.Array, func: str) -> None: expected = expected.astype(dtype) note((numpy_array, group_idx, array.chunks)) - tolerance = {"rtol": 1e-13, "atol": 1e-15} + tolerance = {"rtol": 1e-12, "atol": 1e-15} actual = groupby_scan(numpy_array, by, func=func, axis=-1, dtype=dtype) assert_equal(actual, expected, tolerance)