diff --git a/tests/strategies.py b/tests/strategies.py
index ea9c2ed7..ccee424d 100644
--- a/tests/strategies.py
+++ b/tests/strategies.py
@@ -9,6 +9,7 @@
 import hypothesis.strategies as st
 import numpy as np
 import sparse
+from hypothesis import assume
 
 from . import ALL_FUNCS, SCIPY_STATS_FUNCS
 
@@ -81,16 +82,16 @@ def insert_nans(draw: st.DrawFn, array: np.ndarray) -> np.ndarray:
 
 
 numeric_dtypes = (
-    npst.integer_dtypes(endianness="=")
+    npst.floating_dtypes(endianness="=", sizes=(32, 64))
+    | npst.integer_dtypes(endianness="=")
     | npst.unsigned_integer_dtypes(endianness="=")
-    | npst.floating_dtypes(endianness="=", sizes=(32, 64))
     # TODO: add complex here not in supported_dtypes
 )
 numeric_like_dtypes = (
-    npst.boolean_dtypes()
-    | numeric_dtypes
+    numeric_dtypes
     | npst.datetime64_dtypes(endianness="=")
     | npst.timedelta64_dtypes(endianness="=")
+    | npst.boolean_dtypes()
 )
 supported_dtypes = (
     numeric_like_dtypes
@@ -120,7 +121,74 @@ def numpy_arrays(draw: st.DrawFn, *, dtype) -> np.ndarray:
 
 
 numeric_arrays = numpy_arrays(dtype=numeric_dtypes)
-numeric_like_arrays = numpy_arrays(dtype=numeric_like_dtypes)
+
+
+@st.composite
+def non_overflowing_float_arrays(draw: st.DrawFn) -> np.ndarray[Any, Any]:
+    """Generate float arrays that satisfy not_overflowing_array by construction.
+
+    Bounds element magnitudes strictly below 2^(nmant+1) / array.size so that
+    sums cannot overflow the mantissa, avoiding rejection by the assume() filter.
+    """
+    dtype = draw(npst.floating_dtypes(endianness="=", sizes=(32, 64)))
+    shape = draw(npst.array_shapes())
+    size = int(np.prod(shape))
+    info = np.finfo(dtype)
+    limit = float(2 ** (info.nmant + 1))
+    # One ulp toward zero: strictly below the bound, exactly representable (required by hypothesis)
+    max_val = float(np.nextafter(dtype.type(limit / max(size, 1)), dtype.type(0)))
+    array = draw(
+        npst.arrays(
+            dtype=st.just(dtype),
+            shape=st.just(shape),
+            elements={"min_value": -max_val, "max_value": max_val, "allow_subnormal": False},
+        )
+    )
+    array = insert_nans(draw, array)
+    return array
+
+
+@st.composite
+def non_overflowing_int_arrays(draw: st.DrawFn) -> np.ndarray[Any, Any]:
+    """Generate integer arrays that satisfy not_overflowing_array by construction.
+
+    Bounds elements so that summing the entire array cannot overflow the dtype.
+    """
+    dtype = draw(npst.integer_dtypes(endianness="=") | npst.unsigned_integer_dtypes(endianness="="))
+    shape = draw(npst.array_shapes())
+    size = max(int(np.prod(shape)), 1)
+    imax = int(np.iinfo(dtype).max)
+    imin = int(np.iinfo(dtype).min)
+
+    # Strict <: largest int strictly less than imax / size
+    max_val = imax // size
+    if imax % size == 0:
+        max_val -= 1
+
+    # Strict >: smallest int strictly greater than imin / size
+    if imin < 0:
+        min_val = -((-imin) // size)
+        if (-imin) % size == 0:
+            min_val += 1
+    else:
+        # unsigned: imin == 0, need array > 0
+        min_val = 1
+
+    assume(min_val <= max_val)
+
+    array = draw(
+        npst.arrays(
+            dtype=st.just(dtype),
+            shape=st.just(shape),
+            elements={"min_value": min_val, "max_value": max_val},
+        )
+    )
+    return array
+
+
+numeric_like_arrays = (
+    non_overflowing_float_arrays() | non_overflowing_int_arrays() | numpy_arrays(dtype=numeric_like_dtypes)
+)
 all_arrays = numeric_like_arrays | cftime_arrays()
 
 
diff --git a/tests/test_properties.py b/tests/test_properties.py
index 74440b0d..ad2a7843 100644
--- a/tests/test_properties.py
+++ b/tests/test_properties.py
@@ -14,7 +14,7 @@
 import hypothesis.extra.numpy as npst
 import hypothesis.strategies as st
 import numpy as np
-from hypothesis import assume, given, note, settings
+from hypothesis import assume, event, given, note, settings
 
 import flox
 from flox.core import groupby_reduce
@@ -127,6 +127,19 @@ def test_groupby_reduce(data, array, func: str) -> None:
             shape=st.just((array.shape[-1],)),
         )
     )
+    event(f"{array.dtype.kind=!r}")
+    event(f"{by.dtype.kind=!r}")
+
+    # Add some all-NaN groups
+    if array.dtype.kind == "f" and data.draw(st.sampled_from([True, True, True, True, False])):
+        groups = pd.unique(by.ravel())
+        toset = data.draw(st.lists(st.sampled_from(groups), min_size=1, max_size=len(groups)))
+        note("Setting all-NaN groups")
+        event("all-NaN groups")
+        mask = np.zeros(by.shape, dtype=bool)
+        for g in toset:
+            mask |= by == g
+        array[..., mask] = np.nan
     if func in BLOCKWISE_FUNCS and isinstance(array, dask.array.Array):
         array = array.rechunk({axis: -1})
     assert len(np.unique(by)) == 1
@@ -216,7 +229,8 @@ def test_groupby_reduce_numpy_vs_other(data, array, func: str) -> None:
     result_other, *_ = groupby_reduce(array, by, **kwargs)
     result_numpy, *_ = groupby_reduce(numpy_array, by, **kwargs)
     assert isinstance(result_other, type(array))
-    assert_equal(result_other, result_numpy)
+    tolerance = {"rtol": 1e-5} if array.dtype == np.float32 else None
+    assert_equal(result_other, result_numpy, tolerance)
 
 
 @given(
@@ -257,7 +271,7 @@ def test_scans_against_numpy(data, array: dask.array.Array, func: str) -> None:
         expected = expected.astype(dtype)
 
     note((numpy_array, group_idx, array.chunks))
-    tolerance = {"rtol": 1e-13, "atol": 1e-15}
+    tolerance = {"rtol": 1e-12, "atol": 1e-15}
     actual = groupby_scan(numpy_array, by, func=func, axis=-1, dtype=dtype)
     assert_equal(actual, expected, tolerance)
 