Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/565.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Resolved pandas FutureWarnings to support both pandas 2 and 3, including fixes for DataFrame concatenation with empty or all-NA entries and null-type mismatches in parquet round-trips.
10 changes: 9 additions & 1 deletion packages/climate-ref-core/src/climate_ref_core/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,15 @@ def apply(
# Restore the original index so downstream concatenation is consistent
supplementary_group.index = original_index[list(select)]

return pd.concat([group, supplementary_group])
if supplementary_group.empty:
return group
# Drop all-NA columns before concat as the default behaviour will change in pandas 3
return pd.concat(
[
group.dropna(axis="columns", how="all"),
supplementary_group.dropna(axis="columns", how="all"),
]
)

@classmethod
def from_defaults(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def _convert_file_to_cmip7(cmip6_path: Path, cmip7_facets: dict[str, Any]) -> Pa

# Convert the file and derive the time range from the dataset
logger.info(f"Converting to CMIP7: {cmip6_path.name}")

time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
with xr.open_dataset(cmip6_path, decode_times=time_coder) as ds:
frequency = str(cmip7_facets.get("frequency", "mon"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,16 @@ def get_cmip_source_type(
return SourceDatasetType.CMIP6


def mask_fillvalues(array: np.ndarray) -> np.ma.MaskedArray: # type: ignore[type-arg]
def mask_fillvalues(array: np.ndarray) -> np.ma.MaskedArray:
"""Convert netCDF4 fill values in an array to a mask."""
# Workaround for https://github.com/pydata/xarray/issues/2742
defaults = {np.dtype(k): v for k, v in netCDF4.default_fillvals.items()}
return np.ma.masked_equal(array, defaults[array.dtype]) # type: ignore[no-untyped-call,no-any-return]
return np.ma.masked_equal(array, defaults[array.dtype])


def fillvalues_to_nan(array: np.ndarray) -> np.ndarray: # type: ignore[type-arg]
def fillvalues_to_nan(array: np.ndarray) -> np.ndarray:
"""Convert netCDF4 fill values in an array to NaN."""
return mask_fillvalues(array).filled(np.nan) # type: ignore[no-untyped-call,no-any-return]
return mask_fillvalues(array).filled(np.nan)


class ESMValToolDiagnostic(CommandLineDiagnostic):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ def calculate_annual_mean_timeseries(input_files: list[Path]) -> xr.Dataset:

annual_mean = xr_ds.resample(time="YS").mean()

# Drop time_bnds before weighted mean to avoid dtype=object division error
# (resample+mean turns datetime bounds into object arrays)
if "time_bnds" in annual_mean:
annual_mean = annual_mean.drop_vars("time_bnds")

return annual_mean.weighted(xr_ds.areacella.fillna(0)).mean(dim=["lat", "lon"], keep_attrs=True)


Expand Down
2 changes: 1 addition & 1 deletion packages/climate-ref-ilamb/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ classifiers = [
]
dependencies = [
"climate-ref-core",
"ilamb3>=2025.9.9",
"ilamb3>=2025.9.9,<2025.12", # to be relaxed in #548
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nocollier This will cause a merge conflict with #548, but we can resolve when merging

"scipy<1.16", # https://github.com/statsmodels/statsmodels/issues/9584
"xarray<2025.11", # ilamb3 incompatibility with integrate_space units handling
]
Expand Down
3 changes: 2 additions & 1 deletion packages/climate-ref-ilamb/src/climate_ref_ilamb/standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,8 @@ def build_execution_result(self, definition: ExecutionDefinition) -> ExecutionRe
# output files
series = []
for ncfile in definition.output_directory.glob("*.nc"):
ds = xr.open_dataset(ncfile, use_cftime=True)
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
ds = xr.open_dataset(ncfile, decode_times=time_coder)
for name, da in ds.items():
# Only create series for 1d DataArray's with these dimensions
if not (da.ndim == 1 and set(da.dims).intersection(["time", "month"])):
Expand Down
1 change: 1 addition & 0 deletions packages/climate-ref/src/climate_ref/alembic.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ prepend_sys_path = .
# version_path_separator = space
# version_path_separator = newline
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
path_separator=os

# set to 'true' to search source files recursively
# in each "version_locations" directory
Expand Down
3 changes: 2 additions & 1 deletion packages/climate-ref/src/climate_ref/conftest_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,8 @@ def config(tmp_path: Path, monkeypatch: pytest.MonkeyPatch, request: pytest.Fixt
def invoke_cli(monkeypatch: pytest.MonkeyPatch) -> Callable[..., Result]:
"""Invoke the REF CLI and verify exit code."""
runner = CliRunner()
runner.mix_stderr = False
# Older versions of typer mix stderr and stdout. This option has been removed in newer versions
runner.mix_stderr = False # type: ignore[attr-defined]

def _invoke_cli(args: list[str], expected_exit_code: int = 0, always_log: bool = False) -> Result:
monkeypatch.setenv("NO_COLOR", "1")
Expand Down
2 changes: 1 addition & 1 deletion packages/climate-ref/tests/unit/cli/test_root.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def escape_ansi(line):


def test_without_subcommand(invoke_cli):
result = invoke_cli([])
result = invoke_cli([], expected_exit_code=2)
assert "Usage:" in result.stdout
assert "ref [OPTIONS] COMMAND [ARGS]" in result.stdout
assert "A CLI for the Assessment Fast Track Rapid Evaluation Framework" in result.stdout
Expand Down
3 changes: 2 additions & 1 deletion packages/climate-ref/tests/unit/datasets/test_cmip7.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,8 @@ def cmip7_converted_file(self, sample_data_dir, tmp_path) -> Path:
cmip6_file = nc_files[0]

# Open and convert to CMIP7
with xr.open_dataset(cmip6_file, use_cftime=True) as ds:
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
with xr.open_dataset(cmip6_file, decode_times=time_coder) as ds:
ds_cmip7 = convert_cmip6_dataset(ds)

# Create filename
Expand Down
6 changes: 4 additions & 2 deletions packages/climate-ref/tests/unit/datasets/test_netcdf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,8 @@ def test_single_timestep(self, tmp_path):

def test_matches_xarray_output(self, sample_nc_file):
"""Verify netCDF4+cftime produces the same time strings as xarray."""
with xr.open_dataset(sample_nc_file, use_cftime=True) as xr_ds:
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
with xr.open_dataset(sample_nc_file, decode_times=time_coder) as xr_ds:
xr_start = str(xr_ds["time"].values[0])
xr_end = str(xr_ds["time"].values[-1])

Expand All @@ -259,7 +260,8 @@ def test_matches_xarray_with_noleap_calendar(self, tmp_path):
time_var.calendar = "noleap"
time_var[:] = [0.5, 365.5, 730.5]

with xr.open_dataset(filepath, use_cftime=True) as xr_ds:
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
with xr.open_dataset(filepath, decode_times=time_coder) as xr_ds:
xr_start = str(xr_ds["time"].values[0])
xr_end = str(xr_ds["time"].values[-1])

Expand Down
27 changes: 21 additions & 6 deletions packages/climate-ref/tests/unit/test_solve_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,20 @@
from climate_ref_core.datasets import SourceDatasetType


def _align_datetime_nulls(source: pd.DataFrame, target: pd.DataFrame) -> pd.DataFrame:
"""Replace ``None`` with ``NaT`` in object columns that became datetime after parquet round-trip.

Parquet deserialises time columns as datetime64, converting ``None`` to
``NaT``. Without alignment, ``assert_frame_equal`` warns about mismatched
null-like values (``None`` vs ``NaT``), which becomes an error in pandas 3.
"""
aligned = source.copy()
for col in aligned.columns:
if col in target.columns and target[col].dtype.kind == "M" and aligned[col].dtype == object:
aligned[col] = aligned[col].fillna(pd.NaT)
return aligned


@pytest.fixture(scope="module")
def cmip6_generated_catalog(sample_data_dir):
"""Module-cached CMIP6 catalog to avoid redundant find_local_datasets calls."""
Expand Down Expand Up @@ -72,8 +86,9 @@ def test_round_trip(self, tmp_path, cmip6_generated_catalog):

assert out_path.exists()
loaded = pd.read_parquet(out_path)
# Parquet may coerce time columns to datetime64, so skip dtype check
pd.testing.assert_frame_equal(cmip6_generated_catalog, loaded, check_dtype=False)
# Parquet coerces time columns to datetime64 (None -> NaT), so align the null values before comparison
expected = _align_datetime_nulls(cmip6_generated_catalog, loaded)
pd.testing.assert_frame_equal(expected, loaded, check_dtype=False)

def test_load_solve_catalog_missing_dir(self, tmp_path):
result = load_solve_catalog(tmp_path / "nonexistent")
Expand All @@ -94,10 +109,10 @@ def test_load_solve_catalog_with_files(self, tmp_path, cmip6_generated_catalog):
result = load_solve_catalog(catalog_dir)
assert result is not None
assert SourceDatasetType.CMIP6 in result
# Parquet may coerce time columns to datetime64, so skip dtype check
pd.testing.assert_frame_equal(
result[SourceDatasetType.CMIP6], cmip6_generated_catalog, check_dtype=False
)
# Parquet coerces time columns to datetime64 (None -> NaT), so align the null values before comparison
loaded = result[SourceDatasetType.CMIP6]
expected = _align_datetime_nulls(cmip6_generated_catalog, loaded)
pd.testing.assert_frame_equal(loaded, expected, check_dtype=False)


class TestSolveToResults:
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ addopts = [
"--ignore=packages/climate-ref-pmp/src/climate_ref_pmp/drivers",
]
filterwarnings = [
"ignore:pkg_resources is deprecated as an API:DeprecationWarning",
# PytestAssertRewriteWarning: Module already imported so cannot be rewritten; celery.contrib.pytest
"ignore:Module already imported so cannot be rewritten:pytest.PytestAssertRewriteWarning",
# TestCase, TestDataSpecification, TestCaseRunner, TestCaseNotFoundError, TestCasePaths are domain classes
"ignore:cannot collect test class 'TestCase':pytest.PytestCollectionWarning",
"ignore:cannot collect test class 'TestDataSpecification':pytest.PytestCollectionWarning",
Expand Down
Loading
Loading