Climate-REF · lewisjared · Feb 24, 2026 · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026
diff --git a/changelog/565.fix.md b/changelog/565.fix.md
@@ -0,0 +1 @@
+Resolved pandas FutureWarnings to support both pandas 2 and 3, including fixes for DataFrame concatenation with empty or all-NA entries and null-type mismatches in parquet round-trips.
diff --git a/packages/climate-ref-core/src/climate_ref_core/constraints.py b/packages/climate-ref-core/src/climate_ref_core/constraints.py
@@ -271,7 +271,15 @@ def apply(
             # Restore the original index so downstream concatenation is consistent
             supplementary_group.index = original_index[list(select)]
 
-        return pd.concat([group, supplementary_group])
+        if supplementary_group.empty:
+            return group
+        # Drop all-NA columns before concat as the default behaviour will change in pandas 3
+        return pd.concat(
+            [
+                group.dropna(axis="columns", how="all"),
+                supplementary_group.dropna(axis="columns", how="all"),
+            ]
+        )
 
     @classmethod
     def from_defaults(

diff --git a/packages/climate-ref-core/src/climate_ref_core/esgf/cmip7.py b/packages/climate-ref-core/src/climate_ref_core/esgf/cmip7.py
@@ -75,6 +75,7 @@ def _convert_file_to_cmip7(cmip6_path: Path, cmip7_facets: dict[str, Any]) -> Pa
 
     # Convert the file and derive the time range from the dataset
     logger.info(f"Converting to CMIP7: {cmip6_path.name}")
+
     time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
     with xr.open_dataset(cmip6_path, decode_times=time_coder) as ds:
         frequency = str(cmip7_facets.get("frequency", "mon"))

diff --git a/packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/diagnostics/base.py b/packages/climate-ref-esmvaltool/src/climate_ref_esmvaltool/diagnostics/base.py
@@ -39,16 +39,16 @@ def get_cmip_source_type(
     return SourceDatasetType.CMIP6
 
 
-def mask_fillvalues(array: np.ndarray) -> np.ma.MaskedArray:  # type: ignore[type-arg]
+def mask_fillvalues(array: np.ndarray) -> np.ma.MaskedArray:
     """Convert netCDF4 fill values in an array to a mask."""
     # Workaround for https://github.com/pydata/xarray/issues/2742
     defaults = {np.dtype(k): v for k, v in netCDF4.default_fillvals.items()}
-    return np.ma.masked_equal(array, defaults[array.dtype])  # type: ignore[no-untyped-call,no-any-return]
+    return np.ma.masked_equal(array, defaults[array.dtype])
 
 
-def fillvalues_to_nan(array: np.ndarray) -> np.ndarray:  # type: ignore[type-arg]
+def fillvalues_to_nan(array: np.ndarray) -> np.ndarray:
     """Convert netCDF4 fill values in an array to NaN."""
-    return mask_fillvalues(array).filled(np.nan)  # type: ignore[no-untyped-call,no-any-return]
+    return mask_fillvalues(array).filled(np.nan)
 
 
 class ESMValToolDiagnostic(CommandLineDiagnostic):

diff --git a/packages/climate-ref-example/src/climate_ref_example/example.py b/packages/climate-ref-example/src/climate_ref_example/example.py
@@ -42,6 +42,11 @@ def calculate_annual_mean_timeseries(input_files: list[Path]) -> xr.Dataset:
 
     annual_mean = xr_ds.resample(time="YS").mean()
 
+    # Drop time_bnds before weighted mean to avoid dtype=object division error
+    # (resample+mean turns datetime bounds into object arrays)
+    if "time_bnds" in annual_mean:
+        annual_mean = annual_mean.drop_vars("time_bnds")
+
     return annual_mean.weighted(xr_ds.areacella.fillna(0)).mean(dim=["lat", "lon"], keep_attrs=True)
 
 

diff --git a/packages/climate-ref-ilamb/pyproject.toml b/packages/climate-ref-ilamb/pyproject.toml
@@ -24,7 +24,7 @@ classifiers = [
 ]
 dependencies = [
   "climate-ref-core",
-  "ilamb3>=2025.9.9",
+  "ilamb3>=2025.9.9,<2025.12",  # to be relaxed in #548
   "scipy<1.16",        # https://github.com/statsmodels/statsmodels/issues/9584
   "xarray<2025.11",    # ilamb3 incompatibility with integrate_space units handling
 ]

diff --git a/packages/climate-ref-ilamb/src/climate_ref_ilamb/standard.py b/packages/climate-ref-ilamb/src/climate_ref_ilamb/standard.py
@@ -589,7 +589,8 @@ def build_execution_result(self, definition: ExecutionDefinition) -> ExecutionRe
         # output files
         series = []
         for ncfile in definition.output_directory.glob("*.nc"):
-            ds = xr.open_dataset(ncfile, use_cftime=True)
+            time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
+            ds = xr.open_dataset(ncfile, decode_times=time_coder)
             for name, da in ds.items():
                 # Only create series for 1d DataArray's with these dimensions
                 if not (da.ndim == 1 and set(da.dims).intersection(["time", "month"])):

diff --git a/packages/climate-ref/src/climate_ref/alembic.ini b/packages/climate-ref/src/climate_ref/alembic.ini
@@ -51,6 +51,7 @@ prepend_sys_path = .
 # version_path_separator = space
 # version_path_separator = newline
 version_path_separator = os  # Use os.pathsep. Default configuration used for new projects.
+path_separator=os
 
 # set to 'true' to search source files recursively
 # in each "version_locations" directory

diff --git a/packages/climate-ref/src/climate_ref/conftest_plugin.py b/packages/climate-ref/src/climate_ref/conftest_plugin.py
@@ -251,7 +251,8 @@ def config(tmp_path: Path, monkeypatch: pytest.MonkeyPatch, request: pytest.Fixt
 def invoke_cli(monkeypatch: pytest.MonkeyPatch) -> Callable[..., Result]:
     """Invoke the REF CLI and verify exit code."""
     runner = CliRunner()
-    runner.mix_stderr = False
+    # Older versions of typer mix stderr and stdout. This option has been removed in newer versions
+    runner.mix_stderr = False  # type: ignore[attr-defined]
 
     def _invoke_cli(args: list[str], expected_exit_code: int = 0, always_log: bool = False) -> Result:
         monkeypatch.setenv("NO_COLOR", "1")

diff --git a/packages/climate-ref/tests/unit/cli/test_root.py b/packages/climate-ref/tests/unit/cli/test_root.py
@@ -15,7 +15,7 @@ def escape_ansi(line):
 
 
 def test_without_subcommand(invoke_cli):
-    result = invoke_cli([])
+    result = invoke_cli([], expected_exit_code=2)
     assert "Usage:" in result.stdout
     assert "ref [OPTIONS] COMMAND [ARGS]" in result.stdout
     assert "A CLI for the Assessment Fast Track Rapid Evaluation Framework" in result.stdout

diff --git a/packages/climate-ref/tests/unit/datasets/test_cmip7.py b/packages/climate-ref/tests/unit/datasets/test_cmip7.py
@@ -267,7 +267,8 @@ def cmip7_converted_file(self, sample_data_dir, tmp_path) -> Path:
         cmip6_file = nc_files[0]
 
         # Open and convert to CMIP7
-        with xr.open_dataset(cmip6_file, use_cftime=True) as ds:
+        time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
+        with xr.open_dataset(cmip6_file, decode_times=time_coder) as ds:
             ds_cmip7 = convert_cmip6_dataset(ds)
 
             # Create filename

diff --git a/packages/climate-ref/tests/unit/datasets/test_netcdf_utils.py b/packages/climate-ref/tests/unit/datasets/test_netcdf_utils.py
@@ -239,7 +239,8 @@ def test_single_timestep(self, tmp_path):
 
     def test_matches_xarray_output(self, sample_nc_file):
         """Verify netCDF4+cftime produces the same time strings as xarray."""
-        with xr.open_dataset(sample_nc_file, use_cftime=True) as xr_ds:
+        time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
+        with xr.open_dataset(sample_nc_file, decode_times=time_coder) as xr_ds:
             xr_start = str(xr_ds["time"].values[0])
             xr_end = str(xr_ds["time"].values[-1])
 
@@ -259,7 +260,8 @@ def test_matches_xarray_with_noleap_calendar(self, tmp_path):
             time_var.calendar = "noleap"
             time_var[:] = [0.5, 365.5, 730.5]
 
-        with xr.open_dataset(filepath, use_cftime=True) as xr_ds:
+        time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
+        with xr.open_dataset(filepath, decode_times=time_coder) as xr_ds:
             xr_start = str(xr_ds["time"].values[0])
             xr_end = str(xr_ds["time"].values[-1])
 

diff --git a/packages/climate-ref/tests/unit/test_solve_helpers.py b/packages/climate-ref/tests/unit/test_solve_helpers.py
@@ -22,6 +22,20 @@
 from climate_ref_core.datasets import SourceDatasetType
 
 
+def _align_datetime_nulls(source: pd.DataFrame, target: pd.DataFrame) -> pd.DataFrame:
+    """Replace ``None`` with ``NaT`` in object columns that became datetime after parquet round-trip.
+
+    Parquet deserialises time columns as datetime64, converting ``None`` to
+    ``NaT``.  Without alignment, ``assert_frame_equal`` warns about mismatched
+    null-like values (``None`` vs ``NaT``), which becomes an error in pandas 3.
+    """
+    aligned = source.copy()
+    for col in aligned.columns:
+        if col in target.columns and target[col].dtype.kind == "M" and aligned[col].dtype == object:
+            aligned[col] = aligned[col].fillna(pd.NaT)
+    return aligned
+
+
 @pytest.fixture(scope="module")
 def cmip6_generated_catalog(sample_data_dir):
     """Module-cached CMIP6 catalog to avoid redundant find_local_datasets calls."""
@@ -72,8 +86,9 @@ def test_round_trip(self, tmp_path, cmip6_generated_catalog):
 
         assert out_path.exists()
         loaded = pd.read_parquet(out_path)
-        # Parquet may coerce time columns to datetime64, so skip dtype check
-        pd.testing.assert_frame_equal(cmip6_generated_catalog, loaded, check_dtype=False)
+        # Parquet coerces time columns (None -> NaT), so align nulls before comparison
+        expected = _align_datetime_nulls(cmip6_generated_catalog, loaded)
+        pd.testing.assert_frame_equal(expected, loaded, check_dtype=False)
 
     def test_load_solve_catalog_missing_dir(self, tmp_path):
         result = load_solve_catalog(tmp_path / "nonexistent")
@@ -94,10 +109,10 @@ def test_load_solve_catalog_with_files(self, tmp_path, cmip6_generated_catalog):
         result = load_solve_catalog(catalog_dir)
         assert result is not None
         assert SourceDatasetType.CMIP6 in result
-        # Parquet may coerce time columns to datetime64, so skip dtype check
-        pd.testing.assert_frame_equal(
-            result[SourceDatasetType.CMIP6], cmip6_generated_catalog, check_dtype=False
-        )
+        # Parquet coerces time columns (None -> NaT), so align nulls before comparison
+        loaded = result[SourceDatasetType.CMIP6]
+        expected = _align_datetime_nulls(cmip6_generated_catalog, loaded)
+        pd.testing.assert_frame_equal(loaded, expected, check_dtype=False)
 
 
 class TestSolveToResults:

diff --git a/pyproject.toml b/pyproject.toml
@@ -145,7 +145,8 @@ addopts = [
     "--ignore=packages/climate-ref-pmp/src/climate_ref_pmp/drivers",
 ]
 filterwarnings = [
-    "ignore:pkg_resources is deprecated as an API:DeprecationWarning",
+    # PytestAssertRewriteWarning: Module already imported so cannot be rewritten; celery.contrib.pytest
+    "ignore:Module already imported so cannot be rewritten:pytest.PytestAssertRewriteWarning",
     # TestCase, TestDataSpecification, TestCaseRunner, TestCaseNotFoundError, TestCasePaths are domain classes
     "ignore:cannot collect test class 'TestCase':pytest.PytestCollectionWarning",
     "ignore:cannot collect test class 'TestDataSpecification':pytest.PytestCollectionWarning",
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Resolved pandas FutureWarnings to support both pandas 2 and 3, including fixes for DataFrame concatenation with empty or all-NA entries and null-type mismatches in parquet round-trips.