From 1899b175d76696a3059d5f3a06beb02db48bb937 Mon Sep 17 00:00:00 2001 From: Rebot Date: Wed, 18 Feb 2026 16:31:59 -0800 Subject: [PATCH 1/8] fix: change Chronos default dtype from bfloat16 to float32 Using bfloat16 can cause precision issues that significantly impact forecast accuracy. As reported in time-bench benchmarks, switching to float32 improved Chronos model rankings from 5th to 1st place in terms of MASE. Closes #307 --- timecopilot/models/foundation/chronos.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/timecopilot/models/foundation/chronos.py b/timecopilot/models/foundation/chronos.py index 95b2906..69a1157 100644 --- a/timecopilot/models/foundation/chronos.py +++ b/timecopilot/models/foundation/chronos.py @@ -77,8 +77,8 @@ def __init__( available, otherwise CPU). - For best performance with large models (e.g., "chronos-t5-large"), a CUDA-compatible GPU is recommended. - - The model weights are loaded with torch_dtype=torch.bfloat16 for - efficiency on supported hardware. + - The model weights are loaded with torch_dtype=torch.float32 for + numerical precision. 
""" self.repo_id = repo_id @@ -91,7 +91,7 @@ def _get_model(self) -> BaseChronosPipeline: model = BaseChronosPipeline.from_pretrained( self.repo_id, device_map=device_map, - torch_dtype=torch.bfloat16, + torch_dtype=torch.float32, ) try: yield model From ce3eb382420ac99b6083bad6a110b85614e82705 Mon Sep 17 00:00:00 2001 From: Rebot Date: Wed, 18 Feb 2026 16:48:42 -0800 Subject: [PATCH 2/8] fix: update TimeSeriesDataset default dtype and add tests Address review comments: - Change TimeSeriesDataset.from_df default dtype from bfloat16 to float32 to ensure inputs match model precision - Add unit tests to prevent dtype regression --- tests/models/foundation/test_chronos.py | 37 +++++++++++++++++++++++++ timecopilot/models/foundation/utils.py | 2 +- 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 tests/models/foundation/test_chronos.py diff --git a/tests/models/foundation/test_chronos.py b/tests/models/foundation/test_chronos.py new file mode 100644 index 0000000..b6ed269 --- /dev/null +++ b/tests/models/foundation/test_chronos.py @@ -0,0 +1,37 @@ +import torch + +from timecopilot.models.foundation.utils import TimeSeriesDataset + + +def test_timeseries_dataset_default_dtype_is_float32(): + """Ensure TimeSeriesDataset defaults to float32 for numerical precision.""" + import pandas as pd + + df = pd.DataFrame( + { + "unique_id": ["A"] * 10, + "ds": pd.date_range("2020-01-01", periods=10), + "y": range(10), + } + ) + dataset = TimeSeriesDataset.from_df(df, batch_size=10) + assert dataset.data[0].dtype == torch.float32 + + +def test_chronos_model_uses_float32(mocker): + """Ensure Chronos loads models with float32 dtype.""" + mock_pipeline = mocker.patch( + "timecopilot.models.foundation.chronos.BaseChronosPipeline.from_pretrained" + ) + mocker.patch("torch.cuda.is_available", return_value=False) + + from timecopilot.models.foundation.chronos import Chronos + + model = Chronos(repo_id="amazon/chronos-t5-tiny") + + with model._get_model(): + pass + + 
mock_pipeline.assert_called_once() + call_kwargs = mock_pipeline.call_args[1] + assert call_kwargs["torch_dtype"] == torch.float32 diff --git a/timecopilot/models/foundation/utils.py b/timecopilot/models/foundation/utils.py index 287be32..acff5ab 100644 --- a/timecopilot/models/foundation/utils.py +++ b/timecopilot/models/foundation/utils.py @@ -27,7 +27,7 @@ def from_df( cls, df: pd.DataFrame, batch_size: int, - dtype: torch.dtype = torch.bfloat16, + dtype: torch.dtype = torch.float32, ): tensors = [] df_sorted = df.sort_values(by=["unique_id", "ds"]) From 9f323c172123e0a64846ef1a23eeceb6e7b6c099 Mon Sep 17 00:00:00 2001 From: Rebot Date: Wed, 18 Feb 2026 16:52:49 -0800 Subject: [PATCH 3/8] refactor: add dtype attribute to Chronos class - Add dtype parameter to __init__ (default: torch.float32) - Use self.dtype consistently for model loading and dataset creation - Follows same pattern as FlowState for consistency - Update tests to verify dtype configuration --- tests/models/foundation/test_chronos.py | 24 +++++++++++++++++++----- timecopilot/models/foundation/chronos.py | 14 ++++++++++---- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/tests/models/foundation/test_chronos.py b/tests/models/foundation/test_chronos.py index b6ed269..4c6ae82 100644 --- a/tests/models/foundation/test_chronos.py +++ b/tests/models/foundation/test_chronos.py @@ -18,8 +18,16 @@ def test_timeseries_dataset_default_dtype_is_float32(): assert dataset.data[0].dtype == torch.float32 -def test_chronos_model_uses_float32(mocker): - """Ensure Chronos loads models with float32 dtype.""" +def test_chronos_default_dtype_is_float32(): + """Ensure Chronos defaults to float32 dtype.""" + from timecopilot.models.foundation.chronos import Chronos + + model = Chronos(repo_id="amazon/chronos-t5-tiny") + assert model.dtype == torch.float32 + + +def test_chronos_model_uses_configured_dtype(mocker): + """Ensure Chronos loads models with the configured dtype.""" mock_pipeline = mocker.patch( 
"timecopilot.models.foundation.chronos.BaseChronosPipeline.from_pretrained" ) @@ -27,11 +35,17 @@ def test_chronos_model_uses_float32(mocker): from timecopilot.models.foundation.chronos import Chronos + # Test default (float32) model = Chronos(repo_id="amazon/chronos-t5-tiny") - with model._get_model(): pass - - mock_pipeline.assert_called_once() call_kwargs = mock_pipeline.call_args[1] assert call_kwargs["torch_dtype"] == torch.float32 + + # Test custom dtype (bfloat16) + mock_pipeline.reset_mock() + model_bf16 = Chronos(repo_id="amazon/chronos-t5-tiny", dtype=torch.bfloat16) + with model_bf16._get_model(): + pass + call_kwargs = mock_pipeline.call_args[1] + assert call_kwargs["torch_dtype"] == torch.bfloat16 diff --git a/timecopilot/models/foundation/chronos.py b/timecopilot/models/foundation/chronos.py index 69a1157..d97013d 100644 --- a/timecopilot/models/foundation/chronos.py +++ b/timecopilot/models/foundation/chronos.py @@ -28,6 +28,7 @@ def __init__( repo_id: str = "amazon/chronos-t5-large", batch_size: int = 16, alias: str = "Chronos", + dtype: torch.dtype = torch.float32, ): # ruff: noqa: E501 """ @@ -45,6 +46,10 @@ def __init__( higher batch sizes (e.g., 256) are possible. alias (str, optional): Name to use for the model in output DataFrames and logs. Defaults to "Chronos". + dtype (torch.dtype, optional): Data type for model weights and + input tensors. Defaults to torch.float32 for numerical + precision. Use torch.bfloat16 for reduced memory usage on + supported hardware. Notes: **Available models:** @@ -77,13 +82,14 @@ def __init__( available, otherwise CPU). - For best performance with large models (e.g., "chronos-t5-large"), a CUDA-compatible GPU is recommended. - - The model weights are loaded with torch_dtype=torch.float32 for - numerical precision. + - Model weights and input tensors use dtype (default: torch.float32) + for numerical precision. Can be overridden via the dtype parameter. 
""" self.repo_id = repo_id self.batch_size = batch_size self.alias = alias + self.dtype = dtype @contextmanager def _get_model(self) -> BaseChronosPipeline: @@ -91,7 +97,7 @@ def _get_model(self) -> BaseChronosPipeline: model = BaseChronosPipeline.from_pretrained( self.repo_id, device_map=device_map, - torch_dtype=torch.float32, + torch_dtype=self.dtype, ) try: yield model @@ -218,7 +224,7 @@ def forecast( """ freq = self._maybe_infer_freq(df, freq) qc = QuantileConverter(level=level, quantiles=quantiles) - dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size) + dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size, dtype=self.dtype) fcst_df = dataset.make_future_dataframe(h=h, freq=freq) with self._get_model() as model: fcsts_mean_np, fcsts_quantiles_np = self._predict( From f4c0b2575d345cd6b69571755bc51251dc0974a5 Mon Sep 17 00:00:00 2001 From: Rebot Date: Wed, 18 Feb 2026 16:56:30 -0800 Subject: [PATCH 4/8] style: fix ruff formatting --- timecopilot/models/foundation/chronos.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/timecopilot/models/foundation/chronos.py b/timecopilot/models/foundation/chronos.py index d97013d..d414a9e 100644 --- a/timecopilot/models/foundation/chronos.py +++ b/timecopilot/models/foundation/chronos.py @@ -224,7 +224,9 @@ def forecast( """ freq = self._maybe_infer_freq(df, freq) qc = QuantileConverter(level=level, quantiles=quantiles) - dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size, dtype=self.dtype) + dataset = TimeSeriesDataset.from_df( + df, batch_size=self.batch_size, dtype=self.dtype + ) fcst_df = dataset.make_future_dataframe(h=h, freq=freq) with self._get_model() as model: fcsts_mean_np, fcsts_quantiles_np = self._predict( From 43d47997d5f6cde940bb084a91dc8991dab3517b Mon Sep 17 00:00:00 2001 From: Rebot Date: Wed, 18 Feb 2026 16:58:43 -0800 Subject: [PATCH 5/8] fix: keep TimeSeriesDataset default dtype as bfloat16 Revert utils.py default to bfloat16 for 
backward compatibility. Chronos explicitly passes dtype=self.dtype (float32) to avoid breaking other code that relies on the original default. --- tests/models/foundation/test_chronos.py | 6 +++--- timecopilot/models/foundation/utils.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/models/foundation/test_chronos.py b/tests/models/foundation/test_chronos.py index 4c6ae82..d2717ee 100644 --- a/tests/models/foundation/test_chronos.py +++ b/tests/models/foundation/test_chronos.py @@ -3,8 +3,8 @@ from timecopilot.models.foundation.utils import TimeSeriesDataset -def test_timeseries_dataset_default_dtype_is_float32(): - """Ensure TimeSeriesDataset defaults to float32 for numerical precision.""" +def test_timeseries_dataset_default_dtype_is_bfloat16(): + """Ensure TimeSeriesDataset defaults to bfloat16 for backward compatibility.""" import pandas as pd df = pd.DataFrame( @@ -15,7 +15,7 @@ def test_timeseries_dataset_default_dtype_is_float32(): } ) dataset = TimeSeriesDataset.from_df(df, batch_size=10) - assert dataset.data[0].dtype == torch.float32 + assert dataset.data[0].dtype == torch.bfloat16 def test_chronos_default_dtype_is_float32(): diff --git a/timecopilot/models/foundation/utils.py b/timecopilot/models/foundation/utils.py index acff5ab..287be32 100644 --- a/timecopilot/models/foundation/utils.py +++ b/timecopilot/models/foundation/utils.py @@ -27,7 +27,7 @@ def from_df( cls, df: pd.DataFrame, batch_size: int, - dtype: torch.dtype = torch.float32, + dtype: torch.dtype = torch.bfloat16, ): tensors = [] df_sorted = df.sort_values(by=["unique_id", "ds"]) From d032e931b9d25acdc67c41f6c6d35d2dc82aec07 Mon Sep 17 00:00:00 2001 From: Rebot Date: Wed, 18 Feb 2026 17:05:32 -0800 Subject: [PATCH 6/8] test: reorganize tests and add forecast dtype verification - Move TimeSeriesDataset tests to test_utils.py (shared utility) - Add test_chronos_forecast_uses_configured_dtype to verify dtype is passed correctly to TimeSeriesDataset.from_df --- 
tests/models/foundation/test_chronos.py | 59 ++++++++++++++++++------- tests/models/foundation/test_utils.py | 33 ++++++++++++++ 2 files changed, 75 insertions(+), 17 deletions(-) create mode 100644 tests/models/foundation/test_utils.py diff --git a/tests/models/foundation/test_chronos.py b/tests/models/foundation/test_chronos.py index d2717ee..18b6fae 100644 --- a/tests/models/foundation/test_chronos.py +++ b/tests/models/foundation/test_chronos.py @@ -1,22 +1,5 @@ import torch -from timecopilot.models.foundation.utils import TimeSeriesDataset - - -def test_timeseries_dataset_default_dtype_is_bfloat16(): - """Ensure TimeSeriesDataset defaults to bfloat16 for backward compatibility.""" - import pandas as pd - - df = pd.DataFrame( - { - "unique_id": ["A"] * 10, - "ds": pd.date_range("2020-01-01", periods=10), - "y": range(10), - } - ) - dataset = TimeSeriesDataset.from_df(df, batch_size=10) - assert dataset.data[0].dtype == torch.bfloat16 - def test_chronos_default_dtype_is_float32(): """Ensure Chronos defaults to float32 dtype.""" @@ -49,3 +32,45 @@ def test_chronos_model_uses_configured_dtype(mocker): pass call_kwargs = mock_pipeline.call_args[1] assert call_kwargs["torch_dtype"] == torch.bfloat16 + + +def test_chronos_forecast_uses_configured_dtype(mocker): + """Ensure Chronos.forecast uses the configured dtype for dataset creation.""" + import pandas as pd + + import pytest + + from timecopilot.models.foundation.chronos import Chronos + + # Patch dataset creation to capture dtype argument + mock_from_df = mocker.patch( + "timecopilot.models.foundation.chronos.TimeSeriesDataset.from_df" + ) + + # Avoid real model loading and CUDA branching + mocker.patch( + "timecopilot.models.foundation.chronos.BaseChronosPipeline.from_pretrained" + ) + mocker.patch("torch.cuda.is_available", return_value=False) + + model_dtype = torch.bfloat16 + model = Chronos(repo_id="amazon/chronos-t5-tiny", dtype=model_dtype) + + df = pd.DataFrame( + { + "unique_id": ["A"] * 10, + "ds": 
pd.date_range("2020-01-01", periods=10), + "y": range(10), + } + ) + + def _from_df_side_effect(*args, **kwargs): + # Assert that Chronos.forecast passes the configured dtype through + assert kwargs.get("dtype") == model_dtype + # Short-circuit the rest of the forecast call + raise RuntimeError("stop after dtype check") + + mock_from_df.side_effect = _from_df_side_effect + + with pytest.raises(RuntimeError, match="stop after dtype check"): + model.forecast(df=df, h=2) diff --git a/tests/models/foundation/test_utils.py b/tests/models/foundation/test_utils.py new file mode 100644 index 0000000..741cb6d --- /dev/null +++ b/tests/models/foundation/test_utils.py @@ -0,0 +1,33 @@ +import torch + +from timecopilot.models.foundation.utils import TimeSeriesDataset + + +def test_timeseries_dataset_default_dtype_is_bfloat16(): + """Ensure TimeSeriesDataset defaults to bfloat16 for backward compatibility.""" + import pandas as pd + + df = pd.DataFrame( + { + "unique_id": ["A"] * 10, + "ds": pd.date_range("2020-01-01", periods=10), + "y": range(10), + } + ) + dataset = TimeSeriesDataset.from_df(df, batch_size=10) + assert dataset.data[0].dtype == torch.bfloat16 + + +def test_timeseries_dataset_respects_custom_dtype(): + """Ensure TimeSeriesDataset respects custom dtype parameter.""" + import pandas as pd + + df = pd.DataFrame( + { + "unique_id": ["A"] * 10, + "ds": pd.date_range("2020-01-01", periods=10), + "y": range(10), + } + ) + dataset = TimeSeriesDataset.from_df(df, batch_size=10, dtype=torch.float32) + assert dataset.data[0].dtype == torch.float32 From c4664b7d133b0660afb69d97068ae703b498ebb8 Mon Sep 17 00:00:00 2001 From: Rebot Date: Wed, 18 Feb 2026 17:10:40 -0800 Subject: [PATCH 7/8] style: fix import ordering --- tests/models/foundation/test_chronos.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/models/foundation/test_chronos.py b/tests/models/foundation/test_chronos.py index 18b6fae..aa02afc 100644 --- a/tests/models/foundation/test_chronos.py +++ 
b/tests/models/foundation/test_chronos.py @@ -37,7 +37,6 @@ def test_chronos_model_uses_configured_dtype(mocker): def test_chronos_forecast_uses_configured_dtype(mocker): """Ensure Chronos.forecast uses the configured dtype for dataset creation.""" import pandas as pd - import pytest from timecopilot.models.foundation.chronos import Chronos From 705b33ff0b201ca3fdcfe5cb05ecf6211f9f8f9d Mon Sep 17 00:00:00 2001 From: azul Date: Wed, 18 Feb 2026 17:15:32 -0800 Subject: [PATCH 8/8] fix: rename TimeSeriesDataset default dtype test for clarity Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/models/foundation/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/foundation/test_utils.py b/tests/models/foundation/test_utils.py index 741cb6d..c51e657 100644 --- a/tests/models/foundation/test_utils.py +++ b/tests/models/foundation/test_utils.py @@ -3,7 +3,7 @@ from timecopilot.models.foundation.utils import TimeSeriesDataset -def test_timeseries_dataset_default_dtype_is_bfloat16(): +def test_timeseries_dataset_class_default_dtype_is_bfloat16(): """Ensure TimeSeriesDataset defaults to bfloat16 for backward compatibility.""" import pandas as pd