From a8fb580c31b87bcf6852ad0cbaa9dccb20990e48 Mon Sep 17 00:00:00 2001
From: pavle-martinovic_data
Date: Wed, 5 Nov 2025 17:59:51 +0100
Subject: [PATCH 1/4] [ML-52572] Add time series identifiers to dataset with deepar covariate

---
 .../automl_runtime/forecast/deepar/model.py   |  1 +
 .../forecast/deepar/model_test.py             | 59 +++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/runtime/databricks/automl_runtime/forecast/deepar/model.py b/runtime/databricks/automl_runtime/forecast/deepar/model.py
index 33c3d37..3be3cea 100644
--- a/runtime/databricks/automl_runtime/forecast/deepar/model.py
+++ b/runtime/databricks/automl_runtime/forecast/deepar/model.py
@@ -191,6 +191,7 @@ def predict_samples(self,
                 target_array = df[self._target_col].dropna().to_numpy()  # keep NaNs for horizon
                 feat_array = df[self._feature_cols].to_numpy().T  # transpose for GluonTS
                 list_dataset.append({
+                    "item_id": ts_id,
                     "start": df.index[0],
                     "target": target_array,
                     "feat_dynamic_real": feat_array
diff --git a/runtime/tests/automl_runtime/forecast/deepar/model_test.py b/runtime/tests/automl_runtime/forecast/deepar/model_test.py
index 0e9e101..6ad76e2 100644
--- a/runtime/tests/automl_runtime/forecast/deepar/model_test.py
+++ b/runtime/tests/automl_runtime/forecast/deepar/model_test.py
@@ -823,6 +823,65 @@ def test_model_with_covariates_missing_columns(self):
         with self.assertRaises(Exception):
             deepar_model.predict(context=None, model_input=sample_input_missing_features)
 
+    def test_multi_series_model_with_covariates_preserves_item_id(self):
+        """Test that a multi-series DeepAR model with covariates keeps the series identifier column in its predictions."""
+        target_col = "sales"
+        time_col = "date"
+        id_col = "store"
+        feature_cols = ["temperature", "promotion"]
+
+        deepar_model = DeepARModel(
+            model=self.model,
+            horizon=self.prediction_length,
+            frequency_unit="d",
+            frequency_quantity=3,
+            num_samples=1,
+            target_col=target_col,
+            time_col=time_col,
+            id_cols=[id_col],
+            feature_cols=feature_cols,
+        )
+
+        num_rows_per_series = 10
+        # Create data for two stores: history rows followed by horizon rows whose target is None
+        sample_input_store1 = pd.DataFrame({
+            time_col: pd.date_range("2020-10-01", periods=num_rows_per_series + self.prediction_length),
+            target_col: list(range(num_rows_per_series)) + [None] * self.prediction_length,
+            id_col: [1] * (num_rows_per_series + self.prediction_length),
+            "temperature": list(range(20, 20 + num_rows_per_series)) + [0] * self.prediction_length,
+            "promotion": [i % 2 for i in range(num_rows_per_series)] + [0] * self.prediction_length
+        })
+
+        sample_input_store2 = pd.DataFrame({
+            time_col: pd.date_range("2020-10-01", periods=num_rows_per_series + self.prediction_length),
+            target_col: list(range(num_rows_per_series)) + [None] * self.prediction_length,
+            id_col: [2] * (num_rows_per_series + self.prediction_length),
+            "temperature": list(range(25, 25 + num_rows_per_series)) + [0] * self.prediction_length,
+            "promotion": [i % 2 for i in range(num_rows_per_series)] + [0] * self.prediction_length
+        })
+
+        sample_input = pd.concat([sample_input_store1, sample_input_store2], ignore_index=True)
+
+        with mlflow.start_run() as run:
+            mlflow_deepar_log_model(deepar_model, sample_input)
+            run_id = run.info.run_id
+
+        # Reload the logged model through mlflow.pyfunc and predict on the same input
+        loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
+        pred_df = loaded_model.predict(sample_input)
+
+        # Verify that series identifiers are preserved in the output
+        self.assertIn(id_col, pred_df.columns,
+                      f"Series identifier '{id_col}' should be preserved in predictions for multi-series model with covariates")
+        self.assertEqual(pred_df.columns.tolist(), [time_col, "yhat", id_col])
+        self.assertEqual(len(pred_df), self.prediction_length * 2)  # prediction_length rows for each of the two stores
+
+        # Verify both stores are present; ids are asserted as strings ('1', '2') although the input column holds ints
+        unique_stores = pred_df[id_col].unique()
+        self.assertEqual(len(unique_stores), 2, "Should have predictions for both stores")
+        self.assertIn('1', unique_stores, "Should have predictions for store 1")
+        self.assertIn('2', unique_stores, "Should have predictions for store 2")
+
 
 class TestDeepARModelWithPreprocess(unittest.TestCase):
     @classmethod

From ffc38a7b78ee3c01e5c444a64822acd910ed9d45 Mon Sep 17 00:00:00 2001
From: pavle-martinovic_data
Date: Wed, 5 Nov 2025 18:11:59 +0100
Subject: [PATCH 2/4] add setuptools to lock file

---
 runtime/environment.lock.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/runtime/environment.lock.txt b/runtime/environment.lock.txt
index 02e265b..989051e 100644
--- a/runtime/environment.lock.txt
+++ b/runtime/environment.lock.txt
@@ -482,5 +482,6 @@ zipp==3.23.0
     #   importlib-metadata
     #   importlib-resources
 
-# The following packages are considered to be unsafe in a requirements file:
-# setuptools
+setuptools>=68.0.0
+    # added manually
+

From 0a10075acc0896b644fbea2fdaf85eb40559d6b4 Mon Sep 17 00:00:00 2001
From: pavle-martinovic_data
Date: Wed, 5 Nov 2025 18:30:28 +0100
Subject: [PATCH 3/4] reduce version of setuptools

---
 runtime/environment.lock.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runtime/environment.lock.txt b/runtime/environment.lock.txt
index 989051e..e2d8111 100644
--- a/runtime/environment.lock.txt
+++ b/runtime/environment.lock.txt
@@ -482,6 +482,6 @@ zipp==3.23.0
     #   importlib-metadata
     #   importlib-resources
 
-setuptools>=68.0.0
+setuptools<68
     # added manually
 

From d727da87e8a5f50b38afa8918ba4efe2bc87205b Mon Sep 17 00:00:00 2001
From: pavle-martinovic_data
Date: Wed, 5 Nov 2025 18:37:46 +0100
Subject: [PATCH 4/4] revert setuptools issue

---
 runtime/environment.lock.txt | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/runtime/environment.lock.txt b/runtime/environment.lock.txt
index e2d8111..02e265b 100644
--- a/runtime/environment.lock.txt
+++ b/runtime/environment.lock.txt
@@ -482,6 +482,5 @@ zipp==3.23.0
     #   importlib-metadata
     #   importlib-resources
 
-setuptools<68
-    # added manually
-
+# The following packages are considered to be unsafe in a requirements file:
+# setuptools