Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions runtime/databricks/automl_runtime/forecast/deepar/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ def predict_samples(self,
target_array = df[self._target_col].dropna().to_numpy() # keep NaNs for horizon
feat_array = df[self._feature_cols].to_numpy().T # transpose for GluonTS
list_dataset.append({
"item_id": ts_id,
"start": df.index[0],
"target": target_array,
"feat_dynamic_real": feat_array
Expand Down
59 changes: 59 additions & 0 deletions runtime/tests/automl_runtime/forecast/deepar/model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,65 @@ def test_model_with_covariates_missing_columns(self):
with self.assertRaises(Exception):
deepar_model.predict(context=None, model_input=sample_input_missing_features)

def test_multi_series_model_with_covariates_preserves_item_id(self):
    """Multi-series DeepAR model with covariates preserves series identifiers.

    Builds a two-store input frame (history rows plus a NaN-target horizon,
    with dynamic covariates), logs and reloads the model through mlflow, and
    checks that the prediction output still carries the id column with one
    horizon's worth of rows per store.
    """
    target_col = "sales"
    time_col = "date"
    id_col = "store"
    feature_cols = ["temperature", "promotion"]

    deepar_model = DeepARModel(
        model=self.model,
        horizon=self.prediction_length,
        frequency_unit="d",
        frequency_quantity=3,
        num_samples=1,
        target_col=target_col,
        time_col=time_col,
        id_cols=[id_col],
        feature_cols=feature_cols,
    )

    num_rows_per_series = 10
    total_rows = num_rows_per_series + self.prediction_length

    def _store_frame(store_id, temp_start):
        # One store's data: known history, NaN targets over the horizon,
        # and covariate columns covering history + horizon.
        return pd.DataFrame({
            time_col: pd.date_range("2020-10-01", periods=total_rows),
            target_col: list(range(num_rows_per_series)) + [None] * self.prediction_length,
            id_col: [store_id] * total_rows,
            "temperature": list(range(temp_start, temp_start + num_rows_per_series))
                           + [0] * self.prediction_length,
            "promotion": [i % 2 for i in range(num_rows_per_series)]
                         + [0] * self.prediction_length,
        })

    sample_input = pd.concat(
        [_store_frame(1, 20), _store_frame(2, 25)], ignore_index=True
    )

    with mlflow.start_run() as run:
        mlflow_deepar_log_model(deepar_model, sample_input)
        run_id = run.info.run_id

    # Round-trip through mlflow and predict on the same frame.
    loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
    pred_df = loaded_model.predict(sample_input)

    # Series identifiers must survive into the prediction output.
    self.assertIn(id_col, pred_df.columns,
                  f"Series identifier '{id_col}' should be preserved in predictions for multi-series model with covariates")
    self.assertEqual(pred_df.columns.tolist(), [time_col, "yhat", id_col])
    # One horizon of predictions per store.
    self.assertEqual(len(pred_df), self.prediction_length * 2)

    unique_stores = pred_df[id_col].unique()
    self.assertEqual(len(unique_stores), 2, "Should have predictions for both stores")
    # NOTE(review): ids were fed in as ints but asserted as strings — this
    # assumes the model stringifies id columns for GluonTS item_id; confirm.
    self.assertIn('1', unique_stores, "Should have predictions for store 1")
    self.assertIn('2', unique_stores, "Should have predictions for store 2")


class TestDeepARModelWithPreprocess(unittest.TestCase):
@classmethod
Expand Down