From 0b6c19e213ccf120f10ef1a0c1c89642a09190e5 Mon Sep 17 00:00:00 2001 From: Bart Pleiter Date: Tue, 2 Dec 2025 16:29:48 +0100 Subject: [PATCH 1/7] feature: Added option for non-zero flatliner prediction. Signed-off-by: Bart Pleiter --- openstef/data_classes/prediction_job.py | 4 ++ openstef/model/model_creator.py | 1 + openstef/model/regressors/flatliner.py | 33 ++++++++++++----- openstef/pipeline/train_model.py | 7 +++- test/unit/model/regressors/test_flatliner.py | 39 ++++++++++++++++++++ 5 files changed, 74 insertions(+), 10 deletions(-) diff --git a/openstef/data_classes/prediction_job.py b/openstef/data_classes/prediction_job.py index c9cd86476..a57751b4b 100644 --- a/openstef/data_classes/prediction_job.py +++ b/openstef/data_classes/prediction_job.py @@ -105,6 +105,10 @@ class PredictionJobDataClass(BaseModel): False, description="If True, flatliners are also detected on non-zero values (median of the load).", ) + predict_non_zero_flatliner: bool = Field( + False, + description="If True, the flatliner model predicts the mean of the load measurements instead of zero.", + ) data_balancing_ratio: Optional[float] = Field( None, description="If data balancing is enabled, the data will be balanced with data from 1 year ago in the future.", diff --git a/openstef/model/model_creator.py b/openstef/model/model_creator.py index ee6469d4e..e73059da4 100644 --- a/openstef/model/model_creator.py +++ b/openstef/model/model_creator.py @@ -101,6 +101,7 @@ ], ModelType.FLATLINER: [ "quantiles", + "predict_mean", ], ModelType.LINEAR_QUANTILE: [ "alpha", diff --git a/openstef/model/regressors/flatliner.py b/openstef/model/regressors/flatliner.py index 1d9c0dc96..8f204a88e 100644 --- a/openstef/model/regressors/flatliner.py +++ b/openstef/model/regressors/flatliner.py @@ -15,15 +15,23 @@ class FlatlinerRegressor(OpenstfRegressor, RegressorMixin): feature_names_: List[str] = [] - def __init__(self, quantiles=None): + def __init__(self, quantiles=None, predict_mean: bool = False): """Initialize FlatlinerRegressor. - The model always predicts 0.0, regardless of the input features. The model is meant to be used for flatliner - locations that still expect a prediction while preserving the prediction interface. + The model always predicts a constant value, regardless of the input features. + The model is meant to be used for flatliner locations that still expect a + prediction while preserving the prediction interface. + + Args: + quantiles: Quantiles to predict (optional). + predict_mean: If True, predicts the mean of the training load data. + If False, predicts 0.0. """ super().__init__() self.quantiles = quantiles + self.predict_mean = predict_mean + self.predicted_value_: float = 0.0 @property def feature_names(self) -> list: @@ -48,10 +56,10 @@ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin: Args: x: Feature matrix - y: Labels + y: Labels (load measurements) Returns: - Fitted LinearQuantile model + Fitted FlatlinerRegressor model """ self.feature_names_ = list(x.columns) @@ -59,6 +67,12 @@ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin: len(self.feature_names_) or 1.0 ) + # Calculate the predicted value based on predict_mean setting + if self.predict_mean and len(y) > 0: + self.predicted_value_ = float(y.mean()) + else: + self.predicted_value_ = 0.0 + return self def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array: @@ -66,12 +80,12 @@ def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array: Args: x: Feature matrix - quantile: Quantile for which a prediciton is desired, - note that only quantile are available for which a model is trained, + quantile: Quantile for which a prediction is desired, + note that only quantiles are available for which a model is trained, and that this is a quantile-model specific keyword Returns: - Prediction + Prediction (constant value for all rows) Raises: ValueError in case no model is trained for the requested quantile @@ -79,7 +93,7 @@ def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array: """ check_is_fitted(self) - return np.zeros(x.shape[0]) + return np.full(x.shape[0], self.predicted_value_) def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array: check_is_fitted(self) @@ -89,6 +103,7 @@ def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array def _get_param_names(cls): return [ "quantiles", + "predict_mean", ] def __sklearn_is_fitted__(self) -> bool: diff --git a/openstef/pipeline/train_model.py b/openstef/pipeline/train_model.py index 94a3bcbad..a9aaf2097 100644 --- a/openstef/pipeline/train_model.py +++ b/openstef/pipeline/train_model.py @@ -458,11 +458,16 @@ def train_pipeline_step_train_model( "'load' column should be first and 'horizon' column last." ) + # Prepare model kwargs, including predict_mean for flatliner models + model_kwargs = dict(pj.model_kwargs or {}) + if pj.get("predict_non_zero_flatliner", False): + model_kwargs["predict_mean"] = True + # Create relevant model model = ModelCreator.create_model( pj["model"], quantiles=pj["quantiles"], - **(pj.model_kwargs or {}), + **model_kwargs, ) # split x and y data diff --git a/test/unit/model/regressors/test_flatliner.py b/test/unit/model/regressors/test_flatliner.py index fb58ae40e..e3677a6e3 100644 --- a/test/unit/model/regressors/test_flatliner.py +++ b/test/unit/model/regressors/test_flatliner.py @@ -64,3 +64,42 @@ def test_get_feature_names_from_linear(self): self.assertTrue( (feature_importance == np.array([0, 0, 0], dtype=np.float32)).all() ) + + def test_predict_mean_when_enabled(self): + """Test that predict_mean=True causes the model to predict the mean of the load.""" + # Arrange + model = FlatlinerRegressor(predict_mean=True) + + # Create test data with known load values + x_train = train_input.iloc[:, 1:] + y_train = train_input.iloc[:, 0] + expected_mean = y_train.mean() + + # Act + model.fit(x_train, y_train) + result = model.predict(x_train) + + # Assert + # check if the model was fitted + self.assertIsNone(sklearn.utils.validation.check_is_fitted(model)) + + # check if model predicts the mean + self.assertEqual(len(result), len(x_train)) + self.assertTrue(np.allclose(result, expected_mean)) + self.assertAlmostEqual(model.predicted_value_, expected_mean) + + def test_predict_zero_when_predict_mean_disabled(self): + """Test that predict_mean=False causes the model to predict zero.""" + # Arrange + model = FlatlinerRegressor(predict_mean=False) + + x_train = train_input.iloc[:, 1:] + y_train = train_input.iloc[:, 0] + + # Act + model.fit(x_train, y_train) + result = model.predict(x_train) + + # Assert + self.assertTrue((result == 0).all()) + self.assertEqual(model.predicted_value_, 0.0) From 771fc43158d7d68e654ee7287f82dba54087347e Mon Sep 17 00:00:00 2001 From: Bart Pleiter Date: Tue, 2 Dec 2025 16:40:01 +0100 Subject: [PATCH 2/7] fix: format Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Signed-off-by: Bart Pleiter --- test/unit/model/regressors/test_flatliner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/unit/model/regressors/test_flatliner.py b/test/unit/model/regressors/test_flatliner.py index e3677a6e3..9ed09d458 100644 --- a/test/unit/model/regressors/test_flatliner.py +++ b/test/unit/model/regressors/test_flatliner.py @@ -69,7 +69,6 @@ def test_predict_mean_when_enabled(self): """Test that predict_mean=True causes the model to predict the mean of the load.""" # Arrange model = FlatlinerRegressor(predict_mean=True) - # Create test data with known load values x_train = train_input.iloc[:, 1:] y_train = train_input.iloc[:, 0] From 54c7aa7ab4980ea7ee005060cf53d70c3e1512b9 Mon Sep 17 00:00:00 2001 From: Bart Pleiter Date: Tue, 2 Dec 2025 16:40:10 +0100 Subject: [PATCH 3/7] fix: format Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Signed-off-by: Bart Pleiter --- test/unit/model/regressors/test_flatliner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/unit/model/regressors/test_flatliner.py b/test/unit/model/regressors/test_flatliner.py index 9ed09d458..76ecec49e 100644 --- a/test/unit/model/regressors/test_flatliner.py +++ b/test/unit/model/regressors/test_flatliner.py @@ -81,7 +81,6 @@ def test_predict_mean_when_enabled(self): # Assert # check if the model was fitted self.assertIsNone(sklearn.utils.validation.check_is_fitted(model)) - # check if model predicts the mean self.assertEqual(len(result), len(x_train)) self.assertTrue(np.allclose(result, expected_mean)) From ac679e3fa39404d972c805cb41006c108ea47e5b Mon Sep 17 00:00:00 2001 From: Bart Pleiter Date: Tue, 2 Dec 2025 16:40:17 +0100 Subject: [PATCH 4/7] fix: format Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Signed-off-by: Bart Pleiter --- test/unit/model/regressors/test_flatliner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/unit/model/regressors/test_flatliner.py b/test/unit/model/regressors/test_flatliner.py index 76ecec49e..8a9c81814 100644 --- a/test/unit/model/regressors/test_flatliner.py +++ b/test/unit/model/regressors/test_flatliner.py @@ -90,7 +90,6 @@ def test_predict_zero_when_predict_mean_disabled(self): """Test that predict_mean=False causes the model to predict zero.""" # Arrange model = FlatlinerRegressor(predict_mean=False) - x_train = train_input.iloc[:, 1:] y_train = train_input.iloc[:, 0] From 3cb16f4ddf52cfea5d6a0ac9d3205ccbd959c873 Mon Sep 17 00:00:00 2001 From: Bart Pleiter Date: Tue, 2 Dec 2025 16:50:26 +0100 Subject: [PATCH 5/7] fix: change mean to median. Signed-off-by: Bart Pleiter --- openstef/model/model_creator.py | 2 +- openstef/model/regressors/flatliner.py | 16 ++++++++-------- openstef/pipeline/train_model.py | 4 ++-- test/unit/model/regressors/test_flatliner.py | 20 ++++++++++---------- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/openstef/model/model_creator.py b/openstef/model/model_creator.py index e73059da4..f38986109 100644 --- a/openstef/model/model_creator.py +++ b/openstef/model/model_creator.py @@ -101,7 +101,7 @@ ], ModelType.FLATLINER: [ "quantiles", - "predict_mean", + "predict_median", ], ModelType.LINEAR_QUANTILE: [ "alpha", diff --git a/openstef/model/regressors/flatliner.py b/openstef/model/regressors/flatliner.py index 8f204a88e..7826bb592 100644 --- a/openstef/model/regressors/flatliner.py +++ b/openstef/model/regressors/flatliner.py @@ -15,22 +15,22 @@ class FlatlinerRegressor(OpenstfRegressor, RegressorMixin): feature_names_: List[str] = [] - def __init__(self, quantiles=None, predict_mean: bool = False): + def __init__(self, quantiles=None, predict_median: bool = False): """Initialize FlatlinerRegressor. The model always predicts a constant value, regardless of the input features. - The model is meant to be used for flatliner locations that still expect a + The model is mediant to be used for flatliner locations that still expect a prediction while preserving the prediction interface. Args: quantiles: Quantiles to predict (optional). - predict_mean: If True, predicts the mean of the training load data. + predict_median: If True, predicts the median of the training load data. If False, predicts 0.0. """ super().__init__() self.quantiles = quantiles - self.predict_mean = predict_mean + self.predict_median = predict_median self.predicted_value_: float = 0.0 @property @@ -67,9 +67,9 @@ def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin: len(self.feature_names_) or 1.0 ) - # Calculate the predicted value based on predict_mean setting - if self.predict_mean and len(y) > 0: - self.predicted_value_ = float(y.mean()) + # Calculate the predicted value based on predict_median setting + if self.predict_median and len(y) > 0: + self.predicted_value_ = float(y.median()) else: self.predicted_value_ = 0.0 @@ -103,7 +103,7 @@ def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array def _get_param_names(cls): return [ "quantiles", - "predict_mean", + "predict_median", ] def __sklearn_is_fitted__(self) -> bool: diff --git a/openstef/pipeline/train_model.py b/openstef/pipeline/train_model.py index a9aaf2097..fa2b2dad6 100644 --- a/openstef/pipeline/train_model.py +++ b/openstef/pipeline/train_model.py @@ -458,10 +458,10 @@ def train_pipeline_step_train_model( "'load' column should be first and 'horizon' column last." ) - # Prepare model kwargs, including predict_mean for flatliner models + # Prepare model kwargs, including predict_median for flatliner models model_kwargs = dict(pj.model_kwargs or {}) if pj.get("predict_non_zero_flatliner", False): - model_kwargs["predict_mean"] = True + model_kwargs["predict_median"] = True # Create relevant model model = ModelCreator.create_model( diff --git a/test/unit/model/regressors/test_flatliner.py b/test/unit/model/regressors/test_flatliner.py index 8a9c81814..ed7a50e28 100644 --- a/test/unit/model/regressors/test_flatliner.py +++ b/test/unit/model/regressors/test_flatliner.py @@ -65,14 +65,14 @@ def test_get_feature_names_from_linear(self): (feature_importance == np.array([0, 0, 0], dtype=np.float32)).all() ) - def test_predict_mean_when_enabled(self): - """Test that predict_mean=True causes the model to predict the mean of the load.""" + def test_predict_median_when_enabled(self): + """Test that predict_median=True causes the model to predict the median of the load.""" # Arrange - model = FlatlinerRegressor(predict_mean=True) + model = FlatlinerRegressor(predict_median=True) # Create test data with known load values x_train = train_input.iloc[:, 1:] y_train = train_input.iloc[:, 0] - expected_mean = y_train.mean() + expected_median = y_train.median() # Act model.fit(x_train, y_train) @@ -81,15 +81,15 @@ def test_predict_mean_when_enabled(self): # Assert # check if the model was fitted self.assertIsNone(sklearn.utils.validation.check_is_fitted(model)) - # check if model predicts the mean + # check if model predicts the median self.assertEqual(len(result), len(x_train)) - self.assertTrue(np.allclose(result, expected_mean)) - self.assertAlmostEqual(model.predicted_value_, expected_mean) + self.assertTrue(np.allclose(result, expected_median)) + self.assertAlmostEqual(model.predicted_value_, expected_median) - def test_predict_zero_when_predict_mean_disabled(self): - """Test that predict_mean=False causes the model to predict zero.""" + def test_predict_zero_when_predict_median_disabled(self): + """Test that predict_median=False causes the model to predict zero.""" # Arrange - model = FlatlinerRegressor(predict_mean=False) + model = FlatlinerRegressor(predict_median=False) x_train = train_input.iloc[:, 1:] y_train = train_input.iloc[:, 0] From 8d7ff5b49bfc315c2ab536b548f35207bfa5c849 Mon Sep 17 00:00:00 2001 From: Bart Pleiter Date: Tue, 2 Dec 2025 16:51:33 +0100 Subject: [PATCH 6/7] fix: mean to median. Signed-off-by: Bart Pleiter --- openstef/data_classes/prediction_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openstef/data_classes/prediction_job.py b/openstef/data_classes/prediction_job.py index a57751b4b..4a8bfdd42 100644 --- a/openstef/data_classes/prediction_job.py +++ b/openstef/data_classes/prediction_job.py @@ -107,7 +107,7 @@ class PredictionJobDataClass(BaseModel): ) predict_non_zero_flatliner: bool = Field( False, - description="If True, the flatliner model predicts the mean of the load measurements instead of zero.", + description="If True, the flatliner model predicts the median of the load measurements instead of zero.", ) data_balancing_ratio: Optional[float] = Field( None, From b732cf6b543dec481f823bd2cd70f5dc1939f1c8 Mon Sep 17 00:00:00 2001 From: Bart Pleiter Date: Tue, 2 Dec 2025 17:01:50 +0100 Subject: [PATCH 7/7] Update openstef/model/regressors/flatliner.py Co-authored-by: Lars Schilders <123180911+lschilders@users.noreply.github.com> Signed-off-by: Bart Pleiter --- openstef/model/regressors/flatliner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openstef/model/regressors/flatliner.py b/openstef/model/regressors/flatliner.py index 7826bb592..af9dd63cb 100644 --- a/openstef/model/regressors/flatliner.py +++ b/openstef/model/regressors/flatliner.py @@ -19,7 +19,7 @@ def __init__(self, quantiles=None, predict_median: bool = False): """Initialize FlatlinerRegressor. The model always predicts a constant value, regardless of the input features. - The model is mediant to be used for flatliner locations that still expect a + The model is meant to be used for flatliner locations that still expect a prediction while preserving the prediction interface. Args: