From cfc630f3e20fb0a036f33ed1b6ffc71e21b7a61e Mon Sep 17 00:00:00 2001 From: Samuel Northover-Naylor Date: Fri, 6 Mar 2026 21:07:30 +0000 Subject: [PATCH] Add option to use site mean PC to form the PC for uplift --- tests/pp_analysis/__init__.py | 0 tests/pp_analysis/test_cook_pp.py | 370 ++++++++++++++++++++ tests/{ => pp_analysis}/test_pp_analysis.py | 13 +- wind_up/main_analysis.py | 1 + wind_up/models.py | 7 + wind_up/pp_analysis.py | 28 +- 6 files changed, 411 insertions(+), 8 deletions(-) create mode 100644 tests/pp_analysis/__init__.py create mode 100644 tests/pp_analysis/test_cook_pp.py rename tests/{ => pp_analysis}/test_pp_analysis.py (78%) diff --git a/tests/pp_analysis/__init__.py b/tests/pp_analysis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/pp_analysis/test_cook_pp.py b/tests/pp_analysis/test_cook_pp.py new file mode 100644 index 0000000..85fa0ee --- /dev/null +++ b/tests/pp_analysis/test_cook_pp.py @@ -0,0 +1,370 @@ +"""Tests for _cook_pp in pp_analysis.py.""" + +from __future__ import annotations + +import numpy as np +import pandas as pd +import pytest + +from wind_up.constants import DataColumns +from wind_up.pp_analysis import _cook_pp + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +WS_BIN_WIDTH = 1.0 +RATED_POWER = 1300.0 # kW +CUTOUT_WS = 25.0 + + +def _make_pp_raw_df( + bin_mids: list[float], pw_means: list[float | None], hours: list[float], pre_or_post: str = "pre" +) -> pd.DataFrame: + """Build a minimal pp_df as would be produced by _pp_raw_df.""" + n = len(bin_mids) + assert len(pw_means) == n + assert len(hours) == n + + pw_col = f"pw_mean_{pre_or_post}" + ws_col = f"ws_mean_{pre_or_post}" + hours_col = f"hours_{pre_or_post}" + pw_std_col = f"pw_std_{pre_or_post}" + ws_std_col = f"ws_std_{pre_or_post}" + pw_sem_col = f"pw_sem_{pre_or_post}" + ws_sem_col = f"ws_sem_{pre_or_post}" + 
count_col = f"count_{pre_or_post}" + + counts = [max(1, round(h * 3600 / 600)) for h in hours] # assume 10-min timebase + + df = pd.DataFrame( + { + "bin_mid": bin_mids, + "bin_left": [m - WS_BIN_WIDTH / 2 for m in bin_mids], + "bin_right": [m + WS_BIN_WIDTH / 2 for m in bin_mids], + "bin_closed_right": [True] * n, + pw_col: pw_means, + ws_col: bin_mids, + hours_col: hours, + pw_std_col: [50.0 if p is not None else np.nan for p in pw_means], + ws_std_col: [0.2] * n, + count_col: counts, + pw_sem_col: [50.0 / max(1, c) ** 0.5 if p is not None else np.nan for p, c in zip(pw_means, counts)], + ws_sem_col: [0.2 / max(1, c) ** 0.5 for c in counts], + } + ) + return df.set_index("bin_mid", drop=False, verify_integrity=True) + + +def _make_site_mean_pc_df(bin_mids: list[float] | None = None, rated_power: float = RATED_POWER) -> pd.DataFrame: + """Build a simple site mean power curve DataFrame.""" + if bin_mids is None: + bin_mids = list(np.arange(0.5, CUTOUT_WS + 0.5, 1.0)) + + def _simple_pc(ws: float) -> float: + if ws < 3.0: + return 0.0 + if ws >= 12.0: + return rated_power + # linear ramp from 3 to 12 m/s + return rated_power * (ws - 3.0) / (12.0 - 3.0) + + pw_clipped = [_simple_pc(m) for m in bin_mids] + return pd.DataFrame({"bin_mid": bin_mids, DataColumns.wind_speed_mean: bin_mids, "pw_clipped": pw_clipped}) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def full_data_pp_df() -> pd.DataFrame: + """pp_df with enough data in every bin to be valid.""" + bin_mids = list(range(1, 26)) # 1..25 m/s + + # simple power curve: 0 below 3, ramp 3-12, rated above 12 + def pc(ws: float) -> float: + if ws < 3: + return 0.0 + if ws >= 12: + return RATED_POWER + return RATED_POWER * (ws - 3) / 9 + + pw_means = [pc(m) for m in bin_mids] + hours = [50.0] * len(bin_mids) # well above validity threshold + return _make_pp_raw_df(bin_mids, 
pw_means, hours) + + +@pytest.fixture +def sparse_high_ws_pp_df() -> pd.DataFrame: + """pp_df where data only reaches 700 kW (at ~8 m/s), sparse above that.""" + bin_mids = list(range(1, 26)) + + def pc(ws: float) -> float: + if ws < 3: + return 0.0 + if ws >= 8: + return 700.0 + return 700.0 * (ws - 3) / 5 + + pw_means = [pc(m) for m in bin_mids] + # bins above 8 m/s have insufficient data (below validity threshold) + hours = [50.0 if m <= 8 else 1.0 for m in bin_mids] + return _make_pp_raw_df(bin_mids, pw_means, hours) + + +# --------------------------------------------------------------------------- +# Basic output structure tests +# --------------------------------------------------------------------------- + + +class TestCookPpOutputStructure: + def test_returns_dataframe(self, full_data_pp_df: pd.DataFrame) -> None: + result = _cook_pp( + full_data_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + ) + assert isinstance(result, pd.DataFrame) + + def test_expected_columns_present(self, full_data_pp_df: pd.DataFrame) -> None: + result = _cook_pp( + full_data_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + ) + assert "pw_mean_pre" in result.columns + assert "pw_at_mid_pre" in result.columns + assert "pw_sem_at_mid_pre" in result.columns + assert "pre_valid" in result.columns + assert "pw_mean_pre_raw" in result.columns + + def test_does_not_mutate_input(self, full_data_pp_df: pd.DataFrame) -> None: + original = full_data_pp_df.copy() + _cook_pp( + full_data_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + ) + pd.testing.assert_frame_equal(full_data_pp_df, original) + + def test_no_nans_in_output(self, full_data_pp_df: pd.DataFrame) -> None: + result = _cook_pp( + full_data_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + ) + 
key_cols = ["pw_mean_pre", "pw_at_mid_pre", "pw_sem_at_mid_pre"] + assert not result[key_cols].isna().any().any() + + +# --------------------------------------------------------------------------- +# Validity flagging tests +# --------------------------------------------------------------------------- + + +class TestValidityFlagging: + def test_bins_with_sufficient_hours_are_valid(self, full_data_pp_df: pd.DataFrame) -> None: + result = _cook_pp( + full_data_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + ) + # With 50 hours per bin and bin_width=1, threshold is 3 hrs -> all valid + assert result["pre_valid"].all() + + def test_bins_below_hours_threshold_are_invalid(self, sparse_high_ws_pp_df: pd.DataFrame) -> None: + result = _cook_pp( + sparse_high_ws_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + ) + # bins above 8 m/s have only 1 hour (< 3 hour threshold) + assert not result.loc[result["bin_mid"] > 8, "pre_valid"].any() + + def test_raw_pw_col_preserves_original_values(self, sparse_high_ws_pp_df: pd.DataFrame) -> None: + result = _cook_pp( + sparse_high_ws_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + ) + # raw column should be unchanged even where bins are invalid + assert result["pw_mean_pre_raw"].notna().any() + + +# --------------------------------------------------------------------------- +# clip_to_rated tests +# --------------------------------------------------------------------------- + + +class TestClipToRated: + def test_clip_to_rated_caps_pw_mean(self, full_data_pp_df: pd.DataFrame) -> None: + result = _cook_pp( + full_data_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=True, + ) + assert (result["pw_mean_pre"] <= RATED_POWER).all() + + def test_clip_to_rated_caps_pw_at_mid(self, full_data_pp_df: pd.DataFrame) -> 
None: + result = _cook_pp( + full_data_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=True, + ) + assert (result["pw_at_mid_pre"] <= RATED_POWER).all() + + def test_no_clip_allows_values_at_rated(self, full_data_pp_df: pd.DataFrame) -> None: + result = _cook_pp( + full_data_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + ) + assert (result["pw_at_mid_pre"] >= 0).all() + + def test_power_clipped_to_zero_at_low_ws(self, full_data_pp_df: pd.DataFrame) -> None: + result = _cook_pp( + full_data_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + ) + assert (result["pw_mean_pre"] >= 0).all() + + +# --------------------------------------------------------------------------- +# site_mean_pc_df gap-filling tests +# --------------------------------------------------------------------------- + + +class TestSiteMeanPcGapFilling: + def test_without_site_mean_invalid_bins_clip_at_max_measured(self, sparse_high_ws_pp_df: pd.DataFrame) -> None: + result = _cook_pp( + sparse_high_ws_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + ) + # Without site mean curve, high-ws bins should be filled at ~700 kW + high_ws_pw = result.loc[result["bin_mid"] > 12, "pw_at_mid_pre"] + assert (high_ws_pw <= 700.0 * 1.05).all() # allow small tolerance + + def test_with_site_mean_invalid_bins_reach_rated_power(self, sparse_high_ws_pp_df: pd.DataFrame) -> None: + site_mean_pc_df = _make_site_mean_pc_df() + result = _cook_pp( + sparse_high_ws_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + site_mean_pc_df=site_mean_pc_df, + ) + # With site mean curve, bins at rated ws should reach ~1300 kW + high_ws_pw = result.loc[result["bin_mid"] >= 12, "pw_at_mid_pre"] + assert (high_ws_pw >= RATED_POWER * 0.95).all() + + def 
test_with_site_mean_valid_bins_are_unchanged(self, sparse_high_ws_pp_df: pd.DataFrame) -> None: + site_mean_pc_df = _make_site_mean_pc_df() + result_with = _cook_pp( + sparse_high_ws_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + site_mean_pc_df=site_mean_pc_df, + ) + result_without = _cook_pp( + sparse_high_ws_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + site_mean_pc_df=None, + ) + # valid bins (<=8 m/s) should be identical regardless of site_mean_pc_df + valid_mask = result_with["pre_valid"] + pd.testing.assert_series_equal( + result_with.loc[valid_mask, "pw_at_mid_pre"], + result_without.loc[valid_mask, "pw_at_mid_pre"], + ) + + def test_site_mean_none_behaviour_unchanged(self, full_data_pp_df: pd.DataFrame) -> None: + result_none = _cook_pp( + full_data_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + site_mean_pc_df=None, + ) + result_no_arg = _cook_pp( + full_data_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + ) + pd.testing.assert_frame_equal(result_none, result_no_arg) + + def test_power_curve_monotonically_non_decreasing_with_site_mean(self, sparse_high_ws_pp_df: pd.DataFrame) -> None: + site_mean_pc_df = _make_site_mean_pc_df() + result = _cook_pp( + sparse_high_ws_pp_df, + pre_or_post="pre", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + site_mean_pc_df=site_mean_pc_df, + ) + pw = result["pw_at_mid_pre"].to_numpy() + # allow tiny floating point diffs + diffs = np.diff(pw) + assert (diffs >= -1.0).all(), f"Power curve decreased by more than 1 kW: {diffs.min():.2f}" + + +# --------------------------------------------------------------------------- +# post period tests +# --------------------------------------------------------------------------- + + +class TestPostPeriod: + def 
test_post_period_columns_named_correctly(self) -> None: + post_df = _make_pp_raw_df( + bin_mids=list(range(1, 26)), + pw_means=[min(RATED_POWER, max(0, RATED_POWER * (m - 3) / 9)) for m in range(1, 26)], + hours=[50.0] * 25, + pre_or_post="post", + ) + result = _cook_pp( + post_df, + pre_or_post="post", + ws_bin_width=WS_BIN_WIDTH, + rated_power=RATED_POWER, + clip_to_rated=False, + ) + assert "pw_mean_post" in result.columns + assert "pw_at_mid_post" in result.columns + assert "post_valid" in result.columns diff --git a/tests/test_pp_analysis.py b/tests/pp_analysis/test_pp_analysis.py similarity index 78% rename from tests/test_pp_analysis.py rename to tests/pp_analysis/test_pp_analysis.py index f970b2c..cb43c91 100644 --- a/tests/test_pp_analysis.py +++ b/tests/pp_analysis/test_pp_analysis.py @@ -1,9 +1,8 @@ -from pathlib import Path - import pandas as pd import pytest from pandas.testing import assert_frame_equal +from tests.conftest import TEST_DATA_FLD from wind_up.models import WindUpConfig from wind_up.pp_analysis import _pre_post_pp_analysis_with_reversal @@ -17,11 +16,11 @@ def test_pre_post_pp_analysis_with_reversal(test_lsa_t13_config: WindUpConfig) - test_pw_col = "test_pw_clipped" ref_wd_col = "ref_YawAngleMean" - pre_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/LSA_T13_LSA_T12_pre_df.parquet") - post_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/LSA_T13_LSA_T12_post_df.parquet") - lt_wtg_df_filt = pd.read_parquet(Path(__file__).parents[0] / "test_data/LSA_T13_lt_wtg_df_filt.parquet") - test_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/LSA_T13_test_df.parquet") - expected_df = pd.read_parquet(Path(__file__).parents[0] / "test_data/pre_post_pp_analysis_expected_df.parquet") + pre_df = pd.read_parquet(TEST_DATA_FLD / "LSA_T13_LSA_T12_pre_df.parquet") + post_df = pd.read_parquet(TEST_DATA_FLD / "LSA_T13_LSA_T12_post_df.parquet") + lt_wtg_df_filt = pd.read_parquet(TEST_DATA_FLD / "LSA_T13_lt_wtg_df_filt.parquet") 
+ test_df = pd.read_parquet(TEST_DATA_FLD / "LSA_T13_test_df.parquet") + expected_df = pd.read_parquet(TEST_DATA_FLD / "pre_post_pp_analysis_expected_df.parquet") pp_results, actual_df = _pre_post_pp_analysis_with_reversal( cfg=cfg, test_wtg=test_wtg, diff --git a/wind_up/main_analysis.py b/wind_up/main_analysis.py index 07c6cec..a3870c6 100644 --- a/wind_up/main_analysis.py +++ b/wind_up/main_analysis.py @@ -724,6 +724,7 @@ def _calc_test_ref_results( plot_cfg=plot_cfg, test_df=test_df, random_seed=random_seed, + site_mean_pc_df=scada_pc if cfg.gapfill_uplift_curve_using_site_mean_power_curve else None, ) other_results = ref_info | { diff --git a/wind_up/models.py b/wind_up/models.py index e630eb3..23ed55a 100644 --- a/wind_up/models.py +++ b/wind_up/models.py @@ -311,6 +311,13 @@ class WindUpConfig(BaseModel): prepost: PrePost | None = None clip_rated_power_pp: bool = Field(default=True, description="Clip rated power in power performance analysis") use_rated_invalid_bins: bool = Field(default=False, description="Use rated power bins which have been filled in") + gapfill_uplift_curve_using_site_mean_power_curve: bool = Field( + default=False, + description=( + "Whether to gapfill the uplift curve using the site mean power curve when there are not enough data points " + "to calculate an uplift value for a given power bin. The IEC standard approach is setting this to `False`." 
+ ), + ) @model_validator(mode="after") def _check_years_offset_for_pre_period(self: WindUpConfig) -> WindUpConfig: diff --git a/wind_up/pp_analysis.py b/wind_up/pp_analysis.py index b75a1ee..37dde0e 100644 --- a/wind_up/pp_analysis.py +++ b/wind_up/pp_analysis.py @@ -67,7 +67,13 @@ def _calc_rated_ws(*, pp_df: pd.DataFrame, pw_col: str, rated_power: float) -> f def _cook_pp( - pp_df: pd.DataFrame, *, pre_or_post: str, ws_bin_width: float, rated_power: float, clip_to_rated: bool + pp_df: pd.DataFrame, + *, + pre_or_post: str, + ws_bin_width: float, + rated_power: float, + clip_to_rated: bool, + site_mean_pc_df: pd.DataFrame | None = None, ) -> pd.DataFrame: pp_df = pp_df.copy() @@ -104,6 +110,13 @@ def _cook_pp( (pp_df["bin_mid"] >= rated_ws) & ~pp_df[pw_col].isna(), pw_col ].iloc[-1] pp_df.loc[(pp_df["bin_mid"] >= rated_ws) & pp_df[pw_col].isna(), pw_col] = empty_rated_bins_fill_value + + # For bins with insufficient data, fill power with site mean power curve if provided + if site_mean_pc_df is not None: + pp_df.loc[~pp_df[valid_col], pw_col] = np.interp( + pp_df.loc[~pp_df[valid_col], "bin_mid"], site_mean_pc_df["bin_mid"], site_mean_pc_df["pw_clipped"] + ) + pp_df[pw_sem_col] = pp_df[pw_sem_col].ffill() # missing data at low wind speed can be filled with 0 @@ -168,6 +181,7 @@ def _pre_post_pp_analysis( confidence_level: float = 0.9, test_df: pd.DataFrame | None = None, reverse: bool = False, + site_mean_pc_df: pd.DataFrame | None = None, ) -> tuple[dict, pd.DataFrame]: wtg_for_turbine_type = test_wtg test_name = test_wtg.name @@ -196,6 +210,7 @@ def _pre_post_pp_analysis( ws_bin_width=cfg.ws_bin_width, rated_power=rated_power, clip_to_rated=cfg.clip_rated_power_pp, + site_mean_pc_df=site_mean_pc_df, ) post_pp_df = _cook_pp( pp_df=post_pp_df, @@ -203,6 +218,7 @@ def _pre_post_pp_analysis( ws_bin_width=cfg.ws_bin_width, rated_power=rated_power, clip_to_rated=cfg.clip_rated_power_pp, + site_mean_pc_df=site_mean_pc_df, ) pp_df = pre_pp_df.merge( post_pp_df[[x for x 
in post_pp_df.columns if x not in pre_pp_df.columns]], @@ -339,6 +355,7 @@ def _calc_power_only_and_reversed_uplifts( pw_col: str, wd_col: str, confidence_level: float = 0.9, + site_mean_pc_df: pd.DataFrame | None = None, ) -> tuple[float, float]: # calculate power only forward result pre_power_only = pre_df.copy() @@ -361,6 +378,7 @@ def _calc_power_only_and_reversed_uplifts( wd_col=wd_col, plot_cfg=None, confidence_level=confidence_level, + site_mean_pc_df=site_mean_pc_df, ) # need to predict the reference wind speed using the test wind speed for reverse analysis @@ -383,6 +401,7 @@ def _calc_power_only_and_reversed_uplifts( plot_cfg=None, confidence_level=confidence_level, reverse=True, + site_mean_pc_df=site_mean_pc_df, ) poweronly_uplift_frc = power_only_results["uplift_frc"] @@ -404,6 +423,7 @@ def _pre_post_pp_analysis_with_reversal( plot_cfg: PlotConfig | None, confidence_level: float = 0.9, test_df: pd.DataFrame | None = None, + site_mean_pc_df: pd.DataFrame | None = None, ) -> tuple[dict, pd.DataFrame]: pp_results, pp_df = _pre_post_pp_analysis( cfg=cfg, @@ -418,6 +438,7 @@ def _pre_post_pp_analysis_with_reversal( plot_cfg=plot_cfg, confidence_level=confidence_level, test_df=test_df, + site_mean_pc_df=site_mean_pc_df, ) if test_wtg.name == ref_name: @@ -435,6 +456,7 @@ def _pre_post_pp_analysis_with_reversal( pw_col=pw_col, wd_col=wd_col, confidence_level=confidence_level, + site_mean_pc_df=site_mean_pc_df, ) reversal_error = reversed_uplift_frc - poweronly_uplift_frc if plot_cfg is not None: @@ -477,6 +499,7 @@ def pre_post_pp_analysis_with_reversal_and_bootstrapping( random_seed: int, confidence_level: float = 0.9, test_df: pd.DataFrame | None = None, + site_mean_pc_df: pd.DataFrame | None = None, ) -> tuple[dict, pd.DataFrame]: """Perform pre-post analysis with reversal and block bootstrapping uncertainty analysis. 
@@ -493,6 +516,7 @@ def pre_post_pp_analysis_with_reversal_and_bootstrapping( :param random_seed: random seed for reproducibility :param confidence_level: confidence level :param test_df: test data DataFrame + :param site_mean_pc_df: optional site mean power curve DataFrame, used to fill power in bins with insufficient data :return: tuple of results dictionary and DataFrame """ pp_results, pp_df = _pre_post_pp_analysis_with_reversal( @@ -507,6 +531,7 @@ def pre_post_pp_analysis_with_reversal_and_bootstrapping( wd_col=wd_col, plot_cfg=plot_cfg, test_df=test_df, + site_mean_pc_df=site_mean_pc_df, ) pre_df_dropna = pre_df.dropna(subset=[ws_col, pw_col, wd_col]) @@ -550,6 +575,7 @@ def pre_post_pp_analysis_with_reversal_and_bootstrapping( pw_col=pw_col, wd_col=wd_col, plot_cfg=None, + site_mean_pc_df=site_mean_pc_df, ) bootstrapped_uplifts[n] = sample_results["uplift_frc"] except RuntimeError: