diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..2610837e --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,5 @@ +# Apply Black formatting to soiling.py +e7ae5c7ac3e7ab613239165106e769ac6e683603 + +# Apply Black formatting to soiling_test.py +1a34ca430702ee7c4eea9b748d8f5b2b00759722 diff --git a/rdtools/soiling.py b/rdtools/soiling.py index 9a49ed9a..3ccba639 100644 --- a/rdtools/soiling.py +++ b/rdtools/soiling.py @@ -1,10 +1,11 @@ -''' +""" Functions for calculating soiling metrics from photovoltaic system data. The soiling module is currently experimental. The API, results, and default behaviors may change in future releases (including MINOR and PATCH releases) as the code matures. -''' +""" + from rdtools import degradation as RdToolsDeg from rdtools.bootstrap import _make_time_series_bootstrap_samples @@ -22,23 +23,26 @@ from statsmodels.tsa.seasonal import STL from statsmodels.tsa.stattools import adfuller import statsmodels.api as sm + lowess = sm.nonparametric.lowess warnings.warn( - 'The soiling module is currently experimental. The API, results, ' - 'and default behaviors may change in future releases (including MINOR ' - 'and PATCH releases) as the code matures.', stacklevel=2 + "The soiling module is currently experimental. The API, results, " + "and default behaviors may change in future releases (including MINOR " + "and PATCH releases) as the code matures.", + stacklevel=2, ) # Custom exception class NoValidIntervalError(Exception): - '''raised when no valid rows appear in the result dataframe''' + """raised when no valid rows appear in the result dataframe""" + pass -class SRRAnalysis(): - ''' +class SRRAnalysis: + """ Class for running the stochastic rate and recovery (SRR) photovoltaic soiling loss analysis presented in Deceglie et al. JPV 8(2) p547 2018 @@ -55,10 +59,9 @@ class SRRAnalysis(): precipitation_daily : pandas.Series, default None Daily total precipitation. (Ignored if ``clean_criterion='shift'`` in subsequent calculations.) - ''' + """ - def __init__(self, energy_normalized_daily, insolation_daily, - precipitation_daily=None): + def __init__(self, energy_normalized_daily, insolation_daily, precipitation_daily=None): self.pm = energy_normalized_daily # daily performance metric self.insolation_daily = insolation_daily self.precipitation_daily = precipitation_daily # daily precipitation @@ -66,23 +69,26 @@ def __init__(self, energy_normalized_daily, insolation_daily, # insolation-weighted soiling ratios in _calc_monte: self.monte_losses = [] - if pd.infer_freq(self.pm.index) != 'D': - raise ValueError('Daily performance metric series must have ' - 'daily frequency') + if pd.infer_freq(self.pm.index) != "D": + raise ValueError("Daily performance metric series must have " "daily frequency") - if pd.infer_freq(self.insolation_daily.index) != 'D': - raise ValueError('Daily insolation series must have ' - 'daily frequency') + if pd.infer_freq(self.insolation_daily.index) != "D": + raise ValueError("Daily insolation series must have " "daily frequency") if self.precipitation_daily is not None: - if pd.infer_freq(self.precipitation_daily.index) != 'D': - raise ValueError('Precipitation series must have ' - 'daily frequency') - - def _calc_daily_df(self, day_scale=13, clean_threshold='infer', - recenter=True, clean_criterion='shift', precip_threshold=0.01, - outlier_factor=1.5): - ''' + if pd.infer_freq(self.precipitation_daily.index) != "D": + raise ValueError("Precipitation series must have " "daily frequency") + + def _calc_daily_df( + self, + day_scale=13, + clean_threshold="infer", + recenter=True, + clean_criterion="shift", + precip_threshold=0.01, + outlier_factor=1.5, + ): + """ Calculates self.daily_df, a pandas dataframe prepared for SRR analysis, and self.renorm_factor, the renormalization factor for the daily performance @@ -118,27 +124,29 @@ def _calc_daily_df(self, day_scale=13, clean_threshold='infer', The factor used in the Tukey fence definition of outliers for flagging positive shifts in the rolling median used for cleaning detection. A smaller value will cause more and smaller shifts to be classified as cleaning events. - ''' - if (day_scale % 2 == 0) and ('shift' in clean_criterion): - warnings.warn('An even value of day_scale was passed. An odd value is ' - 'recommended, otherwise, consecutive days may be erroneously ' - 'flagged as cleaning events. ' - 'See https://github.com/NatLabRockies/rdtools/issues/189', - stacklevel=2) + """ + if (day_scale % 2 == 0) and ("shift" in clean_criterion): + warnings.warn( + "An even value of day_scale was passed. An odd value is " + "recommended, otherwise, consecutive days may be erroneously " + "flagged as cleaning events. " + "See https://github.com/NatLabRockies/rdtools/issues/189", + stacklevel=2, + ) df = self.pm.to_frame() - df.columns = ['pi'] + df.columns = ["pi"] df_insol = self.insolation_daily.to_frame() - df_insol.columns = ['insol'] + df_insol.columns = ["insol"] df = df.join(df_insol) precip = self.precipitation_daily if precip is not None: df_precip = precip.to_frame() - df_precip.columns = ['precip'] + df_precip.columns = ["precip"] df = df.join(df_precip) else: - df['precip'] = 0 + df["precip"] = 0 # find first and last valid data point start = df[~df.pi.isnull()].index[0] @@ -146,22 +154,22 @@ def _calc_daily_df(self, day_scale=13, clean_threshold='infer', df = df[start:end] # create a day count column - df['day'] = range(len(df)) + df["day"] = range(len(df)) # Recenter to median of first year, as in YoY degradation if recenter: - oneyear = start + pd.Timedelta('364d') - renorm = df.loc[start:oneyear, 'pi'].median() + oneyear = start + pd.Timedelta("364d") + renorm = df.loc[start:oneyear, "pi"].median() else: renorm = 1 - df['pi_norm'] = df['pi'] / renorm + df["pi_norm"] = df["pi"] / renorm # Find the beginning and ends of outages longer than dayscale - bfill = df['pi_norm'].bfill(limit=day_scale) - ffill = df['pi_norm'].ffill(limit=day_scale) - out_start = (~df['pi_norm'].isnull() & bfill.shift(-1).isnull()) - out_end = (~df['pi_norm'].isnull() & ffill.shift(1).isnull()) + bfill = df["pi_norm"].bfill(limit=day_scale) + ffill = df["pi_norm"].ffill(limit=day_scale) + out_start = ~df["pi_norm"].isnull() & bfill.shift(-1).isnull() + out_end = ~df["pi_norm"].isnull() & ffill.shift(1).isnull() # clean up the first and last elements out_start.iloc[-1] = False @@ -172,50 +180,58 @@ def _calc_daily_df(self, day_scale=13, clean_threshold='infer', df_ffill = df.ffill(limit=day_scale).copy() # Calculate rolling median - df['pi_roll_med'] = \ - df_ffill.pi_norm.rolling(day_scale, center=True).median() + df["pi_roll_med"] = df_ffill.pi_norm.rolling(day_scale, center=True).median() # Detect steps in rolling median - df['delta'] = df.pi_roll_med.diff() - if clean_threshold == 'infer': + df["delta"] = df.pi_roll_med.diff() + if clean_threshold == "infer": deltas = abs(df.delta) - clean_threshold = deltas.quantile(0.75) + \ - outlier_factor * (deltas.quantile(0.75) - deltas.quantile(0.25)) + clean_threshold = deltas.quantile(0.75) + outlier_factor * ( + deltas.quantile(0.75) - deltas.quantile(0.25) + ) - df['clean_event_detected'] = (df.delta > clean_threshold) - precip_event = (df['precip'] > precip_threshold) + df["clean_event_detected"] = df.delta > clean_threshold + precip_event = df["precip"] > precip_threshold - if clean_criterion == 'precip_and_shift': + if clean_criterion == "precip_and_shift": # Detect which cleaning events are associated with rain # within a 3 day window - precip_event = precip_event.rolling( - 3, center=True, min_periods=1).apply(any).astype(bool) - df['clean_event'] = (df['clean_event_detected'] & precip_event) - elif clean_criterion == 'precip_or_shift': - df['clean_event'] = (df['clean_event_detected'] | precip_event) - elif clean_criterion == 'precip': - df['clean_event'] = precip_event - elif clean_criterion == 'shift': - df['clean_event'] = df['clean_event_detected'] + precip_event = ( + precip_event.rolling(3, center=True, min_periods=1).apply(any).astype(bool) + ) + df["clean_event"] = df["clean_event_detected"] & precip_event + elif clean_criterion == "precip_or_shift": + df["clean_event"] = df["clean_event_detected"] | precip_event + elif clean_criterion == "precip": + df["clean_event"] = precip_event + elif clean_criterion == "shift": + df["clean_event"] = df["clean_event_detected"] else: - raise ValueError('clean_criterion must be one of ' - '{"precip_and_shift", "precip_or_shift", ' - '"precip", "shift"}') + raise ValueError( + "clean_criterion must be one of " + '{"precip_and_shift", "precip_or_shift", ' + '"precip", "shift"}' + ) - df['clean_event'] = df.clean_event | out_start | out_end + df["clean_event"] = df.clean_event | out_start | out_end df = df.fillna(0) # Give an index to each soiling interval/run - df['run'] = df.clean_event.cumsum() - df.index.name = 'date' # this gets used by name + df["run"] = df.clean_event.cumsum() + df.index.name = "date" # this gets used by name self.renorm_factor = renorm self.daily_df = df - def _calc_result_df(self, trim=False, max_relative_slope_error=500.0, - max_negative_step=0.05, min_interval_length=7): - ''' + def _calc_result_df( + self, + trim=False, + max_relative_slope_error=500.0, + max_negative_step=0.05, + min_interval_length=7, + ): + """ Calculates self.result_df, a pandas dataframe summarizing the soiling intervals identified and self.analyzed_daily_df, a version of self.daily_df with additional columns calculated during analysis. @@ -235,21 +251,21 @@ def _calc_result_df(self, trim=False, max_relative_slope_error=500.0, min_interval_length : int, default 7 The minimum duration for an interval to be considered valid. Cannot be less than 2 (days). - ''' + """ daily_df = self.daily_df result_list = [] if trim: # ignore first and last interval - res_loop = sorted(list(set(daily_df['run'])))[1:-1] + res_loop = sorted(list(set(daily_df["run"])))[1:-1] else: - res_loop = sorted(list(set(daily_df['run']))) + res_loop = sorted(list(set(daily_df["run"]))) for r in res_loop: - run = daily_df[daily_df['run'] == r] - length = (run['day'].iloc[-1] - run['day'].iloc[0]) - start_day = run['day'].iloc[0] - end_day = run['day'].iloc[-1] + run = daily_df[daily_df["run"] == r] + length = run["day"].iloc[-1] - run["day"].iloc[0] + start_day = run["day"].iloc[0] + end_day = run["day"].iloc[-1] start = run.index[0] end = run.index[-1] run_filtered = run[run.pi_norm > 0] @@ -259,97 +275,95 @@ def _calc_result_df(self, trim=False, max_relative_slope_error=500.0, if not run_filtered.empty: run = run_filtered result_dict = { - 'start': start, - 'end': end, - 'length': length, - 'run': r, - 'run_slope': 0, - 'run_slope_low': 0, - 'run_slope_high': 0, - 'max_neg_step': min(run.delta), - 'start_loss': 1, - 'inferred_start_loss': run.pi_norm.mean(), - 'inferred_end_loss': run.pi_norm.mean(), - 'valid': False + "start": start, + "end": end, + "length": length, + "run": r, + "run_slope": 0, + "run_slope_low": 0, + "run_slope_high": 0, + "max_neg_step": min(run.delta), + "start_loss": 1, + "inferred_start_loss": run.pi_norm.mean(), + "inferred_end_loss": run.pi_norm.mean(), + "valid": False, } if len(run) > min_interval_length and run.pi_norm.sum() > 0: fit = theilslopes(run.pi_norm, run.day) fit_poly = np.poly1d(fit[0:2]) - result_dict['run_slope'] = fit[0] - result_dict['run_slope_low'] = fit[2] - result_dict['run_slope_high'] = min([0.0, fit[3]]) - result_dict['inferred_start_loss'] = fit_poly(start_day) - result_dict['inferred_end_loss'] = fit_poly(end_day) - result_dict['valid'] = True + result_dict["run_slope"] = fit[0] + result_dict["run_slope_low"] = fit[2] + result_dict["run_slope_high"] = min([0.0, fit[3]]) + result_dict["inferred_start_loss"] = fit_poly(start_day) + result_dict["inferred_end_loss"] = fit_poly(end_day) + result_dict["valid"] = True result_list.append(result_dict) results = pd.DataFrame(result_list) if results.empty: - raise NoValidIntervalError('No valid soiling intervals were found') + raise NoValidIntervalError("No valid soiling intervals were found") # Filter results for each interval, # setting invalid interval to slope of 0 - results['slope_err'] = ( - results.run_slope_high - results.run_slope_low)\ - / abs(results.run_slope) + results["slope_err"] = (results.run_slope_high - results.run_slope_low) / abs( + results.run_slope + ) # critera for exclusions filt = ( - (results.run_slope > 0) | - (results.slope_err >= max_relative_slope_error / 100.0) | - (results.max_neg_step <= -1.0 * max_negative_step) + (results.run_slope > 0) + | (results.slope_err >= max_relative_slope_error / 100.0) + | (results.max_neg_step <= -1.0 * max_negative_step) ) - results.loc[filt, 'run_slope'] = 0 - results.loc[filt, 'run_slope_low'] = 0 - results.loc[filt, 'run_slope_high'] = 0 - results.loc[filt, 'valid'] = False + results.loc[filt, "run_slope"] = 0 + results.loc[filt, "run_slope_low"] = 0 + results.loc[filt, "run_slope_high"] = 0 + results.loc[filt, "valid"] = False # Calculate the next inferred start loss from next valid interval - results['next_inferred_start_loss'] = np.clip( - results[results.valid].inferred_start_loss.shift(-1), - 0, 1) + results["next_inferred_start_loss"] = np.clip( + results[results.valid].inferred_start_loss.shift(-1), 0, 1 + ) # Calculate the inferred recovery at the end of each interval - results['inferred_recovery'] = np.clip( - results.next_inferred_start_loss - results.inferred_end_loss, - 0, 1) + results["inferred_recovery"] = np.clip( + results.next_inferred_start_loss - results.inferred_end_loss, 0, 1 + ) if len(results[results.valid]) == 0: - raise NoValidIntervalError('No valid soiling intervals were found') + raise NoValidIntervalError("No valid soiling intervals were found") new_start = results.start.iloc[0] new_end = results.end.iloc[-1] pm_frame_out = daily_df[new_start:new_end] - pm_frame_out = pm_frame_out.reset_index() \ - .merge(results, how='left', on='run') \ - .set_index('date') + pm_frame_out = ( + pm_frame_out.reset_index().merge(results, how="left", on="run").set_index("date") + ) - pm_frame_out['loss_perfect_clean'] = np.nan - pm_frame_out['loss_inferred_clean'] = np.nan - pm_frame_out['days_since_clean'] = \ - (pm_frame_out.index - pm_frame_out.start).dt.days + pm_frame_out["loss_perfect_clean"] = np.nan + pm_frame_out["loss_inferred_clean"] = np.nan + pm_frame_out["days_since_clean"] = (pm_frame_out.index - pm_frame_out.start).dt.days # Calculate the daily derate - pm_frame_out['loss_perfect_clean'] = \ - pm_frame_out.start_loss + \ - pm_frame_out.days_since_clean * pm_frame_out.run_slope + pm_frame_out["loss_perfect_clean"] = ( + pm_frame_out.start_loss + pm_frame_out.days_since_clean * pm_frame_out.run_slope + ) # filling the flat intervals may need to be recalculated # for different assumptions - pm_frame_out.loss_perfect_clean = \ - pm_frame_out.loss_perfect_clean.fillna(1) + pm_frame_out.loss_perfect_clean = pm_frame_out.loss_perfect_clean.fillna(1) - pm_frame_out['loss_inferred_clean'] = \ - pm_frame_out.inferred_start_loss + \ - pm_frame_out.days_since_clean * pm_frame_out.run_slope + pm_frame_out["loss_inferred_clean"] = ( + pm_frame_out.inferred_start_loss + + pm_frame_out.days_since_clean * pm_frame_out.run_slope + ) # filling the flat intervals may need to be recalculated # for different assumptions - pm_frame_out.loss_inferred_clean = \ - pm_frame_out.loss_inferred_clean.fillna(1) + pm_frame_out.loss_inferred_clean = pm_frame_out.loss_inferred_clean.fillna(1) self.result_df = results self.analyzed_daily_df = pm_frame_out - def _calc_monte(self, monte, method='half_norm_clean'): - ''' + def _calc_monte(self, monte, method="half_norm_clean"): + """ Runs the Monte Carlo step of the SRR method. Calculates self.random_profiles, a list of the random soiling profiles realized in the calculation, and self.monte_losses, a list of the @@ -371,36 +385,36 @@ def _calc_monte(self, monte, method='half_norm_clean'): mode (mu) at 1 and its sigma equal to 1/3 * (1-b) where b is the intercept of the fit to the interval. - ''' + """ # Raise a warning if there is >20% invalid data - if (method == 'half_norm_clean') or (method == 'random_clean'): - valid_fraction = self.analyzed_daily_df['valid'].mean() + if (method == "half_norm_clean") or (method == "random_clean"): + valid_fraction = self.analyzed_daily_df["valid"].mean() if valid_fraction <= 0.8: - warnings.warn('20% or more of the daily data is assigned to invalid soiling ' - 'intervals. This can be problematic with the "half_norm_clean" ' - 'and "random_clean" cleaning assumptions. Consider more permissive ' - 'validity criteria such as increasing "max_relative_slope_error" ' - 'and/or "max_negative_step" and/or decreasing "min_interval_length".' - ' Alternatively, consider using method="perfect_clean". For more' - ' info see https://github.com/NatLabRockies/rdtools/issues/272', - stacklevel=2 - ) + warnings.warn( + "20% or more of the daily data is assigned to invalid soiling " + 'intervals. This can be problematic with the "half_norm_clean" ' + 'and "random_clean" cleaning assumptions. Consider more permissive ' + 'validity criteria such as increasing "max_relative_slope_error" ' + 'and/or "max_negative_step" and/or decreasing "min_interval_length".' + ' Alternatively, consider using method="perfect_clean". For more' + " info see https://github.com/NatLabRockies/rdtools/issues/272", + stacklevel=2, + ) monte_losses = [] random_profiles = [] for _ in range(monte): results_rand = self.result_df.copy() df_rand = self.analyzed_daily_df.copy() # only really need this column from the original frame: - df_rand = df_rand[['insol', 'run']] - results_rand['run_slope'] = \ - np.random.uniform(results_rand.run_slope_low, - results_rand.run_slope_high) - results_rand['run_loss'] = \ - results_rand.run_slope * results_rand.length + df_rand = df_rand[["insol", "run"]] + results_rand["run_slope"] = np.random.uniform( + results_rand.run_slope_low, results_rand.run_slope_high + ) + results_rand["run_loss"] = results_rand.run_slope * results_rand.length - results_rand['end_loss'] = np.nan - results_rand['start_loss'] = np.nan + results_rand["end_loss"] = np.nan + results_rand["start_loss"] = np.nan # Make groups that start with a valid interval and contain # subsequent invalid intervals @@ -411,16 +425,17 @@ def _calc_monte(self, monte, method='half_norm_clean'): group += 1 group_list.append(group) - results_rand['group'] = group_list + results_rand["group"] = group_list # randomize the extent of the cleaning inter_start = 1.0 start_list = [] - if (method == 'half_norm_clean') or (method == 'random_clean'): + if (method == "half_norm_clean") or (method == "random_clean"): # Randomize recovery of valid intervals only valid_intervals = results_rand[results_rand.valid].copy() - valid_intervals['inferred_recovery'] = \ - valid_intervals.inferred_recovery.fillna(1.0) + valid_intervals["inferred_recovery"] = valid_intervals.inferred_recovery.fillna( + 1.0 + ) end_list = [] for i, row in valid_intervals.iterrows(): @@ -428,36 +443,32 @@ def _calc_monte(self, monte, method='half_norm_clean'): end = inter_start + row.run_loss end_list.append(end) - if method == 'half_norm_clean': + if method == "half_norm_clean": # Use a half normal with the inferred clean at the # 3sigma point x = np.clip(end + row.inferred_recovery, 0, 1) inter_start = 1 - abs(np.random.normal(0.0, (1 - x) / 3)) - elif method == 'random_clean': + elif method == "random_clean": inter_start = np.random.uniform(end, 1) # Update the valid rows in results_rand valid_update = pd.DataFrame() - valid_update['start_loss'] = start_list - valid_update['end_loss'] = end_list + valid_update["start_loss"] = start_list + valid_update["end_loss"] = end_list valid_update.index = valid_intervals.index results_rand.update(valid_update) # forward and back fill to note the limits of random constant # derate for invalid intervals - results_rand['previous_end'] = \ - results_rand.end_loss.ffill() - results_rand['next_start'] = \ - results_rand.start_loss.bfill() + results_rand["previous_end"] = results_rand.end_loss.ffill() + results_rand["next_start"] = results_rand.start_loss.bfill() # Randomly select random constant derate for invalid intervals # based on previous end and next beginning invalid_intervals = results_rand[~results_rand.valid].copy() # fill NaNs at beggining and end - invalid_intervals['previous_end'] = \ - invalid_intervals['previous_end'].fillna(1.0) - invalid_intervals['next_start'] = \ - invalid_intervals['next_start'].fillna(1.0) + invalid_intervals["previous_end"] = invalid_intervals["previous_end"].fillna(1.0) + invalid_intervals["next_start"] = invalid_intervals["next_start"].fillna(1.0) groups = set(invalid_intervals.group) replace_levels = [] @@ -473,49 +484,58 @@ def _calc_monte(self, monte, method='half_norm_clean'): # Update results rand with the invalid rows replace_levels = np.concatenate(replace_levels) invalid_update = pd.DataFrame() - invalid_update['start_loss'] = replace_levels + invalid_update["start_loss"] = replace_levels invalid_update.index = invalid_intervals.index results_rand.update(invalid_update) - elif method == 'perfect_clean': + elif method == "perfect_clean": for i, row in results_rand.iterrows(): start_list.append(inter_start) end = inter_start + row.run_loss inter_start = 1 - results_rand['start_loss'] = start_list + results_rand["start_loss"] = start_list else: raise ValueError("Invalid method specification") - df_rand = df_rand.reset_index() \ - .merge(results_rand, how='left', on='run') \ - .set_index('date') - df_rand['loss'] = np.nan - df_rand['days_since_clean'] = \ - (df_rand.index - df_rand.start).dt.days - df_rand['loss'] = df_rand.start_loss + \ - df_rand.days_since_clean * df_rand.run_slope + df_rand = ( + df_rand.reset_index().merge(results_rand, how="left", on="run").set_index("date") + ) + df_rand["loss"] = np.nan + df_rand["days_since_clean"] = (df_rand.index - df_rand.start).dt.days + df_rand["loss"] = df_rand.start_loss + df_rand.days_since_clean * df_rand.run_slope - df_rand['soil_insol'] = df_rand.loss * df_rand.insol + df_rand["soil_insol"] = df_rand.loss * df_rand.insol soiling_ratio = ( - df_rand.soil_insol.sum() / df_rand.insol[ - ~df_rand.soil_insol.isnull()].sum() + df_rand.soil_insol.sum() / df_rand.insol[~df_rand.soil_insol.isnull()].sum() ) monte_losses.append(soiling_ratio) - random_profile = df_rand['loss'].copy() - random_profile.name = 'stochastic_soiling_profile' + random_profile = df_rand["loss"].copy() + random_profile.name = "stochastic_soiling_profile" random_profiles.append(random_profile) self.random_profiles = random_profiles self.monte_losses = monte_losses - def run(self, reps=1000, day_scale=13, clean_threshold='infer', - trim=False, method='half_norm_clean', - clean_criterion='shift', precip_threshold=0.01, min_interval_length=7, - exceedance_prob=95.0, confidence_level=68.2, recenter=True, - max_relative_slope_error=500.0, max_negative_step=0.05, outlier_factor=1.5): - ''' + def run( + self, + reps=1000, + day_scale=13, + clean_threshold="infer", + trim=False, + method="half_norm_clean", + clean_criterion="shift", + precip_threshold=0.01, + min_interval_length=7, + exceedance_prob=95.0, + confidence_level=68.2, + recenter=True, + max_relative_slope_error=500.0, + max_negative_step=0.05, + outlier_factor=1.5, + ): + """ Run the SRR method from beginning to end. Perform the stochastic rate and recovery soiling loss calculation. Based on the methods presented in Deceglie et al. "Quantifying Soiling Loss Directly From PV Yield" @@ -636,59 +656,87 @@ def run(self, reps=1000, day_scale=13, clean_threshold='infer', | | be treated as a valid soiling interval | +------------------------+----------------------------------------------+ - ''' - self._calc_daily_df(day_scale=day_scale, - clean_threshold=clean_threshold, - recenter=recenter, - clean_criterion=clean_criterion, - precip_threshold=precip_threshold, - outlier_factor=outlier_factor) - self._calc_result_df(trim=trim, - max_relative_slope_error=max_relative_slope_error, - max_negative_step=max_negative_step, - min_interval_length=min_interval_length) + """ + self._calc_daily_df( + day_scale=day_scale, + clean_threshold=clean_threshold, + recenter=recenter, + clean_criterion=clean_criterion, + precip_threshold=precip_threshold, + outlier_factor=outlier_factor, + ) + self._calc_result_df( + trim=trim, + max_relative_slope_error=max_relative_slope_error, + max_negative_step=max_negative_step, + min_interval_length=min_interval_length, + ) self._calc_monte(reps, method=method) # Calculate the P50 and confidence interval half_ci = confidence_level / 2.0 - result = np.percentile(self.monte_losses, - [50, - 50.0 - half_ci, - 50.0 + half_ci, - 100 - exceedance_prob]) + result = np.percentile( + self.monte_losses, [50, 50.0 - half_ci, 50.0 + half_ci, 100 - exceedance_prob] + ) P_level = result[3] # Construct calc_info output intervals_out = self.result_df[ - ['start', 'end', 'run_slope', 'run_slope_low', - 'run_slope_high', 'inferred_start_loss', 'inferred_end_loss', - 'length', 'valid']].copy() - intervals_out.rename(columns={'run_slope': 'soiling_rate', - 'run_slope_high': 'soiling_rate_high', - 'run_slope_low': 'soiling_rate_low', - }, inplace=True) + [ + "start", + "end", + "run_slope", + "run_slope_low", + "run_slope_high", + "inferred_start_loss", + "inferred_end_loss", + "length", + "valid", + ] + ].copy() + intervals_out.rename( + columns={ + "run_slope": "soiling_rate", + "run_slope_high": "soiling_rate_high", + "run_slope_low": "soiling_rate_low", + }, + inplace=True, + ) df_d = self.analyzed_daily_df - sr_perfect = df_d[df_d['valid']]['loss_perfect_clean'] + sr_perfect = df_d[df_d["valid"]]["loss_perfect_clean"] calc_info = { - 'exceedance_level': P_level, - 'renormalizing_factor': self.renorm_factor, - 'stochastic_soiling_profiles': self.random_profiles, - 'soiling_interval_summary': intervals_out, - 'soiling_ratio_perfect_clean': sr_perfect + "exceedance_level": P_level, + "renormalizing_factor": self.renorm_factor, + "stochastic_soiling_profiles": self.random_profiles, + "soiling_interval_summary": intervals_out, + "soiling_ratio_perfect_clean": sr_perfect, } return (result[0], result[1:3], calc_info) -def soiling_srr(energy_normalized_daily, insolation_daily, reps=1000, - precipitation_daily=None, day_scale=13, clean_threshold='infer', - trim=False, method='half_norm_clean', - clean_criterion='shift', precip_threshold=0.01, min_interval_length=7, - exceedance_prob=95.0, confidence_level=68.2, recenter=True, - max_relative_slope_error=500.0, max_negative_step=0.05, outlier_factor=1.5): - ''' +def soiling_srr( + energy_normalized_daily, + insolation_daily, + reps=1000, + precipitation_daily=None, + day_scale=13, + clean_threshold="infer", + trim=False, + method="half_norm_clean", + clean_criterion="shift", + precip_threshold=0.01, + min_interval_length=7, + exceedance_prob=95.0, + confidence_level=68.2, + recenter=True, + max_relative_slope_error=500.0, + max_negative_step=0.05, + outlier_factor=1.5, +): + """ Functional wrapper for :py:class:`~rdtools.soiling.SRRAnalysis`. Perform the stochastic rate and recovery soiling loss calculation. Based on the methods presented in Deceglie et al. JPV 8(2) p547 2018. @@ -818,11 +866,11 @@ def soiling_srr(energy_normalized_daily, insolation_daily, reps=1000, | 'valid' | Whether the interval meets the criteria to | | | be treated as a valid soiling interval | +------------------------+----------------------------------------------+ - ''' + """ - srr = SRRAnalysis(energy_normalized_daily, - insolation_daily, - precipitation_daily=precipitation_daily) + srr = SRRAnalysis( + energy_normalized_daily, insolation_daily, precipitation_daily=precipitation_daily + ) sr, sr_ci, soiling_info = srr.run( reps=reps, @@ -838,14 +886,15 @@ def soiling_srr(energy_normalized_daily, insolation_daily, reps=1000, recenter=recenter, max_relative_slope_error=max_relative_slope_error, max_negative_step=max_negative_step, - outlier_factor=outlier_factor) + outlier_factor=outlier_factor, + ) return sr, sr_ci, soiling_info def _count_month_days(start, end): - '''Return a dict of number of days between start and end - (inclusive) in each month''' + """Return a dict of number of days between start and end + (inclusive) in each month""" days = pd.date_range(start, end) months = [x.month for x in days] out_dict = {} @@ -855,9 +904,8 @@ def _count_month_days(start, end): return out_dict -def annual_soiling_ratios(stochastic_soiling_profiles, - insolation_daily, confidence_level=68.2): - ''' +def annual_soiling_ratios(stochastic_soiling_profiles, insolation_daily, confidence_level=68.2): + """ Return annualized soiling ratios and associated confidence intervals based on stochastic soiling profiles from SRR. Note that each year may be affected by previous years' profiles for all SRR cleaning @@ -897,7 +945,7 @@ def annual_soiling_ratios(stochastic_soiling_profiles, | | for insolation-weighted soiling ratio for | | | the year | +------------------------+-------------------------------------------+ - ''' + """ # Create a df with each realization as a column all_profiles = pd.concat(stochastic_soiling_profiles, axis=1) @@ -905,11 +953,12 @@ def annual_soiling_ratios(stochastic_soiling_profiles, if not all_profiles.index.isin(insolation_daily.index).all(): warnings.warn( - 'The indexes of stochastic_soiling_profiles are not entirely ' - 'contained within the index of insolation_daily. Every day in ' - 'stochastic_soiling_profiles should be represented in ' - 'insolation_daily. This may cause erroneous results.', - stacklevel=2) + "The indexes of stochastic_soiling_profiles are not entirely " + "contained within the index of insolation_daily. Every day in " + "stochastic_soiling_profiles should be represented in " + "insolation_daily. This may cause erroneous results.", + stacklevel=2, + ) insolation_daily = insolation_daily.reindex(all_profiles.index) @@ -917,30 +966,37 @@ def annual_soiling_ratios(stochastic_soiling_profiles, all_profiles_weighted = all_profiles.multiply(insolation_daily, axis=0) # Compute the insolation-weighted soiling ratio (IWSR) for each realization - annual_insolation = insolation_daily.groupby( - insolation_daily.index.year).sum() + annual_insolation = insolation_daily.groupby(insolation_daily.index.year).sum() all_annual_weighted_sums = all_profiles_weighted.groupby( - all_profiles_weighted.index.year).sum() - all_annual_iwsr = all_annual_weighted_sums.multiply( - 1/annual_insolation, axis=0) - - annual_soiling = pd.DataFrame({ - 'soiling_ratio_median': all_annual_iwsr.quantile(0.5, axis=1), - 'soiling_ratio_low': all_annual_iwsr.quantile( - 0.5 - confidence_level/2/100, axis=1), - 'soiling_ratio_high': all_annual_iwsr.quantile( - 0.5 + confidence_level/2/100, axis=1), - }) - annual_soiling.index.name = 'year' + all_profiles_weighted.index.year + ).sum() + all_annual_iwsr = all_annual_weighted_sums.multiply(1 / annual_insolation, axis=0) + + annual_soiling = pd.DataFrame( + { + "soiling_ratio_median": all_annual_iwsr.quantile(0.5, axis=1), + "soiling_ratio_low": all_annual_iwsr.quantile( + 0.5 - confidence_level / 2 / 100, axis=1 + ), + "soiling_ratio_high": all_annual_iwsr.quantile( + 0.5 + confidence_level / 2 / 100, axis=1 + ), + } + ) + annual_soiling.index.name = "year" annual_soiling = annual_soiling.reset_index() return annual_soiling -def monthly_soiling_rates(soiling_interval_summary, min_interval_length=14, - max_relative_slope_error=500.0, reps=100000, - confidence_level=68.2): - ''' +def monthly_soiling_rates( + soiling_interval_summary, + min_interval_length=14, + max_relative_slope_error=500.0, + reps=100000, + confidence_level=68.2, +): + """ Use Monte Carlo to calculate typical monthly soiling rates. Samples possible soiling rates from soiling rate confidence intervals associated with soiling intervals assuming a uniform @@ -1005,75 +1061,73 @@ def monthly_soiling_rates(soiling_interval_summary, min_interval_length=14, | | intervals contribute, the confidence interval | | | is likely to underestimate the true uncertainty. | +-----------------------+--------------------------------------------------+ - ''' + """ # filter to intervals of interest - high = soiling_interval_summary['soiling_rate_high'] - low = soiling_interval_summary['soiling_rate_low'] - rate = soiling_interval_summary['soiling_rate'] + high = soiling_interval_summary["soiling_rate_high"] + low = soiling_interval_summary["soiling_rate_low"] + rate = soiling_interval_summary["soiling_rate"] rel_error = 100 * abs((high - low) / rate) intervals = soiling_interval_summary[ - (soiling_interval_summary['length'] >= min_interval_length) & - (soiling_interval_summary['valid']) & - (rel_error <= max_relative_slope_error) + (soiling_interval_summary["length"] >= min_interval_length) + & (soiling_interval_summary["valid"]) + & (rel_error <= max_relative_slope_error) ].copy() # count the overlap of each interval with each month month_counts = [] for _, row in intervals.iterrows(): - month_counts.append(_count_month_days(row['start'], row['end'])) + month_counts.append(_count_month_days(row["start"], row["end"])) # divy up the monte carlo reps based on overlap for month in range(1, 13): days_in_month = np.array([x[month] for x in month_counts]) - sample_col = f'samples_for_month_{month}' + sample_col = f"samples_for_month_{month}" if days_in_month.sum() > 0: - intervals[sample_col] = np.ceil( - days_in_month / days_in_month.sum() * reps) + intervals[sample_col] = np.ceil(days_in_month / days_in_month.sum() * reps) else: intervals[sample_col] = 0 intervals[sample_col] = intervals[sample_col].astype(int) # perform the monte carlo month by month - ci_quantiles = [0.5 - confidence_level/2/100, 0.5 + confidence_level/2/100] + ci_quantiles = [0.5 - confidence_level / 2 / 100, 0.5 + confidence_level / 2 / 100] monthly_rate_data = [] relevant_interval_count = [] for month in range(1, 13): rates = [] - sample_col = f'samples_for_month_{month}' + sample_col = f"samples_for_month_{month}" relevant_intervals = intervals[intervals[sample_col] > 0] for _, row in relevant_intervals.iterrows(): - rates.append(np.random.uniform( - row['soiling_rate_low'], - row['soiling_rate_high'], - row[sample_col])) + rates.append( + np.random.uniform( + row["soiling_rate_low"], row["soiling_rate_high"], row[sample_col] + ) + ) rates = [x for sublist in rates for x in sublist] if rates: - monthly_rate_data.append(np.quantile(rates, - [0.5, ci_quantiles[0], - ci_quantiles[1]])) + monthly_rate_data.append(np.quantile(rates, [0.5, ci_quantiles[0], ci_quantiles[1]])) else: - monthly_rate_data.append(np.array([np.nan]*3)) + monthly_rate_data.append(np.array([np.nan] * 3)) relevant_interval_count.append(len(relevant_intervals)) monthly_rate_data = np.array(monthly_rate_data) # make a dataframe out of the results - monthly_soiling_df = pd.DataFrame(data=monthly_rate_data, - columns=['soiling_rate_median', - 'soiling_rate_low', - 'soiling_rate_high']) - monthly_soiling_df.insert(0, 'month', range(1, 13)) - monthly_soiling_df['interval_count'] = relevant_interval_count + monthly_soiling_df = pd.DataFrame( + data=monthly_rate_data, + columns=["soiling_rate_median", "soiling_rate_low", "soiling_rate_high"], + ) + monthly_soiling_df.insert(0, "month", range(1, 13)) + monthly_soiling_df["interval_count"] = relevant_interval_count return monthly_soiling_df -class CODSAnalysis(): - ''' +class CODSAnalysis: + """ Container for the Combined Degradation and Soiling (CODS) algorithm for degradation and soiling loss analysis. Based on the method presented in [1]_. @@ -1169,7 +1223,7 @@ class CODSAnalysis(): ---------- .. [1] Skomedal, Å. and Deceglie, M. G., IEEE Journal of Photovoltaics, Sept. 2020. https://doi.org/10.1109/JPHOTOV.2020.3018219 - ''' + """ def __init__(self, energy_normalized_daily): self.pm = energy_normalized_daily # daily performance metric @@ -1178,18 +1232,30 @@ def __init__(self, energy_normalized_daily): first_keeper = self.pm.isna().idxmin() self.pm = self.pm.loc[first_keeper:] - if self.pm.index.freq != 'D': - raise ValueError('Daily performance metric series must have ' - 'daily frequency (missing dates should be ' - 'represented by NaNs)') + if self.pm.index.freq != "D": + raise ValueError( + "Daily performance metric series must have " + "daily frequency (missing dates should be " + "represented by NaNs)" + ) def iterative_signal_decomposition( - self, order=('SR', 'SC', 'Rd'), degradation_method='YoY', - max_iterations=18, cleaning_sensitivity=.5, convergence_criterion=5e-3, - pruning_iterations=1, clean_pruning_sensitivity=.6, soiling_significance=.75, - process_noise=1e-4, renormalize_SR=None, ffill=True, clip_soiling=True, - verbose=False): - ''' + self, + order=("SR", "SC", "Rd"), + degradation_method="YoY", + max_iterations=18, + cleaning_sensitivity=0.5, + convergence_criterion=5e-3, + pruning_iterations=1, + clean_pruning_sensitivity=0.6, + soiling_significance=0.75, + process_noise=1e-4, + renormalize_SR=None, + ffill=True, + clip_soiling=True, + verbose=False, + ): + """ Estimates the soiling losses and the degradation rate of a PV system based on its daily normalized energy, or daily Performance Index (PI). The underlying assumption is that the PI @@ -1328,14 +1394,15 @@ def iterative_signal_decomposition( .. [3] Skomedal, Å. and Deceglie, M. G., IEEE Journal of Photovoltaics, Sept. 2020. https://doi.org/10.1109/JPHOTOV.2020.3018219 - ''' + """ pi = self.pm.copy() - if degradation_method == 'STL' and 'Rd' in order: - order = tuple([c for c in order if c != 'Rd']) + if degradation_method == "STL" and "Rd" in order: + order = tuple([c for c in order if c != "Rd"]) - if 'SR' not in order: - raise ValueError('\'SR\' must be in argument \'order\' ' + - '(e.g. order=[\'SR\', \'SC\', \'Rd\']') + if "SR" not in order: + raise ValueError( + "'SR' must be in argument 'order' " + "(e.g. order=['SR', 'SC', 'Rd']" + ) n_steps = len(order) day = np.arange(len(pi)) degradation_trend = [1] @@ -1348,39 +1415,39 @@ def iterative_signal_decomposition( convergence_metric = [_RMSE(pi, np.ones((len(pi),)))] # Find possible cleaning events based on the performance index - ce, rm9 = _rolling_median_ce_detection(pi.index, pi, ffill=ffill, - tuner=cleaning_sensitivity) + ce, rm9 = _rolling_median_ce_detection( + pi.index, pi, ffill=ffill, tuner=cleaning_sensitivity + ) pce = _collapse_cleaning_events(ce, rm9.diff().values, 5) small_soiling_signal, perfect_cleaning = False, True ic = 0 # iteration counter if verbose: - print('It. nr\tstep\tRMSE\ttimer') + print("It. nr\tstep\tRMSE\ttimer") if verbose: - print('{:}\t- \t{:.5f}'.format(ic, convergence_metric[ic])) + print("{:}\t- \t{:.5f}".format(ic, convergence_metric[ic])) while ic < max_iterations: t0 = time.time() ic += 1 # Find soiling component - if order[(ic-1) % n_steps] == 'SR': + if order[(ic - 1) % n_steps] == "SR": if ic > 2: # Add possible cleaning events found by considering # the residuals pce = soiling_dfs[-1].cleaning_events.copy() cleaning_sensitivity *= 1.2 # decrease sensitivity ce, rm9 = _rolling_median_ce_detection( - pi.index, residuals, ffill=ffill, - tuner=cleaning_sensitivity) + pi.index, residuals, ffill=ffill, tuner=cleaning_sensitivity + ) ce = _collapse_cleaning_events(ce, rm9.diff().values, 5) pce[ce] = True clean_pruning_sensitivity /= 1.1 # increase pruning sensitivity # Decompose input signal - soiling_dummy = (pi / - degradation_trend[-1] / - seasonal_component[-1] / - residual_shift) + soiling_dummy = ( + pi / degradation_trend[-1] / seasonal_component[-1] / residual_shift + ) # Run Kalman Filter for obtaining soiling component kdf, Ps = self._Kalman_filter_for_SR( @@ -1391,100 +1458,124 @@ def iterative_signal_decomposition( clean_pruning_sensitivity=clean_pruning_sensitivity, perfect_cleaning=perfect_cleaning, process_noise=process_noise, - renormalize_SR=renormalize_SR) + renormalize_SR=renormalize_SR, + ) soiling_ratio.append(kdf.soiling_ratio) soiling_dfs.append(kdf) # Find seasonal component - if order[(ic-1) % n_steps] == 'SC': + if order[(ic - 1) % n_steps] == "SC": season_dummy = pi / soiling_ratio[-1] # Decompose signal if season_dummy.isna().sum() > 0: - season_dummy.interpolate('linear', inplace=True) + season_dummy.interpolate("linear", inplace=True) season_dummy = season_dummy.apply(np.log) # Log transform # Run STL model - STL_res = STL(season_dummy, period=365, seasonal=999999, - seasonal_deg=0, trend_deg=0, - robust=True, low_pass_jump=30, seasonal_jump=30, - trend_jump=365).fit() + STL_res = STL( + season_dummy, + period=365, + seasonal=999999, + seasonal_deg=0, + trend_deg=0, + robust=True, + low_pass_jump=30, + seasonal_jump=30, + trend_jump=365, + ).fit() # Smooth result - smooth_season = lowess(STL_res.seasonal.apply(np.exp), - pi.index, is_sorted=True, delta=30, - frac=180/len(pi), return_sorted=False) + smooth_season = lowess( + STL_res.seasonal.apply(np.exp), + pi.index, + is_sorted=True, + delta=30, + frac=180 / len(pi), + return_sorted=False, + ) # Ensure periodic seaonal component - seasonal_comp = _force_periodicity(smooth_season, - season_dummy.index, - pi.index) + seasonal_comp = _force_periodicity(smooth_season, season_dummy.index, pi.index) seasonal_component.append(seasonal_comp) - if degradation_method == 'STL': # If not YoY - deg_trend = pd.Series(index=pi.index, - data=STL_res.trend.apply(np.exp)) + if degradation_method == "STL": # If not YoY + deg_trend = pd.Series(index=pi.index, data=STL_res.trend.apply(np.exp)) degradation_trend.append(deg_trend / deg_trend.iloc[0]) - yoy_save.append(RdToolsDeg.degradation_year_on_year( - degradation_trend[-1], uncertainty_method=None)) + yoy_save.append( + RdToolsDeg.degradation_year_on_year( + degradation_trend[-1], uncertainty_method=None + ) + ) # Find degradation component - if order[(ic-1) % n_steps] == 'Rd': + if order[(ic - 1) % n_steps] == "Rd": # Decompose signal - trend_dummy = (pi / - seasonal_component[-1] / - soiling_ratio[-1]) + trend_dummy = pi / seasonal_component[-1] / soiling_ratio[-1] # Run YoY - yoy = RdToolsDeg.degradation_year_on_year( - trend_dummy, uncertainty_method=None) + yoy = RdToolsDeg.degradation_year_on_year(trend_dummy, uncertainty_method=None) # Convert degradation rate to trend - degradation_trend.append(pd.Series( - index=pi.index, data=(1 + day * yoy / 100 / 365.0))) + degradation_trend.append( + pd.Series(index=pi.index, data=(1 + day * yoy / 100 / 365.0)) + ) yoy_save.append(yoy) # Combine and calculate residual flatness - total_model = (degradation_trend[-1] * - seasonal_component[-1] * - soiling_ratio[-1]) + total_model = degradation_trend[-1] * seasonal_component[-1] * soiling_ratio[-1] residuals = pi / total_model residual_shift = residuals.mean() total_model *= residual_shift convergence_metric.append(_RMSE(pi, total_model)) if verbose: - print('{:}\t{:}\t{:.5f}\t\t\t{:.1f} s'.format( - ic, order[(ic-1) % n_steps], convergence_metric[-1], - time.time()-t0)) + print( + "{:}\t{:}\t{:.5f}\t\t\t{:.1f} s".format( + ic, order[(ic - 1) % n_steps], convergence_metric[-1], time.time() - t0 + ) + ) # Convergence happens if there is no improvement in RMSE from one # step to the next if ic >= n_steps: - relative_improvement = ((convergence_metric[-n_steps-1] - - convergence_metric[-1]) / - convergence_metric[-n_steps-1]) + relative_improvement = ( + convergence_metric[-n_steps - 1] - convergence_metric[-1] + ) / convergence_metric[-n_steps - 1] if perfect_cleaning and ( - ic >= max_iterations / 2 or - relative_improvement < convergence_criterion): + ic >= max_iterations / 2 or relative_improvement < convergence_criterion + ): # From now on, do not assume perfect cleaning perfect_cleaning = False # Reorder to ensure SR first - order = tuple([order[(i+n_steps-1-(ic-1) % n_steps) % n_steps] - for i in range(n_steps)]) + order = tuple( + [ + order[(i + n_steps - 1 - (ic - 1) % n_steps) % n_steps] + for i in range(n_steps) + ] + ) change_point = ic if verbose: - print('Now not assuming perfect cleaning') - elif (not perfect_cleaning and - (ic >= max_iterations or - (ic >= change_point + n_steps and - relative_improvement < - convergence_criterion))): + print("Now not assuming perfect cleaning") + elif not perfect_cleaning and ( + ic >= max_iterations + or ( + ic >= change_point + n_steps + and relative_improvement < convergence_criterion + ) + ): if verbose: if relative_improvement < convergence_criterion: - print('Convergence reached.') + print("Convergence reached.") else: - print('Max iterations reached.') + print("Max iterations reached.") ic = max_iterations # Initialize output DataFrame - df_out = pd.DataFrame(index=pi.index, - columns=['soiling_ratio', 'soiling_rates', - 'cleaning_events', 'seasonal_component', - 'degradation_trend', 'total_model', - 'residuals']) + df_out = pd.DataFrame( + index=pi.index, + columns=[ + "soiling_ratio", + "soiling_rates", + "cleaning_events", + "seasonal_component", + "degradation_trend", + "total_model", + "residuals", + ], + ) # Save values df_out.seasonal_component = seasonal_component[-1] @@ -1499,26 +1590,28 @@ def iterative_signal_decomposition( soiling_loss = (1 - df_out.soiling_ratio).mean() * 100 # Total model - df_out.total_model = (df_out.soiling_ratio * - df_out.seasonal_component * - df_out.degradation_trend) + df_out.total_model = ( + df_out.soiling_ratio * df_out.seasonal_component * df_out.degradation_trend + ) df_out.residuals = pi / df_out.total_model residual_shift = df_out.residuals.mean() df_out.total_model *= residual_shift RMSE = _RMSE(pi, df_out.total_model) - adf_res = adfuller(df_out.residuals.dropna(), regression='ctt', autolag=None) + adf_res = adfuller(df_out.residuals.dropna(), regression="ctt", autolag=None) if verbose: - print('p-value for the H0 that there is a unit root in the' + - 'residuals (using the Augmented Dickey-fuller test):' + - '{:.3e}'.format(adf_res[1])) + print( + "p-value for the H0 that there is a unit root in the" + + "residuals (using the Augmented Dickey-fuller test):" + + "{:.3e}".format(adf_res[1]) + ) # Check size of soiling signal vs residuals - SR_amp = float(np.diff(df_out.soiling_ratio.quantile([.1, .9]))[0]) - residuals_amp = float(np.diff(df_out.residuals.quantile([.1, .9]))[0]) + SR_amp = float(np.diff(df_out.soiling_ratio.quantile([0.1, 0.9]))[0]) + residuals_amp = float(np.diff(df_out.residuals.quantile([0.1, 0.9]))[0]) soiling_signal_strength = SR_amp / residuals_amp if soiling_signal_strength < soiling_significance: if verbose: - print('Soiling signal is small relative to the noise') + print("Soiling signal is small relative to the noise") small_soiling_signal = True df_out.SR_high = 1.0 df_out.SR_low = 1.0 - SR_amp @@ -1530,25 +1623,26 @@ def iterative_signal_decomposition( residual_shift=residual_shift, RMSE=RMSE, small_soiling_signal=small_soiling_signal, - adf_res=adf_res + adf_res=adf_res, ) return df_out, results_dict - def run_bootstrap(self, - reps=512, - confidence_level=68.2, - degradation_method='YoY', - process_noise=1e-4, - order_alternatives=(('SR', 'SC', 'Rd'), - ('SC', 'SR', 'Rd')), - cleaning_sensitivity_alternatives=(.25, .75), - clean_pruning_sensitivity_alternatives=(1/1.5, 1.5), - forward_fill_alternatives=(True, False), - verbose=False, - bootstrap_seed=None, - **kwargs): - ''' + def run_bootstrap( + self, + reps=512, + confidence_level=68.2, + degradation_method="YoY", + process_noise=1e-4, + order_alternatives=(("SR", "SC", "Rd"), ("SC", "SR", "Rd")), + cleaning_sensitivity_alternatives=(0.25, 0.75), + clean_pruning_sensitivity_alternatives=(1 / 1.5, 1.5), + forward_fill_alternatives=(True, False), + verbose=False, + bootstrap_seed=None, + **kwargs, + ): + """ Bootstrapping of CODS algorithm for uncertainty analysis, inherently accounting for model and parameter choices. @@ -1671,7 +1765,7 @@ def run_bootstrap(self, ---------- .. [1] Skomedal, Å. and Deceglie, M. G., IEEE Journal of Photovoltaics, Sept. 2020. https://doi.org/10.1109/JPHOTOV.2020.3018219 - ''' + """ pi = self.pm.copy() # ###################### # @@ -1679,14 +1773,17 @@ def run_bootstrap(self, # ###################### # # Generate combinations of model parameter alternatives - parameter_alternatives = [order_alternatives, - cleaning_sensitivity_alternatives, - clean_pruning_sensitivity_alternatives, - forward_fill_alternatives] + parameter_alternatives = [ + order_alternatives, + cleaning_sensitivity_alternatives, + clean_pruning_sensitivity_alternatives, + forward_fill_alternatives, + ] index_list = list(itertools.product([0, 1], repeat=len(parameter_alternatives))) - combination_of_parameters = [[parameter_alternatives[j][indexes[j]] - for j in range(len(parameter_alternatives))] - for indexes in index_list] + combination_of_parameters = [ + [parameter_alternatives[j][indexes[j]] for j in range(len(parameter_alternatives))] + for indexes in index_list + ] nr_models = len(index_list) bootstrap_samples_list, list_of_df_out, results = [], [], [] @@ -1695,69 +1792,91 @@ def run_bootstrap(self, reps += nr_models - reps % nr_models if verbose: - print('Initially fitting {:} models'.format(nr_models)) + print("Initially fitting {:} models".format(nr_models)) t00 = time.time() # For each combination of model parameter alternatives, fit one model: for c, (order, dt, pt, ff) in enumerate(combination_of_parameters): try: df_out, result_dict = self.iterative_signal_decomposition( - max_iterations=18, order=order, clip_soiling=True, - cleaning_sensitivity=dt, pruning_iterations=1, - clean_pruning_sensitivity=pt, process_noise=process_noise, ffill=ff, - degradation_method=degradation_method, **kwargs) + max_iterations=18, + order=order, + clip_soiling=True, + cleaning_sensitivity=dt, + pruning_iterations=1, + clean_pruning_sensitivity=pt, + process_noise=process_noise, + ffill=ff, + degradation_method=degradation_method, + **kwargs, + ) # Save results list_of_df_out.append(df_out) results.append(result_dict) - adf = result_dict['adf_res'] + adf = result_dict["adf_res"] # If we can reject the null-hypothesis that there is a unit # root in the residuals: - if adf[1] < .05: + if adf[1] < 0.05: # ... generate bootstrap samples based on the fit: bootstrap_samples_list.append( _make_time_series_bootstrap_samples( - pi, df_out.total_model, + pi, + df_out.total_model, sample_nr=int(reps / nr_models), - bootstrap_seed=bootstrap_seed)) + bootstrap_seed=bootstrap_seed, + ) + ) # Print progress if verbose: - _progressBarWithETA(c+1, nr_models, time.time()-t00, - bar_length=30) + _progressBarWithETA(c + 1, nr_models, time.time() - t00, bar_length=30) except ValueError as ex: print(ex) # Revive results - adfs = np.array([(r['adf_res'][0] if r['adf_res'][1] < 0.05 else 0) for r in results]) - RMSEs = np.array([r['RMSE'] for r in results]) - SR_is_one_fraction = np.array( - [(df.soiling_ratio == 1).mean() for df in list_of_df_out]) - small_soiling_signal = [r['small_soiling_signal'] for r in results] + adfs = np.array([(r["adf_res"][0] if r["adf_res"][1] < 0.05 else 0) for r in results]) + RMSEs = np.array([r["RMSE"] for r in results]) + SR_is_one_fraction = np.array([(df.soiling_ratio == 1).mean() for df in list_of_df_out]) + small_soiling_signal = [r["small_soiling_signal"] for r in results] # Calculate weights weights = 1 / RMSEs / (1 + SR_is_one_fraction) weights /= np.sum(weights) # Save sensitivities and weights for initial model fits - _parameters_n_weights = pd.concat([pd.DataFrame(combination_of_parameters), - pd.Series(RMSEs), - pd.Series(SR_is_one_fraction), - pd.Series(weights), - pd.Series(small_soiling_signal)], - axis=1, ignore_index=True) + _parameters_n_weights = pd.concat( + [ + pd.DataFrame(combination_of_parameters), + pd.Series(RMSEs), + pd.Series(SR_is_one_fraction), + pd.Series(weights), + pd.Series(small_soiling_signal), + ], + axis=1, + ignore_index=True, + ) if verbose: # Print summary - _parameters_n_weights.columns = ['order', 'dt', 'pt', 'ff', 'RMSE', - 'SR==1', 'weights', 'small_soiling_signal'] + _parameters_n_weights.columns = [ + "order", + "dt", + "pt", + "ff", + "RMSE", + "SR==1", + "weights", + "small_soiling_signal", + ] if verbose: - print('\n', _parameters_n_weights) + print("\n", _parameters_n_weights) # Check if data is decomposable if np.sum(adfs == 0) > nr_models / 2: raise RuntimeError( - 'Test for stationary residuals (Augmented Dickey-Fuller' - + ' test) not passed in half of the instances:\nData not' - + ' decomposable.') + "Test for stationary residuals (Augmented Dickey-Fuller" + + " test) not passed in half of the instances:\nData not" + + " decomposable." + ) # Save best model self.initial_fits = [df for df in list_of_df_out] @@ -1767,83 +1886,93 @@ def run_bootstrap(self, # don't do bootstrapping if np.sum(small_soiling_signal) > nr_models / 2: self.result_df = result_df - self.residual_shift = results[np.argmax(weights)]['residual_shift'] + self.residual_shift = results[np.argmax(weights)]["residual_shift"] YOY = RdToolsDeg.degradation_year_on_year(pi) self.degradation = [YOY[0], YOY[1][0], YOY[1][1]] self.soiling_loss = [0, 0, (1 - result_df.soiling_ratio).mean()] self.small_soiling_signal = True self.errors = ( - 'Soiling signal is small relative to the noise. ' - 'Iterative decomposition not possible. ' - 'Degradation found by RdTools YoY.') + "Soiling signal is small relative to the noise. " + "Iterative decomposition not possible. " + "Degradation found by RdTools YoY." + ) warnings.warn(self.errors) return self.result_df, self.degradation, self.soiling_loss self.small_soiling_signal = False # Aggregate all bootstrap samples - all_bootstrap_samples = pd.concat(bootstrap_samples_list, axis=1, - ignore_index=True) + all_bootstrap_samples = pd.concat(bootstrap_samples_list, axis=1, ignore_index=True) # Seasonal samples are generated from previously fitted seasonal # components, by perturbing amplitude and phase shift # Number of samples per fit: sample_nr = int(reps / nr_models) - list_of_SCs = [list_of_df_out[m].seasonal_component - for m in range(nr_models) if weights[m] > 0] - seasonal_samples = _make_seasonal_samples(list_of_SCs, - sample_nr=sample_nr, - min_multiplier=.8, - max_multiplier=1.75, - max_shift=30) + list_of_SCs = [ + list_of_df_out[m].seasonal_component for m in range(nr_models) if weights[m] > 0 + ] + seasonal_samples = _make_seasonal_samples( + list_of_SCs, sample_nr=sample_nr, min_multiplier=0.8, max_multiplier=1.75, max_shift=30 + ) # ###################### # # ###### STAGE 2 ####### # # ###################### # if verbose and reps > 0: - print('\nBootstrapping for uncertainty analysis', - '({:} realizations):'.format(reps)) - order = ('SR', 'SC' if degradation_method == 'STL' else 'Rd') + print("\nBootstrapping for uncertainty analysis", "({:} realizations):".format(reps)) + order = ("SR", "SC" if degradation_method == "STL" else "Rd") t0 = time.time() - bt_kdfs, bt_SL, bt_deg, parameters, adfs, RMSEs, SR_is_1, rss, errors = \ - [], [], [], [], [], [], [], [], ['Bootstrapping errors'] + bt_kdfs, bt_SL, bt_deg, parameters, adfs, RMSEs, SR_is_1, rss, errors = ( + [], + [], + [], + [], + [], + [], + [], + [], + ["Bootstrapping errors"], + ) for b in range(reps): try: # randomly choose model sensitivities - dt = np.random.uniform(parameter_alternatives[1][0], - parameter_alternatives[1][-1]) - pt = np.random.uniform(parameter_alternatives[2][0], - parameter_alternatives[2][-1]) + dt = np.random.uniform(parameter_alternatives[1][0], parameter_alternatives[1][-1]) + pt = np.random.uniform(parameter_alternatives[2][0], parameter_alternatives[2][-1]) pn = np.random.uniform(process_noise / 1.5, process_noise * 1.5) - renormalize_SR = np.random.choice([None, - np.random.uniform(.5, .95)]) + renormalize_SR = np.random.choice([None, np.random.uniform(0.5, 0.95)]) ffill = np.random.choice([True, False]) parameters.append([dt, pt, pn, renormalize_SR, ffill]) # Sample to infer soiling from - bootstrap_sample = \ - all_bootstrap_samples[b] / seasonal_samples[b] + bootstrap_sample = all_bootstrap_samples[b] / seasonal_samples[b] # Set up a temprary instance of the CODSAnalysis object temporary_cods_instance = CODSAnalysis(bootstrap_sample) # Do Signal decomposition for soiling and degradation component kdf, results_dict = temporary_cods_instance.iterative_signal_decomposition( - max_iterations=4, order=order, clip_soiling=True, - cleaning_sensitivity=dt, pruning_iterations=1, - clean_pruning_sensitivity=pt, process_noise=pn, - renormalize_SR=renormalize_SR, ffill=ffill, - degradation_method=degradation_method, **kwargs) + max_iterations=4, + order=order, + clip_soiling=True, + cleaning_sensitivity=dt, + pruning_iterations=1, + clean_pruning_sensitivity=pt, + process_noise=pn, + renormalize_SR=renormalize_SR, + ffill=ffill, + degradation_method=degradation_method, + **kwargs, + ) # If we can reject the null-hypothesis that there is a unit # root in the residuals: - if results_dict['adf_res'][1] < .05: # Save the results + if results_dict["adf_res"][1] < 0.05: # Save the results bt_kdfs.append(kdf) - adfs.append(results_dict['adf_res'][0]) - RMSEs.append(results_dict['RMSE']) - bt_deg.append(results_dict['degradation']) - bt_SL.append(results_dict['soiling_loss']) - rss.append(results_dict['residual_shift']) + adfs.append(results_dict["adf_res"][0]) + RMSEs.append(results_dict["RMSE"]) + bt_deg.append(results_dict["degradation"]) + bt_SL.append(results_dict["soiling_loss"]) + rss.append(results_dict["residual_shift"]) SR_is_1.append((kdf.soiling_ratio == 1).mean()) else: seasonal_samples.drop(columns=[b], inplace=True) @@ -1854,20 +1983,33 @@ def run_bootstrap(self, # Print progress if verbose: - _progressBarWithETA(b+1, reps, time.time()-t0, bar_length=30) + _progressBarWithETA(b + 1, reps, time.time() - t0, bar_length=30) # Reweight and save weights weights = 1 / np.array(RMSEs) / (1 + np.array(SR_is_1)) weights /= np.sum(weights) self._parameters_n_weights = pd.concat( - [pd.DataFrame(parameters), - pd.Series(RMSEs), - pd.Series(adfs), - pd.Series(SR_is_1), - pd.Series(weights)], - axis=1, ignore_index=True) - self._parameters_n_weights.columns = ['dt', 'pt', 'pn', 'RSR', 'ffill', - 'RMSE', 'ADF', 'SR==1', 'weights'] + [ + pd.DataFrame(parameters), + pd.Series(RMSEs), + pd.Series(adfs), + pd.Series(SR_is_1), + pd.Series(weights), + ], + axis=1, + ignore_index=True, + ) + self._parameters_n_weights.columns = [ + "dt", + "pt", + "pn", + "RSR", + "ffill", + "RMSE", + "ADF", + "SR==1", + "weights", + ] # ###################### # # ###### STAGE 3 ####### # @@ -1884,68 +2026,79 @@ def run_bootstrap(self, concat_ce = pd.concat([kdf.cleaning_events for kdf in bt_kdfs], axis=1) # Find confidence intervals for SR and soiling rates - df_out['SR_low'] = concat_SR.quantile(ci_low_edge, 1) - df_out['SR_high'] = concat_SR.quantile(ci_high_edge, 1) - df_out['rates_low'] = concat_r_s.quantile(ci_low_edge, 1) - df_out['rates_high'] = concat_r_s.quantile(ci_high_edge, 1) + df_out["SR_low"] = concat_SR.quantile(ci_low_edge, 1) + df_out["SR_high"] = concat_SR.quantile(ci_high_edge, 1) + df_out["rates_low"] = concat_r_s.quantile(ci_low_edge, 1) + df_out["rates_high"] = concat_r_s.quantile(ci_high_edge, 1) # Save best estimate and bootstrapped estimates of SR and soiling rates df_out.soiling_ratio = df_out.soiling_ratio.clip(lower=0, upper=1) - df_out.loc[df_out.soiling_ratio.diff() == 0, 'soiling_rates'] = 0 - df_out['bt_soiling_ratio'] = (concat_SR * weights).sum(axis=1) - df_out['bt_soiling_rates'] = (concat_r_s * weights).sum(axis=1) + df_out.loc[df_out.soiling_ratio.diff() == 0, "soiling_rates"] = 0 + df_out["bt_soiling_ratio"] = (concat_SR * weights).sum(axis=1) + df_out["bt_soiling_rates"] = (concat_r_s * weights).sum(axis=1) # Set probability of cleaning events df_out.cleaning_events = (concat_ce * weights).sum(axis=1) # Find degradation rates - self.degradation = [np.dot(bt_deg, weights), - np.quantile(bt_deg, ci_low_edge), - np.quantile(bt_deg, ci_high_edge)] - df_out.degradation_trend = 1 + np.arange(len(pi)) * \ - self.degradation[0] / 100 / 365.0 + self.degradation = [ + np.dot(bt_deg, weights), + np.quantile(bt_deg, ci_low_edge), + np.quantile(bt_deg, ci_high_edge), + ] + df_out.degradation_trend = 1 + np.arange(len(pi)) * self.degradation[0] / 100 / 365.0 # Soiling losses - self.soiling_loss = [np.dot(bt_SL, weights), - np.quantile(bt_SL, ci_low_edge), - np.quantile(bt_SL, ci_high_edge)] + self.soiling_loss = [ + np.dot(bt_SL, weights), + np.quantile(bt_SL, ci_low_edge), + np.quantile(bt_SL, ci_high_edge), + ] # Save "confidence intervals" for seasonal component df_out.seasonal_component = (seasonal_samples * weights).sum(axis=1) - df_out['seasonal_low'] = seasonal_samples.quantile(ci_low_edge, 1) - df_out['seasonal_high'] = seasonal_samples.quantile(ci_high_edge, 1) + df_out["seasonal_low"] = seasonal_samples.quantile(ci_low_edge, 1) + df_out["seasonal_high"] = seasonal_samples.quantile(ci_high_edge, 1) # Total model with confidence intervals - df_out.total_model = (df_out.degradation_trend * - df_out.seasonal_component * - df_out.soiling_ratio) - df_out['model_low'] = concat_tot_mod.quantile(ci_low_edge, 1) - df_out['model_high'] = concat_tot_mod.quantile(ci_high_edge, 1) + df_out.total_model = ( + df_out.degradation_trend * df_out.seasonal_component * df_out.soiling_ratio + ) + df_out["model_low"] = concat_tot_mod.quantile(ci_low_edge, 1) + df_out["model_high"] = concat_tot_mod.quantile(ci_high_edge, 1) # Residuals and residual shift df_out.residuals = pi / df_out.total_model self.residual_shift = df_out.residuals.mean() df_out.total_model *= self.residual_shift self.RMSE = _RMSE(pi, df_out.total_model) - self.adf_results = adfuller(df_out.residuals.dropna(), - regression='ctt', autolag=None) + self.adf_results = adfuller(df_out.residuals.dropna(), regression="ctt", autolag=None) self.result_df = df_out self.errors = errors if verbose: - print('\nFinal RMSE: {:.5f}'.format(self.RMSE)) + print("\nFinal RMSE: {:.5f}".format(self.RMSE)) if len(self.errors) > 1: print(self.errors) return self.result_df, self.degradation, self.soiling_loss - def _Kalman_filter_for_SR(self, zs_series, process_noise=1e-4, zs_std=.05, - rate_std=.005, max_soiling_rates=.0005, - pruning_iterations=1, clean_pruning_sensitivity=.6, - renormalize_SR=None, perfect_cleaning=False, - prescient_cleaning_events=None, - clip_soiling=True, ffill=True): - ''' + def _Kalman_filter_for_SR( + self, + zs_series, + process_noise=1e-4, + zs_std=0.05, + rate_std=0.005, + max_soiling_rates=0.0005, + pruning_iterations=1, + clean_pruning_sensitivity=0.6, + renormalize_SR=None, + perfect_cleaning=False, + prescient_cleaning_events=None, + clip_soiling=True, + ffill=True, + ): + """ A function for estimating the underlying Soiling Ratio (SR) and the rate of change of the SR (the soiling rate), based on a noisy time series of daily (corrected) normalized energy using a Kalman Filter (KF). See @@ -2016,52 +2169,47 @@ def _Kalman_filter_for_SR(self, zs_series, process_noise=1e-4, zs_std=.05, References ---------- .. [1] R. R. Labbe, Kalman and Bayesian Filters in Python. 2016. - ''' + """ # Ensure numeric index zs_series = zs_series.copy() # Make copy, so as not to change input zs_series = zs_series.astype(float) original_index = zs_series.index.copy() - if (original_index.dtype not in [int, 'int64']): + if original_index.dtype not in [int, "int64"]: zs_series.index = range(len(zs_series)) # Check prescient_cleaning_events. If not present, find cleaning events if isinstance(prescient_cleaning_events, list): cleaning_events = prescient_cleaning_events else: - if (isinstance(prescient_cleaning_events, type(zs_series)) and - (prescient_cleaning_events.sum() > 4)): + if isinstance(prescient_cleaning_events, type(zs_series)) and ( + prescient_cleaning_events.sum() > 4 + ): if len(prescient_cleaning_events) == len(zs_series): prescient_cleaning_events = prescient_cleaning_events.copy() prescient_cleaning_events.index = zs_series.index else: raise ValueError( - "The indices of prescient_cleaning_events must correspond to the" + - " indices of zs_series; they must be of the same length") + "The indices of prescient_cleaning_events must correspond to the" + + " indices of zs_series; they must be of the same length" + ) else: # If no prescient cleaning events, detect cleaning events - ce, rm9 = _rolling_median_ce_detection( - zs_series.index, zs_series, tuner=0.5) - prescient_cleaning_events = \ - _collapse_cleaning_events(ce, rm9.diff().values, 5) + ce, rm9 = _rolling_median_ce_detection(zs_series.index, zs_series, tuner=0.5) + prescient_cleaning_events = _collapse_cleaning_events(ce, rm9.diff().values, 5) cleaning_events = prescient_cleaning_events[prescient_cleaning_events].index.tolist() # Find soiling events (e.g. dust storms) - soiling_events = _soiling_event_detection( - zs_series.index, zs_series, ffill=ffill, tuner=5) + soiling_events = _soiling_event_detection(zs_series.index, zs_series, ffill=ffill, tuner=5) soiling_events = soiling_events[soiling_events].index.tolist() # Initialize various parameters if ffill: - rolling_median_13 = \ - zs_series.ffill().rolling(13, center=True).median().ffill().bfill() - rolling_median_7 = \ - zs_series.ffill().rolling(7, center=True).median().ffill().bfill() + rolling_median_13 = zs_series.ffill().rolling(13, center=True).median().ffill().bfill() + rolling_median_7 = zs_series.ffill().rolling(7, center=True).median().ffill().bfill() else: - rolling_median_13 = \ - zs_series.bfill().rolling(13, center=True).median().ffill().bfill() - rolling_median_7 = \ - zs_series.bfill().rolling(7, center=True).median().ffill().bfill() + rolling_median_13 = zs_series.bfill().rolling(13, center=True).median().ffill().bfill() + rolling_median_7 = zs_series.bfill().rolling(7, center=True).median().ffill().bfill() # A rough estimate of the measurement noise measurement_noise = (rolling_median_13 - zs_series).var() # An initial guess of the slope @@ -2069,28 +2217,38 @@ def _Kalman_filter_for_SR(self, zs_series, process_noise=1e-4, zs_std=.05, dt = 1 # All time stemps are one day # Initialize Kalman filter - f = self._initialize_univariate_model(zs_series, dt, process_noise, - measurement_noise, rate_std, - zs_std, initial_slope) + f = self._initialize_univariate_model( + zs_series, dt, process_noise, measurement_noise, rate_std, zs_std, initial_slope + ) # Initialize miscallenous variables - dfk = pd.DataFrame(index=zs_series.index, dtype=float, - columns=['raw_pi', 'raw_rates', 'smooth_pi', - 'smooth_rates', 'soiling_ratio', - 'soiling_rates', 'cleaning_events', - 'days_since_ce']) - dfk['cleaning_events'] = False + dfk = pd.DataFrame( + index=zs_series.index, + dtype=float, + columns=[ + "raw_pi", + "raw_rates", + "smooth_pi", + "smooth_rates", + "soiling_ratio", + "soiling_rates", + "cleaning_events", + "days_since_ce", + ], + ) + dfk["cleaning_events"] = False # Kalman Filter part: ####################################################################### # Call the forward pass function (the actual KF procedure) Xs, Ps, rate_std, zs_std = self._forward_pass( - f, zs_series, rolling_median_7, cleaning_events, soiling_events) + f, zs_series, rolling_median_7, cleaning_events, soiling_events + ) # Save results and smooth with rts smoother dfk, Xs, Ps = self._smooth_results( - dfk, f, Xs, Ps, zs_series, cleaning_events, soiling_events, - perfect_cleaning) + dfk, f, Xs, Ps, zs_series, cleaning_events, soiling_events, perfect_cleaning + ) ####################################################################### # Some steps to clean up the soiling data: @@ -2103,34 +2261,38 @@ def _Kalman_filter_for_SR(self, zs_series, process_noise=1e-4, zs_std=.05, rm_smooth_pi = dfk.smooth_pi.rolling(7).median().shift(-6) pi_after_cleaning = rm_smooth_pi.loc[cleaning_events] # Detect outiers/false positives - false_positives = _find_numeric_outliers(pi_after_cleaning, - clean_pruning_sensitivity, 'lower') - cleaning_events = \ - false_positives[~false_positives].index.tolist() + false_positives = _find_numeric_outliers( + pi_after_cleaning, clean_pruning_sensitivity, "lower" + ) + cleaning_events = false_positives[~false_positives].index.tolist() # 2: Remove longer periods with positive (soiling) rates if (dfk.smooth_rates > max_soiling_rates).sum() > 1: exceeding_rates = dfk.smooth_rates > max_soiling_rates new_cleaning_events = _collapse_cleaning_events( - exceeding_rates, dfk.smooth_rates, 4) - cleaning_events.extend( - new_cleaning_events[new_cleaning_events].index) + exceeding_rates, dfk.smooth_rates, 4 + ) + cleaning_events.extend(new_cleaning_events[new_cleaning_events].index) cleaning_events.sort() # 3: If the list of cleaning events has changed, run the Kalman # Filter and smoother again if not ce_0 == cleaning_events: - f = self._initialize_univariate_model(zs_series, dt, - process_noise, - measurement_noise, - rate_std, zs_std, - initial_slope) + f = self._initialize_univariate_model( + zs_series, + dt, + process_noise, + measurement_noise, + rate_std, + zs_std, + initial_slope, + ) Xs, Ps, rate_std, zs_std = self._forward_pass( - f, zs_series, rolling_median_7, cleaning_events, - soiling_events) + f, zs_series, rolling_median_7, cleaning_events, soiling_events + ) dfk, Xs, Ps = self._smooth_results( - dfk, f, Xs, Ps, zs_series, cleaning_events, - soiling_events, perfect_cleaning) + dfk, f, Xs, Ps, zs_series, cleaning_events, soiling_events, perfect_cleaning + ) else: counter = 100 # Make sure the while loop stops @@ -2139,14 +2301,14 @@ def _Kalman_filter_for_SR(self, zs_series, process_noise=1e-4, zs_std=.05, if perfect_cleaning: # SR = 1 after cleaning events if len(cleaning_events) > 0: pi_dummy = pd.Series(index=dfk.index, data=np.nan) - pi_dummy.loc[cleaning_events] = \ - dfk.smooth_pi.loc[cleaning_events] + pi_dummy.loc[cleaning_events] = dfk.smooth_pi.loc[cleaning_events] dfk.soiling_ratio = 1 / pi_dummy.ffill() * dfk.smooth_pi # Set the SR in the first soiling period based on the mean # ratio of the Kalman estimate (smooth_pi) and the SR - dfk.loc[:cleaning_events[0], 'soiling_ratio'] = \ - dfk.loc[:cleaning_events[0], 'smooth_pi'] \ + dfk.loc[: cleaning_events[0], "soiling_ratio"] = ( + dfk.loc[: cleaning_events[0], "smooth_pi"] * (dfk.soiling_ratio / dfk.smooth_pi).mean() + ) else: # If no cleaning events dfk.soiling_ratio = 1 else: # Otherwise, if the inut signal has been decomposed, and @@ -2154,40 +2316,38 @@ def _Kalman_filter_for_SR(self, zs_series, process_noise=1e-4, zs_std=.05, dfk.soiling_ratio = dfk.smooth_pi # 5: Renormalize Soiling Ratio if renormalize_SR is not None: - dfk.soiling_ratio /= dfk.loc[cleaning_events, 'soiling_ratio' - ].quantile(renormalize_SR) + dfk.soiling_ratio /= dfk.loc[cleaning_events, "soiling_ratio"].quantile( + renormalize_SR + ) # 6: Force soiling ratio to not exceed 1: if clip_soiling: - dfk['soiling_ratio'] = dfk['soiling_ratio'].clip(upper=1) + dfk["soiling_ratio"] = dfk["soiling_ratio"].clip(upper=1) dfk.soiling_rates = dfk.smooth_rates - dfk.loc[dfk.soiling_ratio.diff() == 0, 'soiling_rates'] = 0 + dfk.loc[dfk.soiling_ratio.diff() == 0, "soiling_rates"] = 0 # Set number of days since cleaning event nr_days_dummy = pd.Series(index=dfk.index, data=np.nan) - nr_days_dummy.loc[cleaning_events] = [int(date-dfk.index[0]) - for date in cleaning_events] + nr_days_dummy.loc[cleaning_events] = [int(date - dfk.index[0]) for date in cleaning_events] nr_days_dummy.iloc[0] = 0 dfk.days_since_ce = range(len(zs_series)) - nr_days_dummy.ffill() # Save cleaning events and soiling events - dfk.loc[cleaning_events, 'cleaning_events'] = True + dfk.loc[cleaning_events, "cleaning_events"] = True dfk.index = original_index # Set index back to orignial index return dfk, Ps - def _forward_pass(self, f, zs_series, rolling_median_7, cleaning_events, - soiling_events): - ''' Run the forward pass of the Kalman Filter algortihm ''' + def _forward_pass(self, f, zs_series, rolling_median_7, cleaning_events, soiling_events): + """Run the forward pass of the Kalman Filter algortihm""" zs = zs_series.values N = len(zs) Xs, Ps = np.zeros((N, 2)), np.zeros((N, 2, 2)) # Enter forward pass of filtering algorithm for i, z in enumerate(zs): - if 7 < i < N-7 and (i in cleaning_events or i in soiling_events): - rolling_median_local = rolling_median_7.loc[i-5:i+5].values - u = self._set_control_input(f, rolling_median_local, i, - cleaning_events) + if 7 < i < N - 7 and (i in cleaning_events or i in soiling_events): + rolling_median_local = rolling_median_7.loc[i - 5 : i + 5].values + u = self._set_control_input(f, rolling_median_local, i, cleaning_events) f.predict(u=u) # Predict wth control input u else: # If no cleaning detection, predict without control input f.predict() @@ -2199,49 +2359,51 @@ def _forward_pass(self, f, zs_series, rolling_median_7, cleaning_events, rate_std, zs_std = Ps[-1, 1, 1], Ps[-1, 0, 0] return Xs, Ps, rate_std, zs_std # Convert to numpy and return - def _set_control_input(self, f, rolling_median_local, index, - cleaning_events): - ''' + def _set_control_input(self, f, rolling_median_local, index, cleaning_events): + """ For each cleaning event, sets control input u based on current Kalman Filter state estimate (f.x), and the median value for the following week. If the cleaning event seems to be misplaced, moves the cleaning event to a more sensible location. If the cleaning event seems to be correct, removes other cleaning events in the 10 days surrounding this day - ''' + """ u = np.zeros(f.x.shape) # u is the control input window_size = 11 # len of rolling_median_local HW = 5 # Half window moving_diff = np.diff(rolling_median_local) # Index of maximum change in rolling median max_diff_index = moving_diff.argmax() - if max_diff_index == HW-1 or index not in cleaning_events: + if max_diff_index == HW - 1 or index not in cleaning_events: # The median zs of the week after the cleaning event - z_med = rolling_median_local[HW+3] + z_med = rolling_median_local[HW + 3] # Set control input this future median u[0] = z_med - np.dot(f.H, np.dot(f.F, f.x)).item() # If the change is bigger than the measurement noise: - if np.abs(u[0]) > np.sqrt(f.R)/2: - index_dummy = [n+3 for n in range(window_size-HW-1) - if n+3 != HW] - cleaning_events = [ce for ce in cleaning_events - if ce-index+HW not in index_dummy] + if np.abs(u[0]) > np.sqrt(f.R) / 2: + index_dummy = [n + 3 for n in range(window_size - HW - 1) if n + 3 != HW] + cleaning_events = [ + ce for ce in cleaning_events if ce - index + HW not in index_dummy + ] else: # If the cleaning event is insignificant u[0] = 0 if index in cleaning_events: cleaning_events.remove(index) else: # If the index with the maximum difference is not today... cleaning_events.remove(index) # ...remove today from the list - if moving_diff[max_diff_index] > 0 \ - and index+max_diff_index-HW+1 not in cleaning_events: + if ( + moving_diff[max_diff_index] > 0 + and index + max_diff_index - HW + 1 not in cleaning_events + ): # ...and add the missing day - bisect.insort(cleaning_events, index+max_diff_index-HW+1) + bisect.insort(cleaning_events, index + max_diff_index - HW + 1) return u - def _smooth_results(self, dfk, f, Xs, Ps, zs_series, cleaning_events, - soiling_events, perfect_cleaning): - ''' Smoother for Kalman Filter estimates. Smooths the Kalaman estimate - between given cleaning events and saves all in DataFrame dfk''' + def _smooth_results( + self, dfk, f, Xs, Ps, zs_series, cleaning_events, soiling_events, perfect_cleaning + ): + """Smoother for Kalman Filter estimates. Smooths the Kalaman estimate + between given cleaning events and saves all in DataFrame dfk""" # Save unsmoothed estimates dfk.raw_pi = Xs[:, 0] dfk.raw_rates = Xs[:, 1] @@ -2256,8 +2418,7 @@ def _smooth_results(self, dfk, f, Xs, Ps, zs_series, cleaning_events, # Smooth between cleaning events for start, end in zip(ce_dummy[:-1], ce_dummy[1:]): num_ind = df_num_ind.loc[start:end].iloc[:-1] - Xs[num_ind], Ps[num_ind], _, _ = f.rts_smoother(Xs[num_ind], - Ps[num_ind]) + Xs[num_ind], Ps[num_ind], _, _ = f.rts_smoother(Xs[num_ind], Ps[num_ind]) # Save smoothed estimates dfk.smooth_pi = Xs[:, 0] @@ -2265,17 +2426,15 @@ def _smooth_results(self, dfk, f, Xs, Ps, zs_series, cleaning_events, return dfk, Xs, Ps - def _initialize_univariate_model(self, zs_series, dt, process_noise, - measurement_noise, rate_std, zs_std, - initial_slope): - ''' Initializes the univariate Kalman Filter model, using the filterpy - package ''' + def _initialize_univariate_model( + self, zs_series, dt, process_noise, measurement_noise, rate_std, zs_std, initial_slope + ): + """Initializes the univariate Kalman Filter model, using the filterpy + package""" f = KalmanFilter(dim_x=2, dim_z=1) - f.F = np.array([[1., dt], - [0., 1.]]) - f.H = np.array([[1., 0.]]) - f.P = np.array([[zs_std**2, 0], - [0, rate_std**2]]) + f.F = np.array([[1.0, dt], [0.0, 1.0]]) + f.H = np.array([[1.0, 0.0]]) + f.P = np.array([[zs_std**2, 0], [0, rate_std**2]]) f.Q = Q_discrete_white_noise(dim=2, dt=dt, var=process_noise**2) f.x = np.array([initial_slope[1], initial_slope[0]]) f.B = np.zeros(f.F.shape) @@ -2284,19 +2443,20 @@ def _initialize_univariate_model(self, zs_series, dt, process_noise, return f -def soiling_cods(energy_normalized_daily, - reps=512, - confidence_level=68.2, - degradation_method='YoY', - process_noise=1e-4, - order_alternatives=(('SR', 'SC', 'Rd'), - ('SC', 'SR', 'Rd')), - cleaning_sensitivity_alternatives=(.25, .75), - clean_pruning_sensitivity_alternatives=(1/1.5, 1.5), - forward_fill_alternatives=(True, False), - verbose=False, - **kwargs): - ''' +def soiling_cods( + energy_normalized_daily, + reps=512, + confidence_level=68.2, + degradation_method="YoY", + process_noise=1e-4, + order_alternatives=(("SR", "SC", "Rd"), ("SC", "SR", "Rd")), + cleaning_sensitivity_alternatives=(0.25, 0.75), + clean_pruning_sensitivity_alternatives=(1 / 1.5, 1.5), + forward_fill_alternatives=(True, False), + verbose=False, + **kwargs, +): + """ Functional wrapper for :py:class:`~rdtools.soiling.CODSAnalysis` and its subroutine :py:func:`~rdtools.soiling.CODSAnalysis.run_bootstrap`. Runs the combined degradation and soiling (CODS) algorithm with bootstrapping. @@ -2409,7 +2569,7 @@ def soiling_cods(energy_normalized_daily, ---------- .. [1] Skomedal, Å. and Deceglie, M. G., IEEE Journal of Photovoltaics, Sept. 2020. https://doi.org/10.1109/JPHOTOV.2020.3018219 - ''' + """ CODS = CODSAnalysis(energy_normalized_daily) @@ -2423,17 +2583,17 @@ def soiling_cods(energy_normalized_daily, cleaning_sensitivity_alternatives=cleaning_sensitivity_alternatives, clean_pruning_sensitivity_alternatives=clean_pruning_sensitivity_alternatives, forward_fill_alternatives=forward_fill_alternatives, - **kwargs) + **kwargs, + ) sr = 1 - CODS.soiling_loss[0] / 100 sr_ci = 1 - np.array(CODS.soiling_loss[1:3]) / 100 - return sr, sr_ci, CODS.degradation[0], np.array(CODS.degradation[1:3]), \ - CODS.result_df + return sr, sr_ci, CODS.degradation[0], np.array(CODS.degradation[1:3]), CODS.result_df def _collapse_cleaning_events(inferred_ce_in, metric, f=4): - ''' A function for replacing quick successive cleaning events with one + """A function for replacing quick successive cleaning events with one (most probable) cleaning event. Parameters @@ -2450,10 +2610,9 @@ def _collapse_cleaning_events(inferred_ce_in, metric, f=4): ------- inferred_ce : pandas.Series boolean values for cleaning events - ''' + """ # Ensure numeric index - if isinstance(inferred_ce_in.index, - pd.core.indexes.datetimes.DatetimeIndex): + if isinstance(inferred_ce_in.index, pd.core.indexes.datetimes.DatetimeIndex): saveindex = inferred_ce_in.copy().index inferred_ce_in.index = range(len(saveindex)) else: @@ -2473,11 +2632,10 @@ def _collapse_cleaning_events(inferred_ce_in, metric, f=4): end_true_vals = collapsed_ce_dummy.loc[start_true_vals:].idxmin() - 1 if end_true_vals >= start_true_vals: # If the island ends # Find the day with mac probability of being a cleaning event - max_diff_day = \ - metric.loc[start_true_vals-f:end_true_vals+f].idxmax() + max_diff_day = metric.loc[start_true_vals - f : end_true_vals + f].idxmax() # Set all days in this period as false - collapsed_ce.loc[start_true_vals-f:end_true_vals+f] = False - collapsed_ce_dummy.loc[start_true_vals-f:end_true_vals+f] = False + collapsed_ce.loc[start_true_vals - f : end_true_vals + f] = False + collapsed_ce_dummy.loc[start_true_vals - f : end_true_vals + f] = False # Set the max probability day as True (cleaning event) collapsed_ce.loc[max_diff_day] = True # Find the next island of true values @@ -2491,51 +2649,54 @@ def _collapse_cleaning_events(inferred_ce_in, metric, f=4): def _rolling_median_ce_detection(x, y, ffill=True, rolling_window=9, tuner=1.5): - ''' Finds cleaning events in a time series of performance index (y) ''' + """Finds cleaning events in a time series of performance index (y)""" y = pd.Series(index=x, data=y) y = y.astype(float) if ffill: # forward fill NaNs in y before running mean rm = y.ffill().rolling(rolling_window, center=True).median() else: # ... or backfill instead rm = y.bfill().rolling(rolling_window, center=True).median() - Q3 = rm.diff().abs().quantile(.75) - Q1 = rm.diff().abs().quantile(.25) + Q3 = rm.diff().abs().quantile(0.75) + Q1 = rm.diff().abs().quantile(0.25) limit = Q3 + tuner * (Q3 - Q1) cleaning_events = rm.diff() > limit return cleaning_events, rm def _soiling_event_detection(x, y, ffill=True, tuner=5): - ''' Finds cleaning events in a time series of performance index (y) ''' + """Finds cleaning events in a time series of performance index (y)""" y = pd.Series(index=x, data=y) y = y.astype(float) if ffill: # forward fill NaNs in y before running mean rm = y.ffill().rolling(9, center=True).median() else: # ... or backfill instead rm = y.bfill().rolling(9, center=True).median() - Q3 = rm.diff().abs().quantile(.99) - Q1 = rm.diff().abs().quantile(.01) + Q3 = rm.diff().abs().quantile(0.99) + Q1 = rm.diff().abs().quantile(0.01) limit = Q1 - tuner * (Q3 - Q1) soiling_events = rm.diff() < limit return soiling_events -def _make_seasonal_samples(list_of_SCs, sample_nr=10, min_multiplier=0.5, - max_multiplier=2, max_shift=20): - ''' Generate seasonal samples by perturbing the amplitude and the phase of - a seasonal components found with the fitted CODS model ''' - samples = pd.DataFrame(index=list_of_SCs[0].index, - columns=range(int(sample_nr*len(list_of_SCs))), - dtype=float) +def _make_seasonal_samples( + list_of_SCs, sample_nr=10, min_multiplier=0.5, max_multiplier=2, max_shift=20 +): + """Generate seasonal samples by perturbing the amplitude and the phase of + a seasonal components found with the fitted CODS model""" + samples = pd.DataFrame( + index=list_of_SCs[0].index, columns=range(int(sample_nr * len(list_of_SCs))), dtype=float + ) # From each fitted signal, we will generate new seaonal components for i, signal in enumerate(list_of_SCs): # Remove beginning and end of signal signal_mean = signal.mean() # Make a signal matrix where each column is a year and each row a date - year_matrix = signal.rename('values').to_frame().assign( - doy=signal.index.dayofyear, - year=signal.index.year - ).pivot(index='doy', columns='year', values='values') + year_matrix = ( + signal.rename("values") + .to_frame() + .assign(doy=signal.index.dayofyear, year=signal.index.year) + .pivot(index="doy", columns="year", values="values") + ) # We will use the median signal through all the years... median_signal = year_matrix.median(axis=1) for j in range(sample_nr): @@ -2546,25 +2707,22 @@ def _make_seasonal_samples(list_of_SCs, sample_nr=10, min_multiplier=0.5, # constructing the new signal based on median_signal shifted_signal = pd.Series( index=signal.index, - data=median_signal.reindex( - (signal.index.dayofyear-shift) % 365 + 1).values) + data=median_signal.reindex((signal.index.dayofyear - shift) % 365 + 1).values, + ) # Perturb amplitude by recentering to 0 multiplying by multiplier - samples.loc[:, i*sample_nr + j] = \ - multiplier * (shifted_signal - signal_mean) + 1 + samples.loc[:, i * sample_nr + j] = multiplier * (shifted_signal - signal_mean) + 1 return samples def _force_periodicity(in_signal, signal_index, out_index): - ''' Function for forcing periodicity in a seasonal component signal ''' + """Function for forcing periodicity in a seasonal component signal""" # Make sure the in_signal is a Series if isinstance(in_signal, np.ndarray): - signal = pd.Series(index=pd.DatetimeIndex(signal_index.date), - data=in_signal) + signal = pd.Series(index=pd.DatetimeIndex(signal_index.date), data=in_signal) elif isinstance(in_signal, pd.Series): - signal = pd.Series(index=pd.DatetimeIndex(signal_index.date), - data=in_signal.values) + signal = pd.Series(index=pd.DatetimeIndex(signal_index.date), data=in_signal.values) else: - raise ValueError('in_signal must be numpy array or pandas Series') + raise ValueError("in_signal must be numpy array or pandas Series") # Make sure that we don't remove too much of the data: remove_length = np.min([180, int((len(signal) - 365) / 2)]) @@ -2576,66 +2734,62 @@ def _force_periodicity(in_signal, signal_index, out_index): # Make a signal matrix where each column is a year and each row is a date year_matrix = pd.DataFrame(index=np.arange(0, 365), columns=unique_years) for year in unique_years: - dates_in_year = pd.date_range(str(year)+'-01-01', str(year)+'-12-31') + dates_in_year = pd.date_range(str(year) + "-01-01", str(year) + "-12-31") # We cut off the extra day(s) of leap years - year_matrix[year] = \ - signal.loc[str(year)].reindex(dates_in_year).values[:365] + year_matrix[year] = signal.loc[str(year)].reindex(dates_in_year).values[:365] # We will use the median signal through all the years... median_signal = year_matrix.median(axis=1) # The output is the median signal broadcasted to the whole time series - output = pd.Series( - index=out_index, - data=median_signal.reindex(out_index.dayofyear - 1).values) + output = pd.Series(index=out_index, data=median_signal.reindex(out_index.dayofyear - 1).values) return output -def _find_numeric_outliers(x, multiplier=1.5, where='both', verbose=False): - ''' Function for finding numeric outliers ''' +def _find_numeric_outliers(x, multiplier=1.5, where="both", verbose=False): + """Function for finding numeric outliers""" try: # Calulate third and first quartile - Q3 = np.quantile(x, .75) - Q1 = np.quantile(x, .25) + Q3 = np.quantile(x, 0.75) + Q1 = np.quantile(x, 0.25) except IndexError as ie: print(ie, x) except RuntimeWarning as rw: print(rw, x) IQR = Q3 - Q1 # Interquartile range - if where == 'upper': # If detecting upper outliers + if where == "upper": # If detecting upper outliers if verbose: - print('Upper limit', Q3 + multiplier * IQR) - return (x > Q3 + multiplier * IQR) - elif where == 'lower': # If detecting lower outliers + print("Upper limit", Q3 + multiplier * IQR) + return x > Q3 + multiplier * IQR + elif where == "lower": # If detecting lower outliers if verbose: - print('Lower limit', Q1 - multiplier * IQR) - return (x < Q1 - multiplier * IQR) - elif where == 'both': # If detecting both lower and upper outliers + print("Lower limit", Q1 - multiplier * IQR) + return x < Q1 - multiplier * IQR + elif where == "both": # If detecting both lower and upper outliers if verbose: - print('Upper, lower limit', - Q3 + multiplier * IQR, - Q1 - multiplier * IQR) + print("Upper, lower limit", Q3 + multiplier * IQR, Q1 - multiplier * IQR) return (x > Q3 + multiplier * IQR), (x < Q1 - multiplier * IQR) def _RMSE(y_true, y_pred): - '''Calculates the Root Mean Squared Error for y_true and y_pred, where - y_pred is the "prediction", and y_true is the truth.''' + """Calculates the Root Mean Squared Error for y_true and y_pred, where + y_pred is the "prediction", and y_true is the truth.""" mask = ~np.isnan(y_pred) - return np.sqrt(np.mean((y_pred[mask]-y_true[mask])**2)) + return np.sqrt(np.mean((y_pred[mask] - y_true[mask]) ** 2)) def _MSD(y_true, y_pred): - '''Calculates the Mean Signed Deviation for y_true and y_pred, where y_pred - is the "prediction", and y_true is the truth.''' + """Calculates the Mean Signed Deviation for y_true and y_pred, where y_pred + is the "prediction", and y_true is the truth.""" return np.mean(y_pred - y_true) def _progressBarWithETA(value, endvalue, time, bar_length=20): - ''' Prints a progressbar with an estimated time of "arrival" ''' + """Prints a progressbar with an estimated time of "arrival" """ percent = float(value) / endvalue * 100 - arrow = '-' * int(round(percent/100 * bar_length)-1) + '>' - spaces = ' ' * (bar_length - len(arrow)) + arrow = "-" * int(round(percent / 100 * bar_length) - 1) + ">" + spaces = " " * (bar_length - len(arrow)) used = time / 60 # Time Used - left = used / percent*(100-percent) # Estimated time left + left = used / percent * (100 - percent) # Estimated time left sys.stdout.write( - "\r# {:} | Used: {:.1f} min | Left: {:.1f}".format(value, used, left) + - " min | Progress: [{:}] {:.0f} %".format(arrow + spaces, percent)) + "\r# {:} | Used: {:.1f} min | Left: {:.1f}".format(value, used, left) + + " min | Progress: [{:}] {:.0f} %".format(arrow + spaces, percent) + ) sys.stdout.flush() diff --git a/rdtools/test/soiling_test.py b/rdtools/test/soiling_test.py index 464f6a18..3fb55b00 100644 --- a/rdtools/test/soiling_test.py +++ b/rdtools/test/soiling_test.py @@ -13,171 +13,244 @@ def test_soiling_srr(soiling_normalized_daily, soiling_insolation, soiling_times reps = 10 np.random.seed(1977) sr, sr_ci, soiling_info = soiling_srr(soiling_normalized_daily, soiling_insolation, reps=reps) - assert 0.964369 == pytest.approx(sr, abs=1e-6), \ - 'Soiling ratio different from expected value' - assert np.array([0.962540, 0.965295]) == pytest.approx(sr_ci, abs=1e-6), \ - 'Confidence interval different from expected value' - assert 0.960205 == pytest.approx(soiling_info['exceedance_level'], abs=1e-6), \ - 'Exceedance level different from expected value' - assert 0.984079 == pytest.approx(soiling_info['renormalizing_factor'], abs=1e-6), \ - 'Renormalizing factor different from expected value' - assert len(soiling_info['stochastic_soiling_profiles']) == reps, \ - 'Length of soiling_info["stochastic_soiling_profiles"] different than expected' - assert isinstance(soiling_info['stochastic_soiling_profiles'], list), \ - 'soiling_info["stochastic_soiling_profiles"] is not a list' + assert 0.964369 == pytest.approx(sr, abs=1e-6), "Soiling ratio different from expected value" + assert np.array([0.962540, 0.965295]) == pytest.approx( + sr_ci, abs=1e-6 + ), "Confidence interval different from expected value" + assert 0.960205 == pytest.approx( + soiling_info["exceedance_level"], abs=1e-6 + ), "Exceedance level different from expected value" + assert 0.984079 == pytest.approx( + soiling_info["renormalizing_factor"], abs=1e-6 + ), "Renormalizing factor different from expected value" + assert ( + len(soiling_info["stochastic_soiling_profiles"]) == reps + ), 'Length of soiling_info["stochastic_soiling_profiles"] different than expected' + assert isinstance( + soiling_info["stochastic_soiling_profiles"], list + ), 'soiling_info["stochastic_soiling_profiles"] is not a list' # Check soiling_info['soiling_interval_summary'] - expected_summary_columns = ['start', 'end', 'soiling_rate', 'soiling_rate_low', - 'soiling_rate_high', 'inferred_start_loss', 'inferred_end_loss', - 'length', 'valid'] - actual_summary_columns = soiling_info['soiling_interval_summary'].columns.values + expected_summary_columns = [ + "start", + "end", + "soiling_rate", + "soiling_rate_low", + "soiling_rate_high", + "inferred_start_loss", + "inferred_end_loss", + "length", + "valid", + ] + actual_summary_columns = soiling_info["soiling_interval_summary"].columns.values for x in actual_summary_columns: - assert x in expected_summary_columns, \ - f"'{x}' not an expected column in soiling_info['soiling_interval_summary']" + assert ( + x in expected_summary_columns + ), f"'{x}' not an expected column in soiling_info['soiling_interval_summary']" for x in expected_summary_columns: - assert x in actual_summary_columns, \ - f"'{x}' was expected as a column, but not in soiling_info['soiling_interval_summary']" - assert isinstance(soiling_info['soiling_interval_summary'], pd.DataFrame), \ - 'soiling_info["soiling_interval_summary"] not a dataframe' - expected_means = pd.Series({'soiling_rate': -0.002644544, - 'soiling_rate_low': -0.002847504, - 'soiling_rate_high': -0.002455915, - 'inferred_start_loss': 1.020124, - 'inferred_end_loss': 0.9566552, - 'length': 24.0, - 'valid': 1.0}) - expected_means = expected_means[['soiling_rate', 'soiling_rate_low', 'soiling_rate_high', - 'inferred_start_loss', 'inferred_end_loss', - 'length', 'valid']] - actual_means = soiling_info['soiling_interval_summary'][expected_means.index].mean() + assert ( + x in actual_summary_columns + ), f"'{x}' was expected as a column, but not in soiling_info['soiling_interval_summary']" + assert isinstance( + soiling_info["soiling_interval_summary"], pd.DataFrame + ), 'soiling_info["soiling_interval_summary"] not a dataframe' + expected_means = pd.Series( + { + "soiling_rate": -0.002644544, + "soiling_rate_low": -0.002847504, + "soiling_rate_high": -0.002455915, + "inferred_start_loss": 1.020124, + "inferred_end_loss": 0.9566552, + "length": 24.0, + "valid": 1.0, + } + ) + expected_means = expected_means[ + [ + "soiling_rate", + "soiling_rate_low", + "soiling_rate_high", + "inferred_start_loss", + "inferred_end_loss", + "length", + "valid", + ] + ] + actual_means = soiling_info["soiling_interval_summary"][expected_means.index].mean() pd.testing.assert_series_equal(expected_means, actual_means, check_exact=False) # Check soiling_info['soiling_ratio_perfect_clean'] - pd.testing.assert_index_equal(soiling_info['soiling_ratio_perfect_clean'].index, soiling_times, - check_names=False) - sr_mean = soiling_info['soiling_ratio_perfect_clean'].mean() - assert 0.968265 == pytest.approx(sr_mean, abs=1e-6), \ - "The mean of soiling_info['soiling_ratio_perfect_clean'] differs from expected" - assert isinstance(soiling_info['soiling_ratio_perfect_clean'], pd.Series), \ - 'soiling_info["soiling_ratio_perfect_clean"] not a pandas series' + pd.testing.assert_index_equal( + soiling_info["soiling_ratio_perfect_clean"].index, soiling_times, check_names=False + ) + sr_mean = soiling_info["soiling_ratio_perfect_clean"].mean() + assert 0.968265 == pytest.approx( + sr_mean, abs=1e-6 + ), "The mean of soiling_info['soiling_ratio_perfect_clean'] differs from expected" + assert isinstance( + soiling_info["soiling_ratio_perfect_clean"], pd.Series + ), 'soiling_info["soiling_ratio_perfect_clean"] not a pandas series' @pytest.mark.filterwarnings("ignore:.*20% or more of the daily data.*:UserWarning") -@pytest.mark.parametrize('method,expected_sr', - [('random_clean', 0.936177), - ('half_norm_clean', 0.915093), - ('perfect_clean', 0.977116)]) -def test_soiling_srr_consecutive_invalid(soiling_normalized_daily, soiling_insolation, - soiling_times, method, expected_sr): +@pytest.mark.parametrize( + "method,expected_sr", + [("random_clean", 0.936177), ("half_norm_clean", 0.915093), ("perfect_clean", 0.977116)], +) +def test_soiling_srr_consecutive_invalid( + soiling_normalized_daily, soiling_insolation, soiling_times, method, expected_sr +): reps = 10 np.random.seed(1977) - sr, sr_ci, soiling_info = soiling_srr(soiling_normalized_daily, soiling_insolation, reps=reps, - max_relative_slope_error=20.0, method=method) - assert expected_sr == pytest.approx(sr, abs=1e-6), \ - f'Soiling ratio different from expected value for {method} with consecutive invalid intervals' # noqa: E501 - - -@pytest.mark.parametrize('clean_criterion,expected_sr', - [('precip_and_shift', 0.982546), - ('precip_or_shift', 0.973433), - ('precip', 0.976196), - ('shift', 0.964369)]) -def test_soiling_srr_with_precip(soiling_normalized_daily, soiling_insolation, soiling_times, - clean_criterion, expected_sr): + sr, sr_ci, soiling_info = soiling_srr( + soiling_normalized_daily, + soiling_insolation, + reps=reps, + max_relative_slope_error=20.0, + method=method, + ) + assert expected_sr == pytest.approx( + sr, abs=1e-6 + ), f"Soiling ratio different from expected value for {method} with consecutive invalid intervals" # noqa: E501 + + +@pytest.mark.parametrize( + "clean_criterion,expected_sr", + [ + ("precip_and_shift", 0.982546), + ("precip_or_shift", 0.973433), + ("precip", 0.976196), + ("shift", 0.964369), + ], +) +def test_soiling_srr_with_precip( + soiling_normalized_daily, soiling_insolation, soiling_times, clean_criterion, expected_sr +): precip = pd.Series(index=soiling_times, data=0) - precip['2019-01-18 00:00:00-07:00'] = 1 - precip['2019-02-20 00:00:00-07:00'] = 1 + precip["2019-01-18 00:00:00-07:00"] = 1 + precip["2019-02-20 00:00:00-07:00"] = 1 - kwargs = { - 'reps': 10, - 'precipitation_daily': precip - } + kwargs = {"reps": 10, "precipitation_daily": precip} np.random.seed(1977) - sr, sr_ci, soiling_info = soiling_srr(soiling_normalized_daily, soiling_insolation, - clean_criterion=clean_criterion, **kwargs) - assert expected_sr == pytest.approx(sr, abs=1e-6), \ - f"Soiling ratio with clean_criterion='{clean_criterion}' different from expected" + sr, sr_ci, soiling_info = soiling_srr( + soiling_normalized_daily, soiling_insolation, clean_criterion=clean_criterion, **kwargs + ) + assert expected_sr == pytest.approx( + sr, abs=1e-6 + ), f"Soiling ratio with clean_criterion='{clean_criterion}' different from expected" def test_soiling_srr_confidence_levels(soiling_normalized_daily, soiling_insolation): - 'Tests SRR with different confidence level settingsf from above' + "Tests SRR with different confidence level settingsf from above" np.random.seed(1977) - sr, sr_ci, soiling_info = soiling_srr(soiling_normalized_daily, soiling_insolation, - confidence_level=95, reps=10, exceedance_prob=80.0) - assert np.array([0.959322, 0.966066]) == pytest.approx(sr_ci, abs=1e-6), \ - 'Confidence interval with confidence_level=95 different than expected' - assert 0.962691 == pytest.approx(soiling_info['exceedance_level'], abs=1e-6), \ - 'soiling_info["exceedance_level"] different than expected when exceedance_prob=80' + sr, sr_ci, soiling_info = soiling_srr( + soiling_normalized_daily, + soiling_insolation, + confidence_level=95, + reps=10, + exceedance_prob=80.0, + ) + assert np.array([0.959322, 0.966066]) == pytest.approx( + sr_ci, abs=1e-6 + ), "Confidence interval with confidence_level=95 different than expected" + assert 0.962691 == pytest.approx( + soiling_info["exceedance_level"], abs=1e-6 + ), 'soiling_info["exceedance_level"] different than expected when exceedance_prob=80' def test_soiling_srr_dayscale(soiling_normalized_daily, soiling_insolation): - 'Test that a long dayscale can prevent valid intervals from being found' + "Test that a long dayscale can prevent valid intervals from being found" with pytest.raises(NoValidIntervalError): np.random.seed(1977) - sr, sr_ci, soiling_info = soiling_srr(soiling_normalized_daily, soiling_insolation, - confidence_level=68.2, reps=10, day_scale=91) + sr, sr_ci, soiling_info = soiling_srr( + soiling_normalized_daily, + soiling_insolation, + confidence_level=68.2, + reps=10, + day_scale=91, + ) def test_soiling_srr_clean_threshold(soiling_normalized_daily, soiling_insolation): - '''Test that clean test_soiling_srr_clean_threshold works with a float and - can cause no soiling intervals to be found''' + """Test that clean test_soiling_srr_clean_threshold works with a float and + can cause no soiling intervals to be found""" np.random.seed(1977) - sr, sr_ci, soiling_info = soiling_srr(soiling_normalized_daily, soiling_insolation, reps=10, - clean_threshold=0.01) - assert 0.964369 == pytest.approx(sr, abs=1e-6), \ - 'Soiling ratio with specified clean_threshold different from expected value' + sr, sr_ci, soiling_info = soiling_srr( + soiling_normalized_daily, soiling_insolation, reps=10, clean_threshold=0.01 + ) + assert 0.964369 == pytest.approx( + sr, abs=1e-6 + ), "Soiling ratio with specified clean_threshold different from expected value" with pytest.raises(NoValidIntervalError): np.random.seed(1977) - sr, sr_ci, soiling_info = soiling_srr(soiling_normalized_daily, soiling_insolation, - reps=10, clean_threshold=0.1) + sr, sr_ci, soiling_info = soiling_srr( + soiling_normalized_daily, soiling_insolation, reps=10, clean_threshold=0.1 + ) def test_soiling_srr_trim(soiling_normalized_daily, soiling_insolation): np.random.seed(1977) - sr, sr_ci, soiling_info = soiling_srr(soiling_normalized_daily, soiling_insolation, reps=10, - trim=True) + sr, sr_ci, soiling_info = soiling_srr( + soiling_normalized_daily, soiling_insolation, reps=10, trim=True + ) - assert 0.978093 == pytest.approx(sr, abs=1e-6), \ - 'Soiling ratio with trim=True different from expected value' - assert len(soiling_info['soiling_interval_summary']) == 1, \ - 'Wrong number of soiling intervals found with trim=True' + assert 0.978093 == pytest.approx( + sr, abs=1e-6 + ), "Soiling ratio with trim=True different from expected value" + assert ( + len(soiling_info["soiling_interval_summary"]) == 1 + ), "Wrong number of soiling intervals found with trim=True" -@pytest.mark.parametrize('method,expected_sr', - [('random_clean', 0.920444), - ('perfect_clean', 0.966912) - ]) +@pytest.mark.parametrize( + "method,expected_sr", [("random_clean", 0.920444), ("perfect_clean", 0.966912)] +) def test_soiling_srr_method(soiling_normalized_daily, soiling_insolation, method, expected_sr): np.random.seed(1977) - sr, sr_ci, soiling_info = soiling_srr(soiling_normalized_daily, soiling_insolation, reps=10, - method=method) - assert expected_sr == pytest.approx(sr, abs=1e-6), \ - f'Soiling ratio with method="{method}" different from expected value' + sr, sr_ci, soiling_info = soiling_srr( + soiling_normalized_daily, soiling_insolation, reps=10, method=method + ) + assert expected_sr == pytest.approx( + sr, abs=1e-6 + ), f'Soiling ratio with method="{method}" different from expected value' def test_soiling_srr_min_interval_length(soiling_normalized_daily, soiling_insolation): - 'Test that a long minimum interval length prevents finding shorter intervals' + "Test that a long minimum interval length prevents finding shorter intervals" with pytest.raises(NoValidIntervalError): np.random.seed(1977) # normalized_daily intervals are 25 days long, so min=26 should fail: - _ = soiling_srr(soiling_normalized_daily, soiling_insolation, confidence_level=68.2, - reps=10, min_interval_length=26) + _ = soiling_srr( + soiling_normalized_daily, + soiling_insolation, + confidence_level=68.2, + reps=10, + min_interval_length=26, + ) # but min=24 should be fine: - _ = soiling_srr(soiling_normalized_daily, soiling_insolation, confidence_level=68.2, - reps=10, min_interval_length=24) + _ = soiling_srr( + soiling_normalized_daily, + soiling_insolation, + confidence_level=68.2, + reps=10, + min_interval_length=24, + ) def test_soiling_srr_recenter_false(soiling_normalized_daily, soiling_insolation): np.random.seed(1977) - sr, sr_ci, soiling_info = soiling_srr(soiling_normalized_daily, soiling_insolation, reps=10, - recenter=False) - assert 1 == soiling_info['renormalizing_factor'], \ - 'Renormalizing factor != 1 with recenter=False' - assert 0.966387 == pytest.approx(sr, abs=1e-6), \ - 'Soiling ratio different than expected when recenter=False' + sr, sr_ci, soiling_info = soiling_srr( + soiling_normalized_daily, soiling_insolation, reps=10, recenter=False + ) + assert ( + 1 == soiling_info["renormalizing_factor"] + ), "Renormalizing factor != 1 with recenter=False" + assert 0.966387 == pytest.approx( + sr, abs=1e-6 + ), "Soiling ratio different than expected when recenter=False" def test_soiling_srr_negative_step(soiling_normalized_daily, soiling_insolation): @@ -185,97 +258,111 @@ def test_soiling_srr_negative_step(soiling_normalized_daily, soiling_insolation) stepped_daily.iloc[37:] = stepped_daily.iloc[37:] - 0.1 np.random.seed(1977) - with pytest.warns(UserWarning, match='20% or more of the daily data'): + with pytest.warns(UserWarning, match="20% or more of the daily data"): sr, sr_ci, soiling_info = soiling_srr(stepped_daily, soiling_insolation, reps=10) - assert list(soiling_info['soiling_interval_summary']['valid'].values) == [True, False, True], \ - 'Soiling interval validity differs from expected when a large negative step\ - is incorporated into the data' + assert list(soiling_info["soiling_interval_summary"]["valid"].values) == [ + True, + False, + True, + ], "Soiling interval validity differs from expected when a large negative step\ + is incorporated into the data" - assert 0.936932 == pytest.approx(sr, abs=1e-6), \ - 'Soiling ratio different from expected when a large negative step is incorporated into the data' # noqa: E501 + assert 0.936932 == pytest.approx( + sr, abs=1e-6 + ), "Soiling ratio different from expected when a large negative step is incorporated into the data" # noqa: E501 def test_soiling_srr_max_negative_slope_error(soiling_normalized_daily, soiling_insolation): np.random.seed(1977) - with pytest.warns(UserWarning, match='20% or more of the daily data'): - sr, sr_ci, soiling_info = soiling_srr(soiling_normalized_daily, soiling_insolation, - reps=10, max_relative_slope_error=45.0) + with pytest.warns(UserWarning, match="20% or more of the daily data"): + sr, sr_ci, soiling_info = soiling_srr( + soiling_normalized_daily, soiling_insolation, reps=10, max_relative_slope_error=45.0 + ) - assert list(soiling_info['soiling_interval_summary']['valid'].values) == [True, True, False], \ - 'Soiling interval validity differs from expected when max_relative_slope_error=45.0' + assert list(soiling_info["soiling_interval_summary"]["valid"].values) == [ + True, + True, + False, + ], "Soiling interval validity differs from expected when max_relative_slope_error=45.0" - assert 0.958761 == pytest.approx(sr, abs=1e-6), \ - 'Soiling ratio different from expected when max_relative_slope_error=45.0' + assert 0.958761 == pytest.approx( + sr, abs=1e-6 + ), "Soiling ratio different from expected when max_relative_slope_error=45.0" def test_soiling_srr_with_nan_interval(soiling_normalized_daily, soiling_insolation): - ''' + """ Previous versions had a bug which would have raised an error when an entire interval was NaN. See https://github.com/NatLabRockies/rdtools/issues/129 - ''' + """ reps = 10 normalized_corrupt = soiling_normalized_daily.copy() normalized_corrupt[26:50] = np.nan np.random.seed(1977) - with pytest.warns(UserWarning, match='20% or more of the daily data'): + with pytest.warns(UserWarning, match="20% or more of the daily data"): sr, sr_ci, soiling_info = soiling_srr(normalized_corrupt, soiling_insolation, reps=reps) - assert 0.948792 == pytest.approx(sr, abs=1e-6), \ - 'Soiling ratio different from expected value when an entire interval was NaN' + assert 0.948792 == pytest.approx( + sr, abs=1e-6 + ), "Soiling ratio different from expected value when an entire interval was NaN" def test_soiling_srr_outlier_factor(soiling_normalized_daily, soiling_insolation): - _, _, info = soiling_srr(soiling_normalized_daily, soiling_insolation, - reps=1, outlier_factor=8) - assert len(info['soiling_interval_summary']) == 2, \ - 'Increasing the outlier_factor did not result in the expected number of soiling intervals' + _, _, info = soiling_srr( + soiling_normalized_daily, soiling_insolation, reps=1, outlier_factor=8 + ) + assert ( + len(info["soiling_interval_summary"]) == 2 + ), "Increasing the outlier_factor did not result in the expected number of soiling intervals" def test_soiling_srr_kwargs(monkeypatch, soiling_normalized_daily, soiling_insolation): - ''' + """ Make sure that all soiling_srr parameters get passed on to SRRAnalysis and SRRAnalysis.run(), i.e. all necessary inputs to SRRAnalysis are provided by soiling_srr. Done by removing the SRRAnalysis default param values and making sure everything still runs. - ''' + """ # the __defaults__ attr is the tuple of default values in py3 monkeypatch.delattr(SRRAnalysis.__init__, "__defaults__") monkeypatch.delattr(SRRAnalysis.run, "__defaults__") _ = soiling_srr(soiling_normalized_daily, soiling_insolation, reps=10) -@pytest.mark.parametrize(('start,expected_sr'), - [(18, 0.984779), (17, 0.981258)]) -def test_soiling_srr_min_interval_length_default(soiling_normalized_daily, soiling_insolation, - start, expected_sr): - ''' +@pytest.mark.parametrize(("start,expected_sr"), [(18, 0.984779), (17, 0.981258)]) +def test_soiling_srr_min_interval_length_default( + soiling_normalized_daily, soiling_insolation, start, expected_sr +): + """ Make sure that the default value of min_interval_length is 7 days by testing on a cropped version of the example data - ''' + """ reps = 10 np.random.seed(1977) - sr, sr_ci, soiling_info = soiling_srr(soiling_normalized_daily[start:], - soiling_insolation[start:], reps=reps) - assert expected_sr == pytest.approx(sr, abs=1e-6), \ - 'Soiling ratio different from expected value' + sr, sr_ci, soiling_info = soiling_srr( + soiling_normalized_daily[start:], soiling_insolation[start:], reps=reps + ) + assert expected_sr == pytest.approx( + sr, abs=1e-6 + ), "Soiling ratio different from expected value" -@pytest.mark.parametrize('test_param', ['energy_normalized_daily', - 'insolation_daily', - 'precipitation_daily']) +@pytest.mark.parametrize( + "test_param", ["energy_normalized_daily", "insolation_daily", "precipitation_daily"] +) def test_soiling_srr_non_daily_inputs(test_param): - ''' + """ Validate the frequency check for input time series - ''' - dummy_daily_explicit = pd.Series(0, index=pd.date_range('2019-01-01', periods=10, freq='d')) - dummy_daily_implicit = pd.Series(0, index=pd.date_range('2019-01-01', periods=10, freq='d')) + """ + dummy_daily_explicit = pd.Series(0, index=pd.date_range("2019-01-01", periods=10, freq="d")) + dummy_daily_implicit = pd.Series(0, index=pd.date_range("2019-01-01", periods=10, freq="d")) dummy_daily_implicit.index.freq = None dummy_nondaily = pd.Series(0, index=dummy_daily_explicit.index[::2]) kwargs = { - 'energy_normalized_daily': dummy_daily_explicit, - 'insolation_daily': dummy_daily_explicit, - 'precipitation_daily': dummy_daily_explicit, + "energy_normalized_daily": dummy_daily_explicit, + "insolation_daily": dummy_daily_explicit, + "precipitation_daily": dummy_daily_explicit, } # no error for implicit daily inputs kwargs[test_param] = dummy_daily_implicit @@ -283,27 +370,27 @@ def test_soiling_srr_non_daily_inputs(test_param): # yes error for non-daily inputs kwargs[test_param] = dummy_nondaily - with pytest.raises(ValueError, match='must have daily frequency'): + with pytest.raises(ValueError, match="must have daily frequency"): _ = SRRAnalysis(**kwargs) def test_soiling_srr_argument_checks(soiling_normalized_daily, soiling_insolation): - ''' + """ Make sure various argument validation warnings and errors are raised - ''' + """ kwargs = { - 'energy_normalized_daily': soiling_normalized_daily, - 'insolation_daily': soiling_insolation, - 'reps': 10 + "energy_normalized_daily": soiling_normalized_daily, + "insolation_daily": soiling_insolation, + "reps": 10, } - with pytest.warns(UserWarning, match='An even value of day_scale was passed'): + with pytest.warns(UserWarning, match="An even value of day_scale was passed"): _ = soiling_srr(day_scale=12, **kwargs) - with pytest.raises(ValueError, match='clean_criterion must be one of'): - _ = soiling_srr(clean_criterion='bad', **kwargs) + with pytest.raises(ValueError, match="clean_criterion must be one of"): + _ = soiling_srr(clean_criterion="bad", **kwargs) - with pytest.raises(ValueError, match='Invalid method specification'): - _ = soiling_srr(method='bad', **kwargs) + with pytest.raises(ValueError, match="Invalid method specification"): + _ = soiling_srr(method="bad", **kwargs) # ########################### @@ -313,25 +400,25 @@ def test_soiling_srr_argument_checks(soiling_normalized_daily, soiling_insolatio @pytest.fixture() def multi_year_profiles(): - times = pd.date_range('01-01-2018', '11-30-2019', freq='D') - data = np.array([0]*365 + [10]*334) + times = pd.date_range("01-01-2018", "11-30-2019", freq="D") + data = np.array([0] * 365 + [10] * 334) profiles = [pd.Series(x + data, times) for x in range(10)] # make insolation slighly longer to test for proper normalization - times = pd.date_range('01-01-2018', '12-31-2019', freq='D') - insolation = 350*[0.8] + (len(times)-350)*[1] + times = pd.date_range("01-01-2018", "12-31-2019", freq="D") + insolation = 350 * [0.8] + (len(times) - 350) * [1] insolation = pd.Series(insolation, index=times) return profiles, insolation def test_annual_soiling_ratios(multi_year_profiles): - expected_data = np.array([[2018, 4.5, 1.431, 7.569], - [2019, 14.5, 11.431, 17.569]]) - expected = pd.DataFrame(data=expected_data, - columns=['year', 'soiling_ratio_median', 'soiling_ratio_low', - 'soiling_ratio_high']) - expected['year'] = expected['year'].astype(int) + expected_data = np.array([[2018, 4.5, 1.431, 7.569], [2019, 14.5, 11.431, 17.569]]) + expected = pd.DataFrame( + data=expected_data, + columns=["year", "soiling_ratio_median", "soiling_ratio_low", "soiling_ratio_high"], + ) + expected["year"] = expected["year"].astype(int) srr_profiles, insolation = multi_year_profiles result = annual_soiling_ratios(srr_profiles, insolation) @@ -340,12 +427,12 @@ def test_annual_soiling_ratios(multi_year_profiles): def test_annual_soiling_ratios_confidence_interval(multi_year_profiles): - expected_data = np.array([[2018, 4.5, 0.225, 8.775], - [2019, 14.5, 10.225, 18.775]]) - expected = pd.DataFrame(data=expected_data, - columns=['year', 'soiling_ratio_median', 'soiling_ratio_low', - 'soiling_ratio_high']) - expected['year'] = expected['year'].astype(int) + expected_data = np.array([[2018, 4.5, 0.225, 8.775], [2019, 14.5, 10.225, 18.775]]) + expected = pd.DataFrame( + data=expected_data, + columns=["year", "soiling_ratio_median", "soiling_ratio_low", "soiling_ratio_high"], + ) + expected["year"] = expected["year"].astype(int) srr_profiles, insolation = multi_year_profiles result = annual_soiling_ratios(srr_profiles, insolation, confidence_level=95) @@ -356,9 +443,11 @@ def test_annual_soiling_ratios_confidence_interval(multi_year_profiles): def test_annual_soiling_ratios_warning(multi_year_profiles): srr_profiles, insolation = multi_year_profiles insolation = insolation.iloc[:-200] - match = ('The indexes of stochastic_soiling_profiles are not entirely contained ' - 'within the index of insolation_daily. Every day in stochastic_soiling_profiles ' - 'should be represented in insolation_daily. This may cause erroneous results.') + match = ( + "The indexes of stochastic_soiling_profiles are not entirely contained " + "within the index of insolation_daily. Every day in stochastic_soiling_profiles " + "should be represented in insolation_daily. This may cause erroneous results." + ) with pytest.warns(UserWarning, match=match): _ = annual_soiling_ratios(srr_profiles, insolation) @@ -370,41 +459,48 @@ def test_annual_soiling_ratios_warning(multi_year_profiles): @pytest.fixture() def soiling_interval_summary(): - starts = ['2019/01/01', '2019/01/16', '2019/02/08', '2019/03/06'] - starts = pd.to_datetime(starts).tz_localize('America/Denver') - ends = ['2019/01/15', '2019/02/07', '2019/03/05', '2019/04/07'] - ends = pd.to_datetime(ends).tz_localize('America/Denver') + starts = ["2019/01/01", "2019/01/16", "2019/02/08", "2019/03/06"] + starts = pd.to_datetime(starts).tz_localize("America/Denver") + ends = ["2019/01/15", "2019/02/07", "2019/03/05", "2019/04/07"] + ends = pd.to_datetime(ends).tz_localize("America/Denver") slopes = [-0.005, -0.002, -0.001, -0.002] slopes_low = [-0.0055, -0.0025, -0.0015, -0.003] slopes_high = [-0.004, 0, 0, -0.001] valids = [True, True, False, True] soiling_interval_summary = pd.DataFrame() - soiling_interval_summary['start'] = starts - soiling_interval_summary['end'] = ends - soiling_interval_summary['soiling_rate'] = slopes - soiling_interval_summary['soiling_rate_low'] = slopes_low - soiling_interval_summary['soiling_rate_high'] = slopes_high - soiling_interval_summary['inferred_start_loss'] = np.nan - soiling_interval_summary['inferred_end_loss'] = np.nan - soiling_interval_summary['length'] = (ends - starts).days - soiling_interval_summary['valid'] = valids + soiling_interval_summary["start"] = starts + soiling_interval_summary["end"] = ends + soiling_interval_summary["soiling_rate"] = slopes + soiling_interval_summary["soiling_rate_low"] = slopes_low + soiling_interval_summary["soiling_rate_high"] = slopes_high + soiling_interval_summary["inferred_start_loss"] = np.nan + soiling_interval_summary["inferred_end_loss"] = np.nan + soiling_interval_summary["length"] = (ends - starts).days + soiling_interval_summary["valid"] = valids return soiling_interval_summary def _build_monthly_summary(top_rows): - ''' + """ Convienience function to build a full monthly soiling summary dataframe from the expected_top_rows which summarize Jan-April - ''' - - all_rows = np.vstack((top_rows, [[1, np.nan, np.nan, np.nan, 0]]*8)) - - df = pd.DataFrame(data=all_rows, - columns=['month', 'soiling_rate_median', 'soiling_rate_low', - 'soiling_rate_high', 'interval_count']) - df['month'] = range(1, 13) + """ + + all_rows = np.vstack((top_rows, [[1, np.nan, np.nan, np.nan, 0]] * 8)) + + df = pd.DataFrame( + data=all_rows, + columns=[ + "month", + "soiling_rate_median", + "soiling_rate_low", + "soiling_rate_high", + "interval_count", + ], + ) + df["month"] = range(1, 13) return df @@ -413,11 +509,14 @@ def test_monthly_soiling_rates(soiling_interval_summary): np.random.seed(1977) result = monthly_soiling_rates(soiling_interval_summary) - expected = np.array([ - [1.00000000e+00, -2.42103810e-03, -5.00912766e-03, -7.68551806e-04, 2.00000000e+00], - [2.00000000e+00, -1.25092837e-03, -2.10091842e-03, -3.97354321e-04, 1.00000000e+00], - [3.00000000e+00, -2.00313359e-03, -2.68359541e-03, -1.31927678e-03, 1.00000000e+00], - [4.00000000e+00, -1.99729563e-03, -2.68067699e-03, -1.31667446e-03, 1.00000000e+00]]) + expected = np.array( + [ + [1.00000000e00, -2.42103810e-03, -5.00912766e-03, -7.68551806e-04, 2.00000000e00], + [2.00000000e00, -1.25092837e-03, -2.10091842e-03, -3.97354321e-04, 1.00000000e00], + [3.00000000e00, -2.00313359e-03, -2.68359541e-03, -1.31927678e-03, 1.00000000e00], + [4.00000000e00, -1.99729563e-03, -2.68067699e-03, -1.31667446e-03, 1.00000000e00], + ] + ) expected = _build_monthly_summary(expected) pd.testing.assert_frame_equal(result, expected, check_dtype=False) @@ -427,11 +526,14 @@ def test_monthly_soiling_rates_min_interval_length(soiling_interval_summary): np.random.seed(1977) result = monthly_soiling_rates(soiling_interval_summary, min_interval_length=20) - expected = np.array([ - [1.00000000e+00, -1.24851539e-03, -2.10394564e-03, -3.98358211e-04, 1.00000000e+00], - [2.00000000e+00, -1.25092837e-03, -2.10091842e-03, -3.97330424e-04, 1.00000000e+00], - [3.00000000e+00, -2.00309454e-03, -2.68359541e-03, -1.31927678e-03, 1.00000000e+00], - [4.00000000e+00, -1.99729563e-03, -2.68067699e-03, -1.31667446e-03, 1.00000000e+00]]) + expected = np.array( + [ + [1.00000000e00, -1.24851539e-03, -2.10394564e-03, -3.98358211e-04, 1.00000000e00], + [2.00000000e00, -1.25092837e-03, -2.10091842e-03, -3.97330424e-04, 1.00000000e00], + [3.00000000e00, -2.00309454e-03, -2.68359541e-03, -1.31927678e-03, 1.00000000e00], + [4.00000000e00, -1.99729563e-03, -2.68067699e-03, -1.31667446e-03, 1.00000000e00], + ] + ) expected = _build_monthly_summary(expected) pd.testing.assert_frame_equal(result, expected, check_dtype=False) @@ -441,11 +543,14 @@ def test_monthly_soiling_rates_max_slope_err(soiling_interval_summary): np.random.seed(1977) result = monthly_soiling_rates(soiling_interval_summary, max_relative_slope_error=120) - expected = np.array([ - [1.00000000e+00, -4.74910923e-03, -5.26236739e-03, -4.23901493e-03, 1.00000000e+00], - [2.00000000e+00, np.nan, np.nan, np.nan, 0.00000000e+00], - [3.00000000e+00, -2.00074270e-03, -2.68073474e-03, -1.31786434e-03, 1.00000000e+00], - [4.00000000e+00, -2.00309454e-03, -2.68359541e-03, -1.31927678e-03, 1.00000000e+00]]) + expected = np.array( + [ + [1.00000000e00, -4.74910923e-03, -5.26236739e-03, -4.23901493e-03, 1.00000000e00], + [2.00000000e00, np.nan, np.nan, np.nan, 0.00000000e00], + [3.00000000e00, -2.00074270e-03, -2.68073474e-03, -1.31786434e-03, 1.00000000e00], + [4.00000000e00, -2.00309454e-03, -2.68359541e-03, -1.31927678e-03, 1.00000000e00], + ] + ) expected = _build_monthly_summary(expected) pd.testing.assert_frame_equal(result, expected, check_dtype=False) @@ -455,11 +560,14 @@ def test_monthly_soiling_rates_confidence_level(soiling_interval_summary): np.random.seed(1977) result = monthly_soiling_rates(soiling_interval_summary, confidence_level=95) - expected = np.array([ - [1.00000000e+00, -2.42103810e-03, -5.42313113e-03, -1.21156562e-04, 2.00000000e+00], - [2.00000000e+00, -1.25092837e-03, -2.43731574e-03, -6.23842627e-05, 1.00000000e+00], - [3.00000000e+00, -2.00313359e-03, -2.94998476e-03, -1.04988760e-03, 1.00000000e+00], - [4.00000000e+00, -1.99729563e-03, -2.95063841e-03, -1.04869949e-03, 1.00000000e+00]]) + expected = np.array( + [ + [1.00000000e00, -2.42103810e-03, -5.42313113e-03, -1.21156562e-04, 2.00000000e00], + [2.00000000e00, -1.25092837e-03, -2.43731574e-03, -6.23842627e-05, 1.00000000e00], + [3.00000000e00, -2.00313359e-03, -2.94998476e-03, -1.04988760e-03, 1.00000000e00], + [4.00000000e00, -1.99729563e-03, -2.95063841e-03, -1.04869949e-03, 1.00000000e00], + ] + ) expected = _build_monthly_summary(expected) @@ -470,11 +578,14 @@ def test_monthly_soiling_rates_reps(soiling_interval_summary): np.random.seed(1977) result = monthly_soiling_rates(soiling_interval_summary, reps=3) - expected = np.array([ - [1.00000000e+00, -2.88594088e-03, -5.03736679e-03, -6.47391131e-04, 2.00000000e+00], - [2.00000000e+00, -1.67359565e-03, -2.00504171e-03, -1.33240044e-03, 1.00000000e+00], - [3.00000000e+00, -1.22306993e-03, -2.19274892e-03, -1.11793240e-03, 1.00000000e+00], - [4.00000000e+00, -1.94675549e-03, -2.42574164e-03, -1.54850795e-03, 1.00000000e+00]]) + expected = np.array( + [ + [1.00000000e00, -2.88594088e-03, -5.03736679e-03, -6.47391131e-04, 2.00000000e00], + [2.00000000e00, -1.67359565e-03, -2.00504171e-03, -1.33240044e-03, 1.00000000e00], + [3.00000000e00, -1.22306993e-03, -2.19274892e-03, -1.11793240e-03, 1.00000000e00], + [4.00000000e00, -1.94675549e-03, -2.42574164e-03, -1.54850795e-03, 1.00000000e00], + ] + ) expected = _build_monthly_summary(expected)