From e15998014047a8f46fea0c1f3f3dbd4734fe60da Mon Sep 17 00:00:00 2001 From: RyanAugust Date: Wed, 29 Apr 2026 20:27:39 +0000 Subject: [PATCH 1/3] param init fully handled outside of runtime --- examples/example_config_dict.py | 20 +++- src/pysimmmulator/load_parameters.py | 35 ++++++- src/pysimmmulator/simulate.py | 122 ++++++++-------------- tests/spot_test.py | 12 +-- tests/test_adstock_saturation.py | 1 - tests/test_config.py | 2 +- tests/test_edge_cases.py | 9 +- tests/test_sim.py | 151 +++++++++++++-------------- 8 files changed, 178 insertions(+), 174 deletions(-) diff --git a/examples/example_config_dict.py b/examples/example_config_dict.py index 9bfec8c..74e9268 100644 --- a/examples/example_config_dict.py +++ b/examples/example_config_dict.py @@ -5,11 +5,25 @@ 'baseline_params': {'base_p': 100,'trend_p': 2,'temp_var': 8,'temp_coef_mean': 50000,'temp_coef_sd': 5000,'error_std': 90}, 'ad_spend_params': {'campaign_spend_mean': 329000, 'campaign_spend_std': 100000, - 'max_min_proportion_on_each_channel':{'Amazon': {'min': 0.1, 'max': 0.35},'TV': {'min': 0.05, 'max': 0.3},'YouTube': {'min': 0.01, 'max': 0.1},'Search': {'min': 0.1, 'max': 0.3},'Meta': {'min': 0.1, 'max': 0.3}}}, + 'max_min_proportion_on_each_channel':{'Amazon': {'min': 0.1, 'max': 0.35}, + 'TV': {'min': 0.05, 'max': 0.3}, + 'YouTube': {'min': 0.01, 'max': 0.1}, + 'Search': {'min': 0.1, 'max': 0.3}, + 'Meta': {'min': 0.1, 'max': 0.3}}}, 'media_params': {'true_cpm': {'Amazon': 3.5,'TV': 12.5,'YouTube': 6.5,'Meta': 4.25}, 'true_cpc': {'Search': 2.6,"Walmart": 3.6}, - 'noisy_cpm_cpc': {'Amazon': {'loc': 0.0, 'scale': 0.25},'Meta': {'loc': 0.0, 'scale': 1.25},'Walmart': {'loc': 0.0, 'scale': 0.75},'TV': {'loc': 0.0, 'scale': 1.0},'YouTube': {'loc': 0.0, 'scale': 1.0},'Search': {'loc': 0.0, 'scale': 0.25}}}, - 'cvr_params': {'noisy_cvr': {'Amazon': {'loc': 0.0, 'scale': 0.01},'TV': {'loc': 0.0, 'scale': 0.0025},'YouTube': {'loc': 0.0, 'scale': 0.005},'Search': {'loc': 0.0, 'scale': 0.01},'Meta': {'loc': 0.0, 'scale': 0.05},'Walmart': {'loc': 0.0, 'scale': 0.05}}}, + 'noisy_cpm_cpc': {'Amazon': {'loc': 0.0, 'scale': 0.25}, + 'Meta': {'loc': 0.0, 'scale': 1.25}, + 'Walmart': {'loc': 0.0, 'scale': 0.75}, + 'TV': {'loc': 0.0, 'scale': 1.0}, + 'YouTube': {'loc': 0.0, 'scale': 1.0}, + 'Search': {'loc': 0.0, 'scale': 0.25}}}, + 'cvr_params': {'noisy_cvr': {'Amazon': {'loc': 0.0, 'scale': 0.01}, + 'TV': {'loc': 0.0, 'scale': 0.0025}, + 'YouTube': {'loc': 0.0, 'scale': 0.005}, + 'Search': {'loc': 0.0, 'scale': 0.01}, + 'Meta': {'loc': 0.0, 'scale': 0.05}, + 'Walmart': {'loc': 0.0, 'scale': 0.05}}}, 'adstock_params': { 'adstock': { 'Amazon': {'type': 'geometric', 'params': {'lambda': 0.1}}, diff --git a/src/pysimmmulator/load_parameters.py b/src/pysimmmulator/load_parameters.py index a75d5d0..f9e2ace 100644 --- a/src/pysimmmulator/load_parameters.py +++ b/src/pysimmmulator/load_parameters.py @@ -7,6 +7,7 @@ CVRParameters, AdstockParameters, OutputParameters, + GeoParameters, ) def load_config(config_path: str) -> dict: @@ -16,7 +17,7 @@ def load_config(config_path: str) -> dict: def define_basic_params(years, channels_clicks, channels_impressions, frequency_of_campaigns, start_date, true_cvr, revenue_per_conv,): "Takes in requirements for basic_params and loads with dataclass for validation as precursor" - my_basic_params = BasicParameters( + return BasicParameters( years=years, channels_clicks=channels_clicks, channels_impressions=channels_impressions, @@ -26,7 +27,37 @@ def define_basic_params(years, channels_clicks, channels_impressions, frequency_ revenue_per_conv=revenue_per_conv, ) - return my_basic_params +def create_all_parameters(config: dict) -> dict: + """Instantiates all parameter dataclasses from a configuration dictionary. + + Args: + config (dict): Complete configuration dictionary. + Returns: + dict: Dictionary containing instantiated parameter objects.""" + params = {} + params["basic_params"] = define_basic_params(**config["basic_params"]) + + params["baseline_params"] = BaselineParameters( + basic_params=params["basic_params"], **config["baseline_params"] + ) + + params["ad_spend_params"] = AdSpendParameters(**config["ad_spend_params"]) + + params["media_params"] = MediaParameters(**config["media_params"]) + params["media_params"].check(basic_params=params["basic_params"]) + + params["cvr_params"] = CVRParameters(**config["cvr_params"]) + params["cvr_params"].check(basic_params=params["basic_params"]) + + params["adstock_params"] = AdstockParameters(**config["adstock_params"]) + params["adstock_params"].check(basic_params=params["basic_params"]) + + params["output_params"] = OutputParameters(**config["output_params"]) + + if "geo_params" in config: + params["geo_params"] = GeoParameters(**config["geo_params"]) + + return params def validate_config(config_path: str, return_individual_results: bool = False): """Loads and validates the parameters against individual diff --git a/src/pysimmmulator/simulate.py b/src/pysimmmulator/simulate.py index 6500740..0d32e24 100644 --- a/src/pysimmmulator/simulate.py +++ b/src/pysimmmulator/simulate.py @@ -59,7 +59,7 @@ def _report_random_state(self) -> int: """ return self.rng.bit_generator - def simulate_baseline( self, base_p: int, trend_p: int, temp_var: int, temp_coef_mean: int, temp_coef_sd: int, error_std: int,) -> pd.DataFrame: + def simulate_baseline(self, params: BaselineParameters) -> pd.DataFrame: """Simulation of baseline sales and revenue for the subject business. The simulation calculates daily baseline sales as a sum of: @@ -72,23 +72,10 @@ def simulate_baseline( self, base_p: int, trend_p: int, temp_var: int, temp_coef If the combined terms result in negative sales, they are clamped to zero. Args: - base_p (int): Daily base sales units (non-marketing driven). - trend_p (int): Total linear growth units over the full duration. - temp_var (int): Amplitude of the seasonal sine function. - temp_coef_mean (int): Mean scaling factor for seasonal impact. - temp_coef_sd (int): Standard deviation of seasonal impact scaling. - error_std (int): Standard deviation of daily statistical noise. + params (BaselineParameters): Parameters for baseline simulation. Returns: pd.DataFrame: Daily baseline sales components.""" - self.baseline_params = BaselineParameters( - basic_params=self.basic_params, - base_p=base_p, - trend_p=trend_p, - temp_var=temp_var, - temp_coef_mean=temp_coef_mean, - temp_coef_sd=temp_coef_sd, - error_std=error_std, - ) + self.baseline_params = params days = np.arange(0, self.basic_params.years * 365) base = (np.zeros(shape=self.basic_params.years * 365) + self.baseline_params.base_p) @@ -103,7 +90,7 @@ def simulate_baseline( self, base_p: int, trend_p: int, temp_var: int, temp_coef baseline_sales = base + trend + seasonality + error if np.any(baseline_sales < 0): - baseline_sales = np.where(baseline_sales < 0, 0, baseline_sales) + baseline_sales = np.where(baseline_sales < 0, 0, baseline_sales) return pd.DataFrame({ "days": days, @@ -114,43 +101,34 @@ def simulate_baseline( self, base_p: int, trend_p: int, temp_var: int, temp_coef "seasonality": seasonality, "error": error, }) - - def simulate_ad_spend( self, baseline_sales_df: pd.DataFrame, campaign_spend_mean: int, campaign_spend_std: int, max_min_proportion_on_each_channel: dict) -> pd.DataFrame: + def simulate_ad_spend( self, baseline_sales_df: pd.DataFrame, params: AdSpendParameters) -> pd.DataFrame: """Simulation of ad spend based on normal distribution parameters for campaign spend. Overall campaign spend is then divided amongst each channel based on passed min-max proportionality. Args: baseline_sales_df (pd.DataFrame): DataFrame containing baseline sales - campaign_spend_mean (int): The average amount of money spent on a campaign. - campaign_spend_std (int): The standard deviation of money spent on a campaign - max_min_proportion_on_each_channel (dict): Specifies the minimum and maximum percentages of total spend allocated to each channel. + params (AdSpendParameters): Parameters for ad spend simulation. Returns: pd.DataFrame: DataFrame containing ad spend data""" - ad_spend_params = AdSpendParameters( - campaign_spend_mean=campaign_spend_mean, - campaign_spend_std=campaign_spend_std, - max_min_proportion_on_each_channel=max_min_proportion_on_each_channel, - ) - campaign_count = int(self.basic_params.years * 365 / self.basic_params.frequency_of_campaigns) # specify amount spent on each campaign according to a normal distribution campaign_spends = np.round( self._truncated_normal( - loc=ad_spend_params.campaign_spend_mean, - scale=ad_spend_params.campaign_spend_std, + loc=params.campaign_spend_mean, + scale=params.campaign_spend_std, size=campaign_count, ), 2, ) campaign_channel_spend_proportions = {} total_proportions = np.zeros(campaign_count) - for (channel, proportions,) in ad_spend_params.max_min_proportion_on_each_channel.items(): + for (channel, proportions,) in params.max_min_proportion_on_each_channel.items(): campaign_channel_spend_proportions[channel] = self.rng.uniform(low=proportions["min"], high=proportions["max"], size=campaign_count,) total_proportions += campaign_channel_spend_proportions[channel] - remaining_channels = [c for c in self.basic_params.all_channels if c not in ad_spend_params.max_min_proportion_on_each_channel.keys()] + remaining_channels = [c for c in self.basic_params.all_channels if c not in params.max_min_proportion_on_each_channel.keys()] if remaining_channels: remaining_channel = remaining_channels[0] campaign_channel_spend_proportions[remaining_channel] = np.maximum(0, 1.0 - total_proportions) @@ -216,7 +194,7 @@ def _negative_replace(self, df: pd.DataFrame, column: str) -> pd.DataFrame: df.loc[df[column] < 0, column] = 0 return df - def simulate_media(self, spend_df: pd.DataFrame, true_cpm: dict, true_cpc: dict, noisy_cpm_cpc: dict) -> pd.DataFrame: + def simulate_media(self, spend_df: pd.DataFrame, params: MediaParameters) -> pd.DataFrame: """Simulation of relevant media metrics for each channel. True values are passed and noise is applied in accordance with a normal distribution described within the noisy dict. Media metrics are checked for 0 values stemming from the random noise applied and will be flagged with logger when found. @@ -224,29 +202,24 @@ def simulate_media(self, spend_df: pd.DataFrame, true_cpm: dict, true_cpc: dict, Args: spend_df (pd.DataFrame): DataFrame containing ad spend data - true_cpm (dict): Specifies the true Cost per Impression (CPM) of each channel (noise will be added to this to simulate number of impressions) - true_cpc (dict): Specifies the true Cost per Click (CPC) of each channel (noise will be added to this to simulate number of clicks) - noisy_cpm_cpc (dict): Specifies the bias and scale of noise added to the true value CPM or CPC for each channel. + params (MediaParameters): Parameters for media simulation. Returns: pd.DataFrame: Updated spend DataFrame""" - media_params = MediaParameters(true_cpm, true_cpc, noisy_cpm_cpc) - media_params.check(basic_params=self.basic_params) - - for channel in media_params.noise_channels: + for channel in params.noise_channels: channel_idx = spend_df[spend_df["channel"] == channel].index channel_noise = self._truncated_normal( size=len(channel_idx), - **noisy_cpm_cpc[channel], - low=-min(true_cpm.get(channel, np.inf), true_cpc.get(channel, np.inf))) + **params.noisy_cpm_cpc[channel], + low=-min(params.true_cpm.get(channel, np.inf), params.true_cpc.get(channel, np.inf))) - channel_true_cpm_value = (true_cpm[channel] if channel in true_cpm.keys() else np.nan) - channel_noisy_cpm_value = (true_cpm[channel] + channel_noise if channel in true_cpm.keys() else np.nan) + channel_true_cpm_value = (params.true_cpm[channel] if channel in params.true_cpm.keys() else np.nan) + channel_noisy_cpm_value = (params.true_cpm[channel] + channel_noise if channel in params.true_cpm.keys() else np.nan) spend_df.loc[channel_idx, "true_cpm"] = channel_true_cpm_value spend_df.loc[channel_idx, "noisy_cpm"] = channel_noisy_cpm_value - channel_true_cpc_value = (true_cpc[channel] if channel in true_cpc.keys() else np.nan) - channel_noisy_cpc_value = (true_cpc[channel] + channel_noise if channel in true_cpc.keys() else np.nan) + channel_true_cpc_value = (params.true_cpc[channel] if channel in params.true_cpc.keys() else np.nan) + channel_noisy_cpc_value = (params.true_cpc[channel] + channel_noise if channel in params.true_cpc.keys() else np.nan) spend_df.loc[channel_idx, "true_cpc"] = channel_true_cpc_value spend_df.loc[channel_idx, "noisy_cpc"] = channel_noisy_cpc_value @@ -266,22 +239,18 @@ def simulate_media(self, spend_df: pd.DataFrame, true_cpm: dict, true_cpc: dict, logger.info("You have completed running step 3: Simulating media.") return spend_df - def simulate_cvr(self, spend_df: pd.DataFrame, noisy_cvr: dict) -> pd.DataFrame: + def simulate_cvr(self, spend_df: pd.DataFrame, params: CVRParameters) -> pd.DataFrame: """Generate Conversion Rate using the true conversion rates passed in the basic params with noise parameters passed in this function. Args: spend_df (pd.DataFrame): DataFrame containing ad spend data - noisy_cvr (dict): Specifies the bias and scale of noise added to the true value CVR for each channel. + params (CVRParameters): Parameters for CVR simulation. Returns: pd.DataFrame: Updated spend DataFrame""" - cvr_params = CVRParameters(noisy_cvr) - cvr_params.check(basic_params=self.basic_params) - - for channel in cvr_params.noise_channels: + for channel in params.noise_channels: channel_idx = spend_df[spend_df["channel"] == channel].index - channel_noise = self.rng.normal(size=len(channel_idx), **noisy_cvr[channel]) - channel_noise = self.rng.weibull((1 / noisy_cvr[channel]["scale"]) / 10 + 1, size=len(channel_idx)) + channel_noise = self.rng.weibull((1 / params.noisy_cvr[channel]["scale"]) / 10 + 1, size=len(channel_idx)) spend_df.loc[channel_idx, "noisy_cvr"] = (channel_noise * self.basic_params.true_cvr[channel]) self._negative_check(spend_df.loc[channel_idx], column="noisy_cvr", channel=channel) @@ -360,35 +329,32 @@ def _simulate_diminishing_returns(self, mmm_df: pd.DataFrame, saturation_config: logger.info("You have completed running step 5c: apply diminishing marginal returns.") return mmm_df - def simulate_decay_returns(self, spend_df: pd.DataFrame, adstock: dict, saturation: dict) -> pd.DataFrame: + def simulate_decay_returns(self, spend_df: pd.DataFrame, params: AdstockParameters) -> pd.DataFrame: """Generates the decay and returns values associated with ad stocking and diminishing returns. Args: spend_df (pd.DataFrame): DataFrame containing ad spend data - adstock (dict): Nested dictionary for adstock configuration - saturation (dict): Nested dictionary for saturation configuration + params (AdstockParameters): Parameters for adstock and saturation. Returns: pd.DataFrame: MMM input DataFrame with decay and returns applied""" - adstock_params = AdstockParameters(adstock, saturation) mmm_df = self._reformat_for_mmm(spend_df=spend_df) - mmm_df = self._simulate_decay(mmm_df=mmm_df, adstock_config=adstock_params.adstock) + mmm_df = self._simulate_decay(mmm_df=mmm_df, adstock_config=params.adstock) mmm_df = self._simulate_diminishing_returns( mmm_df=mmm_df, - saturation_config=adstock_params.saturation, + saturation_config=params.saturation, ) logger.info("You have completed running step 5: Simulating adstock.") return mmm_df - def simulate_geos(self, mmm_df: pd.DataFrame, geo_params: dict) -> pd.DataFrame: + def simulate_geos(self, mmm_df: pd.DataFrame, params: GeoParameters) -> pd.DataFrame: """Distributes the consolidated MMM dataframe into geographies. Args: mmm_df (pd.DataFrame): Consolidated MMM DataFrame - geo_params (dict): Parameters for geographic distribution + params (GeoParameters): Parameters for geographic distribution. Returns: pd.DataFrame: MMM DataFrame with geographic distribution""" - params = GeoParameters(**geo_params) geos = Geos(total_population=params.total_population, random_seed=None) geo_details = geos(geo_specs=params.geo_specs, universal_scale=params.universal_scale, count=params.count) @@ -459,21 +425,20 @@ def calculate_channel_roi(self, mmm_df: pd.DataFrame) -> dict: channel_roi[channel] = total_roi return channel_roi - def finalize_output(self, mmm_df: pd.DataFrame, aggregation_level: str) -> pd.DataFrame: + def finalize_output(self, mmm_df: pd.DataFrame, params: OutputParameters) -> pd.DataFrame: """Provide aggregation (daily, weekly) and column filtering for final output Args: mmm_df (pd.DataFrame): Consolidated MMM DataFrame - aggregation_level (str): [daily, weekly] the granulatiry at which to return output data + params (OutputParameters): Parameters for output finalization. Returns: pd.DataFrame: Finalized output DataFrame""" - output_params = OutputParameters(aggregation_level) metric_cols = [f"{channel}_impressions" for channel in self.basic_params.channels_impressions] [metric_cols.append(f"{channel}_clicks") for channel in self.basic_params.channels_clicks] spend_cols = [] [spend_cols.append(f"{channel}_spend") for channel in self.basic_params.all_channels] - if output_params.aggregation_level == "daily": + if params.aggregation_level == "daily": if "geo_name" in mmm_df.index.names: final_df = mmm_df[metric_cols + spend_cols + ["total_revenue"]] else: @@ -493,24 +458,27 @@ def finalize_output(self, mmm_df: pd.DataFrame, aggregation_level: str) -> pd.Da final_df = (mmm_df[metric_cols + spend_cols + ["total_revenue"] + group_cols].groupby(group_cols).sum()) - logger.info(f"You have completed running step 9: Finalization of output dataframe at the {aggregation_level} level") + logger.info(f"You have completed running step 9: Finalization of output dataframe at the {params.aggregation_level} level") return final_df def run_with_config(self, config: dict) -> tuple[pd.DataFrame, dict]: - if self.basic_params is None: self.basic_params = BasicParameters(**config["basic_params"]) - baseline_sales_df = self.simulate_baseline(**config["baseline_params"]) - spend_df = self.simulate_ad_spend(baseline_sales_df=baseline_sales_df, **config["ad_spend_params"]) - spend_df = self.simulate_media(spend_df=spend_df, **config["media_params"]) - spend_df = self.simulate_cvr(spend_df=spend_df, **config["cvr_params"]) - mmm_df = self.simulate_decay_returns(spend_df=spend_df, **config["adstock_params"]) + from .load_parameters import create_all_parameters + params = create_all_parameters(config) + self.basic_params = params["basic_params"] + + baseline_sales_df = self.simulate_baseline(params["baseline_params"]) + spend_df = self.simulate_ad_spend(baseline_sales_df=baseline_sales_df, params=params["ad_spend_params"]) + spend_df = self.simulate_media(spend_df=spend_df, params=params["media_params"]) + spend_df = self.simulate_cvr(spend_df=spend_df, params=params["cvr_params"]) + mmm_df = self.simulate_decay_returns(spend_df=spend_df, params=params["adstock_params"]) mmm_df = self.calculate_conversions(mmm_df=mmm_df) mmm_df = self.consolidate_dataframe(mmm_df=mmm_df, baseline_sales_df=baseline_sales_df) - if "geo_params" in config: - mmm_df = self.simulate_geos(mmm_df=mmm_df, geo_params=config["geo_params"]) + if "geo_params" in params: + mmm_df = self.simulate_geos(mmm_df=mmm_df, params=params["geo_params"]) channel_roi = self.calculate_channel_roi(mmm_df=mmm_df) - final_df = self.finalize_output(mmm_df=mmm_df, **config["output_params"]) + final_df = self.finalize_output(mmm_df=mmm_df, params=params["output_params"]) return (final_df, channel_roi) diff --git a/tests/spot_test.py b/tests/spot_test.py index 9fd358a..4104eda 100644 --- a/tests/spot_test.py +++ b/tests/spot_test.py @@ -3,12 +3,12 @@ from pysimmmulator import load_parameters, Simulate cfg = load_parameters.load_config(config_path="./examples/example_config.yaml") -my_basic_params = load_parameters.define_basic_params(**cfg["basic_params"]) -sim = Simulate(my_basic_params) -baseline_df = sim.simulate_baseline(**cfg["baseline_params"]) -spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, **cfg["ad_spend_params"]) -spend_df = sim.simulate_media(spend_df=spend_df, **cfg["media_params"]) -spend_df = sim.simulate_cvr(spend_df=spend_df, **cfg["cvr_params"]) +params = load_parameters.create_all_parameters(cfg) +sim = Simulate(params["basic_params"]) +baseline_df = sim.simulate_baseline(params["baseline_params"]) +spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, params=params["ad_spend_params"]) +spend_df = sim.simulate_media(spend_df=spend_df, params=params["media_params"]) +spend_df = sim.simulate_cvr(spend_df=spend_df, params=params["cvr_params"]) date_backbone = pd.date_range( start=sim.basic_params.start_date, end=sim.basic_params.end_date, freq="D" diff --git a/tests/test_adstock_saturation.py b/tests/test_adstock_saturation.py index 9597e62..fc73228 100644 --- a/tests/test_adstock_saturation.py +++ b/tests/test_adstock_saturation.py @@ -1,4 +1,3 @@ -import pytest import pandas as pd import numpy as np from pysimmmulator.simulate import Simulate diff --git a/tests/test_config.py b/tests/test_config.py index 6aad1d0..d17f0ec 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -6,7 +6,7 @@ def config(): return load_parameters.load_config(config_path="./examples/example_config.yaml") def test_load_cfg(config): - assert type(config) == dict + assert isinstance(config,dict) def test_cfg_base_keys(config): assert "basic_params" in config.keys() diff --git a/tests/test_edge_cases.py b/tests/test_edge_cases.py index bf2e486..b2b08ec 100644 --- a/tests/test_edge_cases.py +++ b/tests/test_edge_cases.py @@ -4,7 +4,7 @@ from pysimmmulator import ( Simulate, Geos, Study, BatchStudy, load_parameters ) -from pysimmmulator.param_handlers import BasicParameters +from pysimmmulator.param_handlers import BasicParameters, BaselineParameters def test_basic_parameters_repr(): params = BasicParameters( @@ -81,15 +81,14 @@ def test_simulate_negative_baseline_sales(): revenue_per_conv=10.0 ) sim = Simulate(basic_params) - # base_p=100, error_std=90 satisfies error_std < base_p - # temp_var=1000, temp_coef_mean=-1 will make baseline_sales negative - df = sim.simulate_baseline( + params = BaselineParameters( + basic_params=basic_params, base_p=100, trend_p=0, temp_var=1000, temp_coef_mean=-1, temp_coef_sd=0, error_std=90 ) + df = sim.simulate_baseline(params) assert (df["baseline_sales"] >= 0).all() assert (df["seasonality"].min() < -500) - def test_negative_check_warning(caplog): sim = Simulate() df = pd.DataFrame({"test_col": [-1, 2, 3]}) diff --git a/tests/test_sim.py b/tests/test_sim.py index d648403..400652e 100644 --- a/tests/test_sim.py +++ b/tests/test_sim.py @@ -5,102 +5,95 @@ def config(): return load_parameters.load_config(config_path="./examples/example_config.yaml") -def test_initiate_sim(config): - my_basic_params = load_parameters.define_basic_params(**config["basic_params"]) - Simulate(my_basic_params) - - -def test_step1_baseline(config): - my_basic_params = load_parameters.define_basic_params(**config["basic_params"]) - sim = Simulate(my_basic_params) - sim.simulate_baseline(**config["baseline_params"]) - - -def test_step2_adspend(config): - my_basic_params = load_parameters.define_basic_params(**config["basic_params"]) - sim = Simulate(my_basic_params) - baseline_df = sim.simulate_baseline(**config["baseline_params"]) - sim.simulate_ad_spend(baseline_sales_df=baseline_df, **config["ad_spend_params"]) - - -def test_step3_media(config): - my_basic_params = load_parameters.define_basic_params(**config["basic_params"]) - sim = Simulate(my_basic_params) - baseline_df = sim.simulate_baseline(**config["baseline_params"]) - spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, **config["ad_spend_params"]) - sim.simulate_media(spend_df=spend_df, **config["media_params"]) - - -def test_step4_cvr(config): - my_basic_params = load_parameters.define_basic_params(**config["basic_params"]) - sim = Simulate(my_basic_params) - baseline_df = sim.simulate_baseline(**config["baseline_params"]) - spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, **config["ad_spend_params"]) - spend_df = sim.simulate_media(spend_df=spend_df, **config["media_params"]) - sim.simulate_cvr(spend_df=spend_df, **config["cvr_params"]) - - -def tests_step5_adstock(config): - my_basic_params = load_parameters.define_basic_params(**config["basic_params"]) - sim = Simulate(my_basic_params) - baseline_df = sim.simulate_baseline(**config["baseline_params"]) - spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, **config["ad_spend_params"]) - spend_df = sim.simulate_media(spend_df=spend_df, **config["media_params"]) - spend_df = sim.simulate_cvr(spend_df=spend_df, **config["cvr_params"]) - sim.simulate_decay_returns(spend_df=spend_df, **config["adstock_params"]) - - -def tests_step6_conversions(config): - my_basic_params = load_parameters.define_basic_params(**config["basic_params"]) - sim = Simulate(my_basic_params) - baseline_df = sim.simulate_baseline(**config["baseline_params"]) - spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, **config["ad_spend_params"]) - spend_df = sim.simulate_media(spend_df=spend_df, **config["media_params"]) - spend_df = sim.simulate_cvr(spend_df=spend_df, **config["cvr_params"]) - mmm_df = sim.simulate_decay_returns(spend_df=spend_df, **config["adstock_params"]) +@pytest.fixture +def params(config): + return load_parameters.create_all_parameters(config) + +def test_initiate_sim(params): + Simulate(params["basic_params"]) + + +def test_step1_baseline(params): + sim = Simulate(params["basic_params"]) + sim.simulate_baseline(params["baseline_params"]) + + +def test_step2_adspend(params): + sim = Simulate(params["basic_params"]) + baseline_df = sim.simulate_baseline(params["baseline_params"]) + sim.simulate_ad_spend(baseline_sales_df=baseline_df, params=params["ad_spend_params"]) + + +def test_step3_media(params): + sim = Simulate(params["basic_params"]) + baseline_df = sim.simulate_baseline(params["baseline_params"]) + spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, params=params["ad_spend_params"]) + sim.simulate_media(spend_df=spend_df, params=params["media_params"]) + + +def test_step4_cvr(params): + sim = Simulate(params["basic_params"]) + baseline_df = sim.simulate_baseline(params["baseline_params"]) + spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, params=params["ad_spend_params"]) + spend_df = sim.simulate_media(spend_df=spend_df, params=params["media_params"]) + sim.simulate_cvr(spend_df=spend_df, params=params["cvr_params"]) + + +def tests_step5_adstock(params): + sim = Simulate(params["basic_params"]) + baseline_df = sim.simulate_baseline(params["baseline_params"]) + spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, params=params["ad_spend_params"]) + spend_df = sim.simulate_media(spend_df=spend_df, params=params["media_params"]) + spend_df = sim.simulate_cvr(spend_df=spend_df, params=params["cvr_params"]) + sim.simulate_decay_returns(spend_df=spend_df, params=params["adstock_params"]) + + +def tests_step6_conversions(params): + sim = Simulate(params["basic_params"]) + baseline_df = sim.simulate_baseline(params["baseline_params"]) + spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, params=params["ad_spend_params"]) + spend_df = sim.simulate_media(spend_df=spend_df, params=params["media_params"]) + spend_df = sim.simulate_cvr(spend_df=spend_df, params=params["cvr_params"]) + mmm_df = sim.simulate_decay_returns(spend_df=spend_df, params=params["adstock_params"]) sim.calculate_conversions(mmm_df=mmm_df) -def tests_step7_consolidatedataframe(config): - my_basic_params = load_parameters.define_basic_params(**config["basic_params"]) - sim = Simulate(my_basic_params) - baseline_df = sim.simulate_baseline(**config["baseline_params"]) - spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, **config["ad_spend_params"]) - spend_df = sim.simulate_media(spend_df=spend_df, **config["media_params"]) - spend_df = sim.simulate_cvr(spend_df=spend_df, **config["cvr_params"]) - mmm_df = sim.simulate_decay_returns(spend_df=spend_df, **config["adstock_params"]) +def tests_step7_consolidatedataframe(params): + sim = Simulate(params["basic_params"]) + baseline_df = sim.simulate_baseline(params["baseline_params"]) + spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, params=params["ad_spend_params"]) + spend_df = sim.simulate_media(spend_df=spend_df, params=params["media_params"]) + spend_df = sim.simulate_cvr(spend_df=spend_df, params=params["cvr_params"]) + mmm_df = sim.simulate_decay_returns(spend_df=spend_df, params=params["adstock_params"]) mmm_df = sim.calculate_conversions(mmm_df=mmm_df) sim.consolidate_dataframe(mmm_df=mmm_df, baseline_sales_df=baseline_df) -def tests_step8_roi(config): - my_basic_params = load_parameters.define_basic_params(**config["basic_params"]) - sim = Simulate(my_basic_params) - baseline_df = sim.simulate_baseline(**config["baseline_params"]) - spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, **config["ad_spend_params"]) - spend_df = sim.simulate_media(spend_df=spend_df, **config["media_params"]) - spend_df = sim.simulate_cvr(spend_df=spend_df, **config["cvr_params"]) - mmm_df = sim.simulate_decay_returns(spend_df=spend_df, **config["adstock_params"]) +def tests_step8_roi(params): + sim = Simulate(params["basic_params"]) + baseline_df = sim.simulate_baseline(params["baseline_params"]) + spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, params=params["ad_spend_params"]) + spend_df = sim.simulate_media(spend_df=spend_df, params=params["media_params"]) + spend_df = sim.simulate_cvr(spend_df=spend_df, params=params["cvr_params"]) + mmm_df = sim.simulate_decay_returns(spend_df=spend_df, params=params["adstock_params"]) mmm_df = sim.calculate_conversions(mmm_df=mmm_df) mmm_df = sim.consolidate_dataframe(mmm_df=mmm_df, baseline_sales_df=baseline_df) sim.calculate_channel_roi(mmm_df=mmm_df) -def tests_step9_consolidatedataframe(config): - my_basic_params = load_parameters.define_basic_params(**config["basic_params"]) - sim = Simulate(my_basic_params) - baseline_df = sim.simulate_baseline(**config["baseline_params"]) - spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, **config["ad_spend_params"]) - spend_df = sim.simulate_media(spend_df=spend_df, **config["media_params"]) - spend_df = sim.simulate_cvr(spend_df=spend_df, **config["cvr_params"]) - mmm_df = sim.simulate_decay_returns(spend_df=spend_df, **config["adstock_params"]) +def tests_step9_consolidatedataframe(params): + sim = Simulate(params["basic_params"]) + baseline_df = sim.simulate_baseline(params["baseline_params"]) + spend_df = sim.simulate_ad_spend(baseline_sales_df=baseline_df, params=params["ad_spend_params"]) + spend_df = sim.simulate_media(spend_df=spend_df, params=params["media_params"]) + spend_df = sim.simulate_cvr(spend_df=spend_df, params=params["cvr_params"]) + mmm_df = sim.simulate_decay_returns(spend_df=spend_df, params=params["adstock_params"]) mmm_df = sim.calculate_conversions(mmm_df=mmm_df) mmm_df = sim.consolidate_dataframe(mmm_df=mmm_df, baseline_sales_df=baseline_df) - sim.finalize_output(mmm_df=mmm_df, **config["output_params"]) + sim.finalize_output(mmm_df=mmm_df, params=params["output_params"]) def test_run_with_config(config): - config = load_parameters.load_config(config_path="./examples/example_config.yaml") sim = Simulate() sim.run_with_config(config=config) From 5ebfcb5209d4175e3d0ee68fc24d770154f1fe3b Mon Sep 17 00:00:00 2001 From: RyanAugust Date: Thu, 30 Apr 2026 12:53:32 +0000 Subject: [PATCH 2/3] indents --- src/pysimmmulator/transforms.py | 100 ++++++++++++++++---------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/src/pysimmmulator/transforms.py b/src/pysimmmulator/transforms.py index 385a962..07ce2ac 100644 --- a/src/pysimmmulator/transforms.py +++ b/src/pysimmmulator/transforms.py @@ -2,59 +2,59 @@ import numpy as np def geometric_adstock(vector: pd.Series, lambda_: float) -> pd.Series: - """Applies geometric decay adstock to a vector.""" - decayed_vector = [vector.values[0]] - for i, val in enumerate(vector.values[1:]): - decayed_vector.append(val + lambda_ * decayed_vector[i]) - return pd.Series(decayed_vector, index=vector.index) + """Applies geometric decay adstock to a vector.""" + decayed_vector = [vector.values[0]] + for i, val in enumerate(vector.values[1:]): + decayed_vector.append(val + lambda_ * decayed_vector[i]) + return pd.Series(decayed_vector, index=vector.index) def weibull_adstock(vector: pd.Series, shape: float, scale: float, adstock_type: str = 'pdf') -> pd.Series: - """Applies Weibull adstock to a vector. - - Args: - vector (pd.Series): media vector - shape (float): shape parameter (k) - scale (float): scale parameter (theta) - adstock_type (str): 'pdf' or 'cdf' - """ - n = len(vector) - x = np.arange(n) - if adstock_type == 'pdf': - # Weibull PDF: (k/theta) * (x/theta)**(k-1) * exp(-(x/theta)**k) - # We normalize it so it can be used as a weighting vector - weights = (shape / scale) * (x / scale)**(shape - 1) * np.exp(-(x / scale)**shape) - else: - # Weibull CDF: 1 - exp(-(x/theta)**k) - # For adstock, we typically use the survival function (1-CDF) or its increments - weights = np.exp(-(x / scale)**shape) - - weights = weights / weights.sum() if weights.sum() > 0 else weights - - # Convolution for adstock - # We use 'full' and then slice to maintain length - adstocked = np.convolve(vector.values, weights)[:n] - return pd.Series(adstocked, index=vector.index) + """Applies Weibull adstock to a vector. + + Args: + vector (pd.Series): media vector + shape (float): shape parameter (k) + scale (float): scale parameter (theta) + adstock_type (str): 'pdf' or 'cdf' + """ + n = len(vector) + x = np.arange(n) + if adstock_type == 'pdf': + # Weibull PDF: (k/theta) * (x/theta)**(k-1) * exp(-(x/theta)**k) + # We normalize it so it can be used as a weighting vector + weights = (shape / scale) * (x / scale)**(shape - 1) * np.exp(-(x / scale)**shape) + else: + # Weibull CDF: 1 - exp(-(x/theta)**k) + # For adstock, we typically use the survival function (1-CDF) or its increments + weights = np.exp(-(x / scale)**shape) + + weights = weights / weights.sum() if weights.sum() > 0 else weights + + # Convolution for adstock + # We use 'full' and then slice to maintain length + adstocked = np.convolve(vector.values, weights)[:n] + return pd.Series(adstocked, index=vector.index) def scurve_saturation(vector: pd.Series, alpha: float, gamma: float) -> pd.Series: - """Applies S-curve saturation (Logistic) to a vector.""" - # gamma is treated as a quantile to find the inflection point - gamma_trans = np.quantile(np.linspace(min(vector), max(vector), num=100), gamma) - denom = vector**alpha + gamma_trans**alpha - return (vector**alpha / denom) * vector if np.any(denom != 0) else vector + """Applies S-curve saturation (Logistic) to a vector.""" + # gamma is treated as a quantile to find the inflection point + gamma_trans = np.quantile(np.linspace(min(vector), max(vector), num=100), gamma) + denom = vector**alpha + gamma_trans**alpha + return (vector**alpha / denom) * vector if np.any(denom != 0) else vector def hill_saturation(vector: pd.Series, alpha: float, gamma: float) -> pd.Series: - """Applies Hill saturation to a vector. - - Args: - vector (pd.Series): adstocked media vector - alpha (float): shape parameter (slope) - gamma (float): scale parameter (half-saturation point) - """ - # Hill function: x**alpha / (x**alpha + gamma**alpha) - # Often gamma is specified as a value in the same scale as x - # Here we'll treat gamma as a quantile similar to scurve for consistency in config if preferred, - # but the classic Hill uses an absolute value. - # Let's use absolute value for Hill to differentiate it. - inflection = gamma * np.max(vector) if gamma <= 1.0 else gamma - denom = vector**alpha + inflection**alpha - return (vector**alpha / denom) * vector if np.any(denom != 0) else vector + """Applies Hill saturation to a vector. + + Args: + vector (pd.Series): adstocked media vector + alpha (float): shape parameter (slope) + gamma (float): scale parameter (half-saturation point) + """ + # Hill function: x**alpha / (x**alpha + gamma**alpha) + # Often gamma is specified as a value in the same scale as x + # Here we'll treat gamma as a quantile similar to scurve for consistency in config if preferred, + # but the classic Hill uses an absolute value. + # Let's use absolute value for Hill to differentiate it. + inflection = gamma * np.max(vector) if gamma <= 1.0 else gamma + denom = vector**alpha + inflection**alpha + return (vector**alpha / denom) * vector if np.any(denom != 0) else vector From 5a4bb29840eff7141b7b32b775a6ee62b97b9432 Mon Sep 17 00:00:00 2001 From: RyanAugust Date: Thu, 30 Apr 2026 12:55:01 +0000 Subject: [PATCH 3/3] more indents --- src/pysimmmulator/load_parameters.py | 2 +- src/pysimmmulator/simulate.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pysimmmulator/load_parameters.py b/src/pysimmmulator/load_parameters.py index f9e2ace..f15d299 100644 --- a/src/pysimmmulator/load_parameters.py +++ b/src/pysimmmulator/load_parameters.py @@ -55,7 +55,7 @@ def create_all_parameters(config: dict) -> dict: params["output_params"] = OutputParameters(**config["output_params"]) if "geo_params" in config: - params["geo_params"] = GeoParameters(**config["geo_params"]) + params["geo_params"] = GeoParameters(**config["geo_params"]) return params diff --git a/src/pysimmmulator/simulate.py b/src/pysimmmulator/simulate.py index 0d32e24..1eec7c3 100644 --- a/src/pysimmmulator/simulate.py +++ b/src/pysimmmulator/simulate.py @@ -90,7 +90,7 @@ def simulate_baseline(self, params: BaselineParameters) -> pd.DataFrame: baseline_sales = base + trend + seasonality + error if np.any(baseline_sales < 0): - baseline_sales = np.where(baseline_sales < 0, 0, baseline_sales) + baseline_sales = np.where(baseline_sales < 0, 0, baseline_sales) return pd.DataFrame({ "days": days,