Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions examples/us_retail_exogenous_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Example configuration: a three-year US retail simulation in which holidays
# and one-off shocks are layered onto baseline sales via `exogenous_factors`.
basic_params:
  years: 3
  channels_impressions: ["Amazon", "TV", "Meta"]
  channels_clicks: ["Search"]
  frequency_of_campaigns: 1
  start_date: "2023/1/1"
  # Ground-truth conversion rate per channel.
  true_cvr:
    Amazon: 0.006
    TV: 0.001
    Meta: 0.008
    Search: 0.012
  revenue_per_conv: 15.0

baseline_params:
  base_p: 1000
  trend_p: 200
  temp_var: 50
  temp_coef_mean: 10
  temp_coef_sd: 2
  error_std: 50
  # Each factor selects days either with an explicit `dates` list or with an
  # inclusive `start_date`/`end_date` range (`dates` wins when both are given).
  # `type: "multiplier"` scales base + trend + seasonality on the selected
  # days; any other type adds `impact` to baseline sales on those days.
  # A range describes a single window, so recurring multi-day events need one
  # entry per year.
  exogenous_factors:
    - name: "New Year's Day"
      dates: ["2023-01-01", "2024-01-01", "2025-01-01"]
      impact: 1.5
      type: "multiplier"
    - name: "Independence Day"
      dates: ["2023-07-04", "2024-07-04", "2025-07-04"]
      impact: 1.3
      type: "multiplier"
    - name: "Labor Day Weekend"
      start_date: "2023-09-02"
      end_date: "2023-09-04"
      impact: 1.4
      type: "multiplier"
    - name: "Labor Day Weekend 2024"
      start_date: "2024-08-31"
      end_date: "2024-09-02"
      impact: 1.4
      type: "multiplier"
    - name: "Labor Day Weekend 2025"
      start_date: "2025-08-30"
      end_date: "2025-09-01"
      impact: 1.4
      type: "multiplier"
    - name: "Black Friday"
      dates: ["2023-11-24", "2024-11-29", "2025-11-28"]
      impact: 3.5
      type: "multiplier"
    - name: "Cyber Monday"
      dates: ["2023-11-27", "2024-12-02", "2025-12-01"]
      impact: 3.0
      type: "multiplier"
    - name: "Christmas Peak"
      start_date: "2023-12-20"
      end_date: "2023-12-24"
      impact: 2.0
      type: "multiplier"
    - name: "Christmas Peak 2024"
      start_date: "2024-12-20"
      end_date: "2024-12-24"
      impact: 2.0
      type: "multiplier"
    - name: "Christmas Peak 2025"
      start_date: "2025-12-20"
      end_date: "2025-12-24"
      impact: 2.0
      type: "multiplier"
    # One-off shock: intentionally limited to a single window in 2023.
    - name: "Economic Stimulus"
      start_date: "2023-03-01"
      end_date: "2023-03-15"
      impact: 500
      type: "additive"

ad_spend_params:
  campaign_spend_mean: 500000
  campaign_spend_std: 100000
  # NOTE(review): "Search" has no min/max entry here; presumably it receives
  # the remaining share of spend -- confirm against the ad-spend simulation.
  max_min_proportion_on_each_channel:
    Amazon:
      min: 0.2
      max: 0.4
    TV:
      min: 0.1
      max: 0.3
    Meta:
      min: 0.1
      max: 0.2

media_params:
  true_cpm:
    Amazon: 4.0
    TV: 15.0
    Meta: 5.0
  true_cpc:
    Search: 3.0
  noisy_cpm_cpc:
    Amazon: {loc: 0.0, scale: 0.5}
    TV: {loc: 0.0, scale: 2.0}
    Meta: {loc: 0.0, scale: 1.0}
    Search: {loc: 0.0, scale: 0.3}

cvr_params:
  noisy_cvr:
    Amazon: {loc: 0.0, scale: 0.01}
    TV: {loc: 0.0, scale: 0.005}
    Meta: {loc: 0.0, scale: 0.02}
    Search: {loc: 0.0, scale: 0.01}

adstock_params:
  adstock:
    Amazon: {type: "geometric", params: {lambda: 0.1}}
    TV: {type: "geometric", params: {lambda: 0.3}}
    Meta: {type: "geometric", params: {lambda: 0.15}}
    Search: {type: "geometric", params: {lambda: 0.05}}
  saturation:
    Amazon: {type: "scurve", params: {alpha: 3.0, gamma: 0.2}}
    TV: {type: "scurve", params: {alpha: 2.0, gamma: 0.3}}
    Meta: {type: "scurve", params: {alpha: 4.0, gamma: 0.25}}
    Search: {type: "scurve", params: {alpha: 1.0, gamma: 0.5}}

output_params:
  aggregation_level: "daily"
4 changes: 2 additions & 2 deletions src/pysimmmulator/geos.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,13 @@ def distribute_to_geos(
media_cost_spec: tuple[float, float] = (0.0, 0.069),
perf_spec: tuple[float, float] = (0.0, 0.069)
) -> 'pd.DataFrame':
"""Distributes MMM data to supplied geographies. Allows randomization in the scale of the distributon
"""Distributes MMM data to supplied geographies. Allows randomization in the scale of the distribution.

Args:
mmm_input (pd.DataFrame): simulated MMM data that was generated as part of a prior process
geo_details (dict): formulated dict or output of the `geos` creation call (ie `geos(count=50)`)
random_seed (int): random seed for rng--if needed
rng (np.random.Generator): optional random number generator
rng (np.random.Generator): optional pre-instantiated random number generator
dist_spec (tuple[float, float]): Parameters to control the normal distribution function for populations of the geographies
media_cost_spec (tuple[float, float]): Parameters to control the normal distribution function for allocation of spend across geographies
perf_spec (tuple[float, float]): Parameters to control the normal distribution function for allocation of performance across geographies
Expand Down
5 changes: 4 additions & 1 deletion src/pysimmmulator/param_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ class BaselineParameters:
(the larger this number, the more important seasonality is for sales)
temp_coef_sd (int): The standard deviation of how important seasonality is in our data
(the larger this number, the more variable the importance of seasonality is for sales)
error_std (int): Amount of statistical noise added to baseline sales (the larger this number, the noisier baseline sales will be)."""
error_std (int): Amount of statistical noise added to baseline sales (the larger this number, the noisier baseline sales will be).
exogenous_factors (Optional[list[dict]]): List of external factors like holidays or shocks.
"""

basic_params: BasicParameters
base_p: int
Expand All @@ -89,6 +91,7 @@ class BaselineParameters:
temp_coef_mean: int
temp_coef_sd: int
error_std: int
exogenous_factors: Optional[list[dict]] = None

def __post_init__(self):
assert self.error_std < self.base_p, "Error std can not exceed base sales value"
Expand Down
77 changes: 67 additions & 10 deletions src/pysimmmulator/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def simulate_baseline(self, params: BaselineParameters) -> pd.DataFrame:
- Trend: Linear growth over the period (total growth of trend_p)
- Seasonality: Modeled via a sine function (height temp_var) scaled by a random
importance coefficient (mean temp_coef_mean, std temp_coef_sd)
- Exogenous Factors: Holidays or shocks applied as multipliers or additions.
- Error: Gaussian noise (std error_std)

If the combined terms result in negative sales, they are clamped to zero.
Expand All @@ -86,21 +87,50 @@ def simulate_baseline(self, params: BaselineParameters) -> pd.DataFrame:
temp = self.baseline_params.temp_var * np.sin(days * 3.14 / 182.5)
seasonality = self.rng.normal(loc=self.baseline_params.temp_coef_mean, scale=self.baseline_params.temp_coef_sd, size=1) * temp

# Calculate Exogenous Impacts
multiplier_impact = np.ones(len(days))
additive_impact = np.zeros(len(days))

if self.baseline_params.exogenous_factors:
date_backbone = pd.Series(pd.date_range(start=self.basic_params.start_date, periods=len(days), freq="D"))
for factor in self.baseline_params.exogenous_factors:
impact = factor.get("impact", 0.0)
f_type = factor.get("type", "multiplier")

# Identify target indices
mask = np.zeros(len(days), dtype=bool)
if "dates" in factor:
event_dates = pd.to_datetime(factor["dates"])
mask = date_backbone.isin(event_dates)
elif "start_date" in factor and "end_date" in factor:
start = pd.to_datetime(factor["start_date"])
end = pd.to_datetime(factor["end_date"])
mask = (date_backbone >= start) & (date_backbone <= end)

if f_type == "multiplier":
multiplier_impact[mask] *= impact
else:
additive_impact[mask] += impact

error = self._truncated_normal(loc=0, scale=self.baseline_params.error_std, size=self.basic_params.years * 365, low=-np.inf)

baseline_sales = base + trend + seasonality + error
baseline_sales = (base + trend + seasonality) * multiplier_impact + additive_impact + error
if np.any(baseline_sales < 0):
baseline_sales = np.where(baseline_sales < 0, 0, baseline_sales)

return pd.DataFrame({
"days": days,
"date": date_backbone if self.baseline_params.exogenous_factors is not None else pd.date_range(start=self.basic_params.start_date, periods=len(days), freq="D"),
"baseline_sales": baseline_sales,
"base": base,
"trend": trend,
"temp": temp,
"seasonality": seasonality,
"multiplier_impact": multiplier_impact,
"additive_impact": additive_impact,
"error": error,
})

def simulate_ad_spend( self, baseline_sales_df: pd.DataFrame, params: AdSpendParameters) -> pd.DataFrame:
"""Simulation of ad spend based on normal distribution parameters for campaign spend.
Overall campaign spend is then divided amongst each channel based on passed
Expand Down Expand Up @@ -291,7 +321,12 @@ def _reformat_for_mmm(self, spend_df: pd.DataFrame) -> pd.DataFrame:
def _simulate_decay(self, mmm_df: pd.DataFrame, adstock_config: dict) -> pd.DataFrame:
"""Helper function for the simulation of adstocking.
Ad stocking is the idea that an ad has a lasting effect for some amount of time in the future.
"""

Args:
mmm_df (pd.DataFrame): MMM DataFrame containing media metrics.
adstock_config (dict): Nested dictionary mapping channels to adstock types and parameters.
Returns:
pd.DataFrame: Updated mmm_df with adstocked media columns."""
from .transforms import geometric_adstock, weibull_adstock
for channel, config in adstock_config.items():
metric = ("impressions" if channel in self.basic_params.channels_impressions else "clicks")
Expand All @@ -312,7 +347,13 @@ def _simulate_decay(self, mmm_df: pd.DataFrame, adstock_config: dict) -> pd.Data
return mmm_df

def _simulate_diminishing_returns(self, mmm_df: pd.DataFrame, saturation_config: dict) -> pd.DataFrame:
"""Helper function for the simulation of diminishing returns."""
"""Helper function for the simulation of diminishing returns.

Args:
mmm_df (pd.DataFrame): MMM DataFrame containing adstocked media metrics.
saturation_config (dict): Nested dictionary mapping channels to saturation types and parameters.
Returns:
pd.DataFrame: Updated mmm_df with saturated media columns."""
from .transforms import scurve_saturation, hill_saturation
for channel, config in saturation_config.items():
metric = ("impressions" if channel in self.basic_params.channels_impressions else "clicks")
Expand Down Expand Up @@ -463,6 +504,17 @@ def finalize_output(self, mmm_df: pd.DataFrame, params: OutputParameters) -> pd.
return final_df

def run_with_config(self, config: dict) -> tuple[pd.DataFrame, dict]:
"""Orchestrates the full simulation pipeline using a configuration dictionary.

This method handles parameter instantiation, baseline simulation, media and CVR
simulation, adstock/saturation, conversion calculation, and optional
geographic distribution.

Args:
config (dict): Complete configuration dictionary.
Returns:
tuple[pd.DataFrame, dict]: Finalized simulation DataFrame and a dictionary
of ground-truth ROI values per channel."""
from .load_parameters import create_all_parameters
params = create_all_parameters(config)
self.basic_params = params["basic_params"]
Expand Down Expand Up @@ -491,23 +543,28 @@ def __init__(self):
self.rois = []

def stash_outputs(self, final_df: pd.DataFrame, channel_roi: dict):
    """Stores the outputs of a single simulation run.

    The frame and its ROI dictionary are appended to their respective lists in
    the same call, so entry ``i`` of ``final_frames`` and entry ``i`` of
    ``rois`` always belong to the same run.

    Args:
        final_df (pd.DataFrame): Final simulation DataFrame.
        channel_roi (dict): Ground-truth ROI values."""
    self.final_frames.append(final_df)
    self.rois.append(channel_roi)

@property
def get_data(self):
    """Provides the iterable generator for simulation final dataframes and channel ground truth ROI values

    Args:
        None
    Returns:
        data (iterable): iterable of final sim dataframes and channel ROI values"""
    # NOTE(review): `self.data` is not assigned anywhere in the visible code;
    # confirm it is populated (presumably by `run`) before this is accessed.
    return self.data

def run(self, config: dict, runs: int) -> None:
"""Executes multiple simulation runs.

Args:
config (dict): Simulation configuration.
runs (int): Number of runs to execute."""
for run in range(runs):
frame, roi = self.run_with_config(config=config)
self.stash_outputs(final_df=frame, channel_roi=roi)
Expand Down
4 changes: 2 additions & 2 deletions src/pysimmmulator/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def generate(self, count: int = 1) -> 'np.array':

Args:
count (int): number of study results to return (default is 1)
Retuns:
Returns:
study_results (iterable[float]): an array of study results """
return self.rng.normal(loc=self._true_roi + self._bias, scale=self._stdev, size=count)

Expand Down Expand Up @@ -113,7 +113,7 @@ def generate(self, count: int = 1) -> dict[str, 'np.array']:

Args:
count (int): number of study results to return (default is 1)
Retuns:
Returns:
study_results (dict[iterable[float]]): an array of study results"""
return {k: v.generate(count) for k, v in self._study_hold.items()}

Expand Down
Loading
Loading