RyanAugust · RyanAugust · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026
diff --git a/examples/us_retail_exogenous_config.yaml b/examples/us_retail_exogenous_config.yaml
@@ -0,0 +1,101 @@
+basic_params:
+  years: 3
+  channels_impressions: ["Amazon", "TV", "Meta"]
+  channels_clicks: ["Search"]
+  frequency_of_campaigns: 1
+  start_date: "2023/1/1"
+  true_cvr:
+    Amazon: 0.006
+    TV: 0.001
+    Meta: 0.008
+    Search: 0.012
+  revenue_per_conv: 15.0
+
+baseline_params:
+  base_p: 1000
+  trend_p: 200
+  temp_var: 50
+  temp_coef_mean: 10
+  temp_coef_sd: 2
+  error_std: 50
+  exogenous_factors:
+    - name: "New Year's Day"
+      dates: ["2023-01-01", "2024-01-01", "2025-01-01"]
+      impact: 1.5
+      type: "multiplier"
+    - name: "Independence Day"
+      dates: ["2023-07-04", "2024-07-04", "2025-07-04"]
+      impact: 1.3
+      type: "multiplier"
+    - name: "Labor Day Weekend"
+      start_date: "2023-09-02"
+      end_date: "2023-09-04"
+      impact: 1.4
+      type: "multiplier"
+    - name: "Black Friday"
+      dates: ["2023-11-24", "2024-11-29", "2025-11-28"]
+      impact: 3.5
+      type: "multiplier"
+    - name: "Cyber Monday"
+      dates: ["2023-11-27", "2024-12-02", "2025-12-01"]
+      impact: 3.0
+      type: "multiplier"
+    - name: "Christmas Peak"
+      start_date: "2023-12-20"
+      end_date: "2023-12-24"
+      impact: 2.0
+      type: "multiplier"
+    - name: "Economic Stimulus"
+      start_date: "2023-03-01"
+      end_date: "2023-03-15"
+      impact: 500
+      type: "additive"
+
+ad_spend_params:
+  campaign_spend_mean: 500000
+  campaign_spend_std: 100000
+  max_min_proportion_on_each_channel:
+    Amazon:
+      min: 0.2
+      max: 0.4
+    TV:
+      min: 0.1
+      max: 0.3
+    Meta:
+      min: 0.1
+      max: 0.2
+
+media_params:
+  true_cpm:
+    Amazon: 4.0
+    TV: 15.0
+    Meta: 5.0
+  true_cpc:
+    Search: 3.0
+  noisy_cpm_cpc:
+    Amazon: {loc: 0.0, scale: 0.5}
+    TV: {loc: 0.0, scale: 2.0}
+    Meta: {loc: 0.0, scale: 1.0}
+    Search: {loc: 0.0, scale: 0.3}
+
+cvr_params:
+  noisy_cvr:
+    Amazon: {loc: 0.0, scale: 0.01}
+    TV: {loc: 0.0, scale: 0.005}
+    Meta: {loc: 0.0, scale: 0.02}
+    Search: {loc: 0.0, scale: 0.01}
+
+adstock_params:
+  adstock:
+    Amazon: {type: "geometric", params: {lambda: 0.1}}
+    TV: {type: "geometric", params: {lambda: 0.3}}
+    Meta: {type: "geometric", params: {lambda: 0.15}}
+    Search: {type: "geometric", params: {lambda: 0.05}}
+  saturation:
+    Amazon: {type: "scurve", params: {alpha: 3.0, gamma: 0.2}}
+    TV: {type: "scurve", params: {alpha: 2.0, gamma: 0.3}}
+    Meta: {type: "scurve", params: {alpha: 4.0, gamma: 0.25}}
+    Search: {type: "scurve", params: {alpha: 1.0, gamma: 0.5}}
+
+output_params:
+  aggregation_level: "daily"
diff --git a/src/pysimmmulator/geos.py b/src/pysimmmulator/geos.py
@@ -121,13 +121,13 @@ def distribute_to_geos(
   media_cost_spec: tuple[float, float] = (0.0, 0.069),
   perf_spec: tuple[float, float] = (0.0, 0.069)
 ) -> 'pd.DataFrame':
-  """Distributes MMM data to supplied geographies. Allows randomization in the scale of the distributon
+  """Distributes MMM data to supplied geographies. Allows randomization in the scale of the distribution.
 
   Args:
     mmm_input (pd.DataFrame): simulated MMM data that was generated as part of a prior process
     geo_details (dict): formulated dict or output of the `geos` creation call (ie `geos(count=50)`)
     random_seed (int): random seed for rng--if needed
-    rng (np.random.Generator): optional random number generator
+    rng (np.random.Generator): optional pre-instantiated random number generator
     dist_spec (tuple[float, float]): Parameters to control the normal distribution function for populations of the geographies
     media_cost_spec (tuple[float, float]): Parameters to control the normal distribution function for allocation of spend across geographies
     perf_spec (tuple[float, float]): Parameters to control the normal distribution function for allocation of performance across geographies

diff --git a/src/pysimmmulator/param_handlers.py b/src/pysimmmulator/param_handlers.py
@@ -80,7 +80,9 @@ class BaselineParameters:
       (the larger this number, the more important seasonality is for sales)
     temp_coef_sd (int): The standard deviation of how important seasonality is in our data
       (the larger this number, the more variable the importance of seasonality is for sales)
-    error_std (int): Amount of statistical noise added to baseline sales (the larger this number, the noisier baseline sales will be)."""
+    error_std (int): Amount of statistical noise added to baseline sales (the larger this number, the noisier baseline sales will be).
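+    exogenous_factors (Optional[list[dict]]): External factors such as holidays or shocks; each dict sets `impact`, `type` ("multiplier", otherwise additive) and either `dates` or `start_date`/`end_date`.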
+  """
 
   basic_params: BasicParameters
   base_p: int
@@ -89,6 +91,7 @@ class BaselineParameters:
   temp_coef_mean: int
   temp_coef_sd: int
   error_std: int
+  exogenous_factors: Optional[list[dict]] = None
 
   def __post_init__(self):
     assert self.error_std < self.base_p, "Error std can not exceed base sales value"

diff --git a/src/pysimmmulator/simulate.py b/src/pysimmmulator/simulate.py
@@ -67,6 +67,7 @@ def simulate_baseline(self, params: BaselineParameters) -> pd.DataFrame:
     - Trend: Linear growth over the period (total growth of trend_p)
     - Seasonality: Modeled via a sine function (height temp_var) scaled by a random
       importance coefficient (mean temp_coef_mean, std temp_coef_sd)
+    - Exogenous Factors: Holidays or shocks; multipliers scale base + trend + seasonality, then additive impacts are added on top.
     - Error: Gaussian noise (std error_std)
 
     If the combined terms result in negative sales, they are clamped to zero.
@@ -86,21 +87,50 @@ def simulate_baseline(self, params: BaselineParameters) -> pd.DataFrame:
     temp = self.baseline_params.temp_var * np.sin(days * 3.14 / 182.5)
     seasonality = self.rng.normal(loc=self.baseline_params.temp_coef_mean, scale=self.baseline_params.temp_coef_sd, size=1) * temp
 
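+    # Calculate exogenous impacts. NOTE(review): date_backbone is only assigned when exogenous_factors is truthy, but the "date" column below reads it whenever the value is not None, so an empty list leaves it unbound.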
+    multiplier_impact = np.ones(len(days))
+    additive_impact = np.zeros(len(days))
+
+    if self.baseline_params.exogenous_factors:
+      date_backbone = pd.Series(pd.date_range(start=self.basic_params.start_date, periods=len(days), freq="D"))
+      for factor in self.baseline_params.exogenous_factors:
+        impact = factor.get("impact", 0.0)
+        f_type = factor.get("type", "multiplier")
+
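+        # Build a boolean mask of the simulated days this factor hits (stays all False if neither `dates` nor a start/end range is given)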
+        mask = np.zeros(len(days), dtype=bool)
+        if "dates" in factor:
+          event_dates = pd.to_datetime(factor["dates"])
+          mask = date_backbone.isin(event_dates)
+        elif "start_date" in factor and "end_date" in factor:
+          start = pd.to_datetime(factor["start_date"])
+          end = pd.to_datetime(factor["end_date"])
+          mask = (date_backbone >= start) & (date_backbone <= end)
+
+        if f_type == "multiplier":
+          multiplier_impact[mask] *= impact
+        else:
+          additive_impact[mask] += impact
+
     error = self._truncated_normal(loc=0, scale=self.baseline_params.error_std, size=self.basic_params.years * 365, low=-np.inf)
 
-    baseline_sales = base + trend + seasonality + error
+    baseline_sales = (base + trend + seasonality) * multiplier_impact + additive_impact + error
     if np.any(baseline_sales < 0):
       baseline_sales = np.where(baseline_sales < 0, 0, baseline_sales)
 
     return pd.DataFrame({
       "days": days,
+      "date": date_backbone if self.baseline_params.exogenous_factors is not None else pd.date_range(start=self.basic_params.start_date, periods=len(days), freq="D"),
       "baseline_sales": baseline_sales,
       "base": base,
       "trend": trend,
       "temp": temp,
       "seasonality": seasonality,
+      "multiplier_impact": multiplier_impact,
+      "additive_impact": additive_impact,
       "error": error,
     })
+
   def simulate_ad_spend( self, baseline_sales_df: pd.DataFrame, params: AdSpendParameters) -> pd.DataFrame:
     """Simulation of ad spend based on normal distribution parameters for campaign spend.
     Overall campaign spend is then divided amongst each channel based on passed
@@ -291,7 +321,12 @@ def _reformat_for_mmm(self, spend_df: pd.DataFrame) -> pd.DataFrame:
   def _simulate_decay(self, mmm_df: pd.DataFrame, adstock_config: dict) -> pd.DataFrame:
     """Helper function for the simulation of adstocking.
     Ad stocking is the idea that an ad has a lasting effect for some amount of time in the future.
-    """
+
+    Args:
+      mmm_df (pd.DataFrame): MMM DataFrame containing media metrics.
+      adstock_config (dict): Nested dictionary mapping channels to adstock types and parameters.
+    Returns:
+      pd.DataFrame: Updated mmm_df with adstocked media columns."""
     from .transforms import geometric_adstock, weibull_adstock
     for channel, config in adstock_config.items():
       metric = ("impressions" if channel in self.basic_params.channels_impressions else "clicks")
@@ -312,7 +347,13 @@ def _simulate_decay(self, mmm_df: pd.DataFrame, adstock_config: dict) -> pd.Data
     return mmm_df
 
   def _simulate_diminishing_returns(self, mmm_df: pd.DataFrame, saturation_config: dict) -> pd.DataFrame:
-    """Helper function for the simulation of diminishing returns."""
+    """Helper function for the simulation of diminishing returns.
+
+    Args:
+      mmm_df (pd.DataFrame): MMM DataFrame containing adstocked media metrics.
+      saturation_config (dict): Nested dictionary mapping channels to saturation types and parameters.
+    Returns:
+      pd.DataFrame: Updated mmm_df with saturated media columns."""
     from .transforms import scurve_saturation, hill_saturation
     for channel, config in saturation_config.items():
       metric = ("impressions" if channel in self.basic_params.channels_impressions else "clicks")
@@ -463,6 +504,17 @@ def finalize_output(self, mmm_df: pd.DataFrame, params: OutputParameters) -> pd.
     return final_df
 
   def run_with_config(self, config: dict) -> tuple[pd.DataFrame, dict]:
+    """Orchestrates the full simulation pipeline using a configuration dictionary.
+
+    This method handles parameter instantiation, baseline simulation, media and CVR
+    simulation, adstock/saturation, conversion calculation, and optional
+    geographic distribution.
+
+    Args:
+      config (dict): Complete configuration dictionary, passed to `create_all_parameters` to build the parameter groups.
+    Returns:
+      tuple[pd.DataFrame, dict]: Finalized simulation DataFrame and a dictionary
+        of ground-truth ROI values per channel."""
     from .load_parameters import create_all_parameters
     params = create_all_parameters(config)
     self.basic_params = params["basic_params"]
@@ -491,23 +543,28 @@ def __init__(self):
     self.rois = []
 
   def stash_outputs(self, final_df: pd.DataFrame, channel_roi: dict):
-    """Stores the final simulation dataframe as well as the ground truth channel ROI values
-    for each run of the multiple simulations.
-    """
+    """Stores the outputs of a single simulation run.
+
+    Args:
+      final_df (pd.DataFrame): Final simulation DataFrame.
+      channel_roi (dict): Ground-truth ROI values per channel for that run."""
     self.final_frames.append(final_df)
     self.rois.append(channel_roi)
 
   @property
   def get_data(self):
-    """Provies the iterable generator for simulaton final dataframes and channel ground truth ROI values
+    """Provides the iterable generator for simulation final dataframes and channel ground truth ROI values
 
-    Args:
-    	None
     Returns:
-    	data (iterable): iterable of final sim dataframes and channel ROI values"""
+      data (iterable): iterable of final sim dataframes and channel ROI values"""
     return self.data
 
   def run(self, config: dict, runs: int) -> None:
+    """Executes multiple simulation runs.
+
+    Args:
+      config (dict): Simulation configuration.
+      runs (int): Number of runs to execute."""
     for run in range(runs):
       frame, roi = self.run_with_config(config=config)
       self.stash_outputs(final_df=frame, channel_roi=roi)

diff --git a/src/pysimmmulator/study.py b/src/pysimmmulator/study.py
@@ -70,7 +70,7 @@ def generate(self, count: int = 1) -> 'np.array':
 
     Args:
       count (int): number of study results to return (default is 1)
-    Retuns:
+    Returns:
       study_results (iterable[float]): an array of study results """
     return self.rng.normal(loc=self._true_roi + self._bias, scale=self._stdev, size=count)
 
@@ -113,7 +113,7 @@ def generate(self, count: int = 1) -> dict[str, 'np.array']:
 
     Args:
       count (int): number of study results to return (default is 1)
-    Retuns:
+    Returns:
       study_results (dict[iterable[float]]): an array of study results"""
     return {k: v.generate(count) for k, v in self._study_hold.items()}