ReEDS-Model · kodiobika · Apr 24, 2026 · May 1, 2026 · May 1, 2026 · May 26, 2026
diff --git a/hourlize/inputs/load/sector_config.json b/hourlize/inputs/load/sector_config.json
@@ -45,10 +45,12 @@
     "subsectors": {
       "commercial": ["data center cooling", "data center it"]
     },
-    "model_years": [2021, 2025, 2030, 2035, 2040, 2045, 2050],
-    "filepaths": ["/kfs2/projects/eerload/challoran/eer_splice/dummy_agg_op_datacenters_by_state.csv"],
-    "unit_conversion_factor": 1,
-    "timezone": "Etc/GMT+6",
-    "regional_scope": "state"
+    "model_years": [2025, 2030, 2035, 2040, 2045, 2050],
+    "scenario": "central",
+    "national_demand_source": "/projects/largeload/geospatial/runs/random_forest_base_weights_01_09_2026/downscaling_2026-01-07_agg64/eer_national_central/eer_national_central_downscaled_projections.csv",
+    "cooling_proportions_source": "/projects/largeload/reVeal2ReEDS/files/{scenario}_dc_cooling_prop.csv",
+    "propagation_source": "/projects/largeload/reVeal2ReEDS/files/weather_year_propagation.csv",
+    "replace_existing_data_center_demand": true,
+    "state_proportions_source": "/projects/eerload/source_eer_load_profiles/20250512_eer_download/shape_outputs_2025-05-12/annual_files/data center load allocation ADP 2024.xlsx"
   }
 }
diff --git a/hourlize/load.py b/hourlize/load.py
@@ -9,6 +9,7 @@
 import pandas as pd
 import site
 from types import SimpleNamespace
+from reveal2reeds import reveal2reeds
 
 def get_state_name_code_map(reeds_path: str) -> dict:
     """
@@ -269,6 +270,24 @@ def create_hourly_state_load_for_model_year(
         compression='gzip',
         parse_dates=['weather_datetime']
     )
+
+    # If applicable, replace or add to data center cooling and IT projections,
+    # as specified in inputs/load/sector_config.json
+    if 'Data Centers' in replace_sectors:
+        data_center_config = sector_config['Data Centers']
+        data_center_config['cooling_proportions_source'] = (
+            data_center_config['cooling_proportions_source']
+            .format(scenario=data_center_config['scenario'])
+        )
+        if model_year in data_center_config['model_years']:
+            df_load = reveal2reeds.apply_custom_data_center_demand_projections(
+                df_load,
+                model_year,
+                data_center_config
+            )
+        else:
+            pass
+
     # Downselect to specified weather years
     df_load = df_load.loc[df_load.weather_datetime.dt.year.isin(weather_years)]
 
@@ -283,6 +302,10 @@ def create_hourly_state_load_for_model_year(
     # sectoral load from the raw load profiles
     replacement_load_list = []
     for sector in replace_sectors:
+        # Skip 'data centers' sector, as it was already processed above
+        if sector == 'Data Centers':
+            continue
+
         print(f"Removing endogenous load for '{sector}' sector...")
         if sector not in sector_config:
             raise NotImplementedError(
@@ -348,6 +371,8 @@ def create_hourly_state_load_for_model_year(
             model_year
         )
         for sector in replace_sectors
+        # Skip 'data centers' sector, as it was already processed above
+        if sector != 'Data Centers'
     ]
 
     # Aggregate the exogenous sectoral load to the state level and
@@ -443,9 +468,8 @@ def main(
             )
 
         output_fpath = os.path.join(
-            reeds_path,
-            "inputs",
-            "load",
+            cf.outpath,
+            'results',
             f"demand_{scenario_outfile_prefix_map[scenario]}.h5"
         )
         for model_year in model_years:

diff --git a/hourlize/reveal2reeds/reveal2reeds.py b/hourlize/reveal2reeds/reveal2reeds.py
@@ -0,0 +1,239 @@
+import numpy as np
+import pandas as pd
+
+def get_national_model_year_data_center_demand(
+    national_demand_source_path: str,
+    model_year: int
+) -> int:
+    data_center_demand = pd.read_csv(national_demand_source_path)
+    model_year_data_center_demand = (
+        data_center_demand.loc[(
+            data_center_demand.year == model_year
+        )]
+        .copy()
+    )
+    national_model_year_data_center_demand = (
+        model_year_data_center_demand['total_data_center_mw'].sum()
+    )
+
+    return national_model_year_data_center_demand
+
+def get_propagation_by_weather_year(
+    propagation_source_path: str,
+    scenario: str
+) -> pd.Series:
+    propagation_by_weather_year = pd.read_csv(propagation_source_path)
+    propagation_by_weather_year = (
+        propagation_by_weather_year.loc[(
+            propagation_by_weather_year.scenario == scenario
+        )]
+        .set_index('year')
+        ['avg_prop']
+    )
+
+    return propagation_by_weather_year
+
+
+def calculate_national_data_center_demand_hourly(
+    df_load: pd.DataFrame,
+    model_year: int,
+    scenario: str,
+    national_demand_source_path: str,
+    propagation_source_path: str
+):
+    # Calculate national projected data center demand for the model year
+    national_data_center_demand = get_national_model_year_data_center_demand(
+        national_demand_source_path,
+        model_year
+    )
+
+    # Get propagation factors by weather year for the given scenario.
+    # Propagation factors represent the percentage of projected national
+    # data center demand for the model year that is expected to be
+    # realized during each hour of each weather year.
+    propagation_by_weather_year = get_propagation_by_weather_year(
+        propagation_source_path,
+        scenario
+    )
+
+    # Estimate national hourly load values for each weather year
+    # by multiplying the propagation factors by national data
+    # center demand for the model year.
+    national_data_center_demand_hourly = pd.DataFrame(
+        index=df_load['weather_datetime'].drop_duplicates()
+    )
+    national_data_center_demand_hourly['propagation_factor'] = (
+        national_data_center_demand_hourly.index.year
+        .map(propagation_by_weather_year)
+    )
+    national_data_center_demand_hourly['demand_MW'] = (
+        national_data_center_demand_hourly['propagation_factor']
+        * national_data_center_demand
+    )
+    national_data_center_demand_hourly = (
+        national_data_center_demand_hourly['demand_MW']
+    )
+
+    return national_data_center_demand_hourly
+
+def get_data_center_cooling_weights(
+    cooling_proportions_source_path: str
+) -> pd.DataFrame:
+    state_cooling_weights = pd.read_csv(cooling_proportions_source_path)
+    state_cooling_weights["weather_datetime"] = (
+        pd.to_datetime(state_cooling_weights["weather_datetime"])
+    )
+    national_cooling_weights = (
+        state_cooling_weights.groupby("weather_datetime")
+        ["cooling_prop"]
+        .mean()
+    )
+
+    return national_cooling_weights
+
+def get_data_center_state_weights(
+    state_proportions_source_path: str,
+    model_year: int,
+    scenario: str
+) -> pd.DataFrame:
+    data_center_year = 2024 if model_year == 2025 else model_year
+    state_weights = pd.read_excel(state_proportions_source_path)
+    state_weights = (
+        state_weights.loc[
+            (state_weights['Run Name'] == scenario)
+            & (state_weights['Year'] == data_center_year)
+        ]
+        .set_index('State')
+        ["% of Total Data Center Load"]
+    )
+
+    return state_weights
+
+
+def apply_state_and_subsector_weights(
+    national_demand: pd.DataFrame,
+    state_weights: pd.Series,
+    subsector_weights: pd.Series,
+    subsector: str,
+):
+    national_subsector_demand = national_demand * subsector_weights
+    state_subsector_demand = pd.DataFrame(
+        np.outer(national_subsector_demand, state_weights),
+        index=national_subsector_demand.index,
+        columns=state_weights.index
+    )
+    state_subsector_demand = (
+        state_subsector_demand.reset_index()
+        .assign(
+            sector='commercial',
+            subsector=subsector,
+            dispatch_feeder='Commercial'
+        )
+        .rename_axis(columns='')
+    )
+
+    return state_subsector_demand
+
+def calculate_state_subsector_data_center_demand_hourly(
+    df_load: pd.DataFrame,
+    model_year: int,
+    scenario: str,
+    national_demand_source_path: str,
+    cooling_proportions_source_path: str,
+    propagation_source_path: str,
+    state_proportions_source_path: str
+) -> pd.DataFrame:
+    # Calculate hourly national data center demand
+    national_data_center_demand_hourly = (
+        calculate_national_data_center_demand_hourly(
+            df_load,
+            model_year,
+            scenario,
+            national_demand_source_path,
+            propagation_source_path
+        )
+    )
+    # Calculate proportion of national demand attributable to each state
+    state_weights = get_data_center_state_weights(
+        state_proportions_source_path,
+        model_year,
+        scenario
+    )
+    state_weights = state_weights.loc[state_weights.index.isin(df_load.columns)]
+    # Get proportion of hourly demand attributable to cooling
+    data_center_cooling_weights = get_data_center_cooling_weights(
+        cooling_proportions_source_path
+    )
+    # Calculate state-by-state hourly demand for data center cooling subsector
+    state_data_center_cooling_demand_hourly = apply_state_and_subsector_weights(
+        national_demand=national_data_center_demand_hourly,
+        state_weights=state_weights,
+        subsector_weights=data_center_cooling_weights,
+        subsector='data center cooling',
+    )
+    # Calculate state-by-state hourly demand for data center IT subsector
+    data_center_it_weights = 1 - data_center_cooling_weights
+    state_data_center_it_demand_hourly = apply_state_and_subsector_weights(
+        national_demand=national_data_center_demand_hourly,
+        state_weights=state_weights,
+        subsector_weights=data_center_it_weights,
+        subsector='data center it',
+    )
+    # Concatenate all state subsector-level demand
+    state_subsector_data_center_demand_hourly = (
+        pd.concat(
+            [
+                state_data_center_cooling_demand_hourly,
+                state_data_center_it_demand_hourly
+            ],
+            ignore_index=True
+        )
+        .fillna(0)
+    )
+    return state_subsector_data_center_demand_hourly
+
+def apply_custom_data_center_demand_projections(
+    df_load: pd.DataFrame,
+    model_year: int,
+    cf: dict
+):
+    state_subsector_data_center_demand_hourly = (
+        calculate_state_subsector_data_center_demand_hourly(
+            df_load,
+            model_year,
+            cf['scenario'],
+            cf['national_demand_source'],
+            cf['cooling_proportions_source'],
+            cf['propagation_source'],
+            cf['state_proportions_source']
+        )
+    )
+
+    if cf['replace_existing_data_center_demand']:
+        data_center_subsectors = cf['subsectors']['commercial']
+        df_load = pd.concat(
+            [
+                df_load.loc[~df_load.subsector.isin(data_center_subsectors)],
+                state_subsector_data_center_demand_hourly
+            ],
+            ignore_index=True
+        )
+    else:
+        df_load = (
+            pd.concat(
+                [df_load, state_subsector_data_center_demand_hourly],
+                ignore_index=True
+            )
+            .groupby(
+                [
+                    'weather_datetime',
+                    'sector',
+                    'subsector',
+                    'dispatch_feeder'
+                ],
+                as_index=False
+            )
+            .sum(numeric_only=True)
+        )
+
+    return df_load