From 24bc66c018f566ec11c8035c34b4f5546a3e236e Mon Sep 17 00:00:00 2001
From: kodiobika <kodiobika@gmail.com>
Date: Fri, 24 Apr 2026 16:31:13 -0400
Subject: [PATCH 1/3] Migrate reVeal2ReEDS pipeline to hourlize

---
 hourlize/inputs/configs/config_base.json |   1 +
 hourlize/load.py                         |  24 +++
 hourlize/reveal2reeds/config.json        |   8 +
 hourlize/reveal2reeds/reveal2reeds.py    | 239 +++++++++++++++++++++++
 4 files changed, 272 insertions(+)
 create mode 100644 hourlize/reveal2reeds/config.json
 create mode 100644 hourlize/reveal2reeds/reveal2reeds.py

diff --git a/hourlize/inputs/configs/config_base.json b/hourlize/inputs/configs/config_base.json
index 1409d39a..3f840d3f 100644
--- a/hourlize/inputs/configs/config_base.json
+++ b/hourlize/inputs/configs/config_base.json
@@ -51,6 +51,7 @@
       "2045": 1,
       "2050": 1
     },
+    "custom_data_center_projection_years": [2025, 2030, 2035, 2040, 2045, 2050],
     "scenarios": ["IRA cons", "central", "baseline"],
     "sector_config_file": "{hourlize_path}/inputs/load/sector_config.json",
     "weather_years": [2007,2008,2009,2010,2011,2012,2013,2016,2017,2018,2019,2020,2021,2022,2023]
diff --git a/hourlize/load.py b/hourlize/load.py
index 0bc27802..97cbb4ed 100644
--- a/hourlize/load.py
+++ b/hourlize/load.py
@@ -9,6 +9,19 @@
 import pandas as pd
 import site
 from types import SimpleNamespace
+from reveal2reeds import reveal2reeds
+
+def get_reveal2reeds_config() -> dict:
+    configpath = "reveal2reeds/config.json"
+    with open(configpath, "r") as f:
+        config = json.load(f, object_pairs_hook=OrderedDict)
+    reveal2reeds_config = SimpleNamespace(**config)
+    reveal2reeds_config.cooling_proportions_source = (
+        reveal2reeds_config.cooling_proportions_source
+        .format(scenario=reveal2reeds_config.scenario)
+    )
+
+    return reveal2reeds_config
 
 def get_state_name_code_map(reeds_path: str) -> dict:
     """
@@ -269,6 +282,17 @@ def create_hourly_state_load_for_model_year(
         compression='gzip',
         parse_dates=['weather_datetime']
     )
+
+    # If applicable, replace data center cooling and IT projections with
+    # custom projections specified in reveal2reeds/config.json
+    if model_year in cf.custom_data_center_projection_years:
+        reveal2reeds_config = get_reveal2reeds_config()
+        df_load = reveal2reeds.apply_custom_data_center_demand_projections(
+            df_load,
+            model_year,
+            reveal2reeds_config
+        )
+
     # Downselect to specified weather years
     df_load = df_load.loc[df_load.weather_datetime.dt.year.isin(weather_years)]
 
diff --git a/hourlize/reveal2reeds/config.json b/hourlize/reveal2reeds/config.json
new file mode 100644
index 00000000..6eedc7c0
--- /dev/null
+++ b/hourlize/reveal2reeds/config.json
@@ -0,0 +1,8 @@
+{
+    "national_demand_source": "/projects/largeload/geospatial/runs/random_forest_base_weights_01_09_2026/downscaling_2026-01-07_agg64/eer_national_central/eer_national_central_downscaled_projections.csv",
+    "cooling_proportions_source": "/projects/largeload/reVeal2ReEDS/files/{scenario}_dc_cooling_prop.csv",
+    "propagation_source": "/projects/largeload/reVeal2ReEDS/files/weather_year_propagation.csv",
+    "replace_existing_data_center_demand": true,
+    "scenario": "central",
+    "state_proportions_source": "/projects/eerload/source_eer_load_profiles/20250512_eer_download/shape_outputs_2025-05-12/annual_files/data center load allocation ADP 2024.xlsx"
+}
\ No newline at end of file
diff --git a/hourlize/reveal2reeds/reveal2reeds.py b/hourlize/reveal2reeds/reveal2reeds.py
new file mode 100644
index 00000000..03331fd6
--- /dev/null
+++ b/hourlize/reveal2reeds/reveal2reeds.py
@@ -0,0 +1,239 @@
+import numpy as np
+import pandas as pd
+
+def get_national_model_year_data_center_demand(
+    national_demand_source_path: str,
+    model_year: int
+) -> int:
+    """
+    Sum projected data center demand across all rows for one model year.
+
+    Reads the CSV at `national_demand_source_path`, keeps the rows whose
+    `year` column equals `model_year`, and returns the total of their
+    `total_data_center_mw` values (zero when no row matches).
+    """
+    projections = pd.read_csv(national_demand_source_path)
+    is_model_year = projections.year == model_year
+    total_mw = projections.loc[is_model_year, 'total_data_center_mw'].sum()
+
+    return total_mw
+
+def get_propagation_by_weather_year(
+    propagation_source_path: str,
+    scenario: str
+) -> pd.Series:
+    """
+    Load the propagation factors of one scenario.
+
+    Reads the CSV at `propagation_source_path` and returns the `avg_prop`
+    values of the rows matching `scenario`, indexed by the `year` column.
+    """
+    propagation = pd.read_csv(propagation_source_path)
+    scenario_rows = propagation.loc[propagation.scenario == scenario]
+
+    return scenario_rows.set_index('year')['avg_prop']
+    
+
+def calculate_national_data_center_demand_hourly(
+    df_load: pd.DataFrame,
+    model_year: int,
+    scenario: str,
+    national_demand_source_path: str,
+    propagation_source_path: str
+):
+    """
+    Build the national data center demand for every hour found in `df_load`.
+
+    Each unique `weather_datetime` in `df_load` gets the propagation factor
+    of its weather year (the share of the projected national demand realized
+    in each hour of that year) times the national demand for `model_year`.
+    Hours whose year has no factor for `scenario` come out as NaN.
+
+    Parameters
+    ----------
+    df_load : load table with a datetime-typed `weather_datetime` column
+    model_year : year whose national projection is used
+    scenario : scenario used to select the propagation factors
+    national_demand_source_path : CSV of national demand projections
+    propagation_source_path : CSV of propagation factors by weather year
+
+    Returns
+    -------
+    Series named `demand_MW`, indexed by the unique `weather_datetime`s.
+    """
+    # National total for the model year and the per-weather-year factors
+    national_total = get_national_model_year_data_center_demand(
+        national_demand_source_path,
+        model_year
+    )
+    factor_by_year = get_propagation_by_weather_year(
+        propagation_source_path,
+        scenario
+    )
+    hours = pd.DataFrame(index=df_load['weather_datetime'].drop_duplicates())
+    hours['propagation_factor'] = hours.index.year.map(factor_by_year)
+    hours['demand_MW'] = hours['propagation_factor'] * national_total
+
+    return hours['demand_MW']
+
+def get_data_center_cooling_weights(
+    cooling_proportions_source_path: str
+) -> pd.Series:
+    """
+    Return the mean `cooling_prop` per `weather_datetime` over all rows of
+    the CSV at `cooling_proportions_source_path`.
+
+    The result is a Series indexed by the parsed `weather_datetime` values.
+    """
+    cooling_props = pd.read_csv(cooling_proportions_source_path)
+    cooling_props["weather_datetime"] = pd.to_datetime(
+        cooling_props["weather_datetime"]
+    )
+    return cooling_props.groupby("weather_datetime")["cooling_prop"].mean()
+
+def get_data_center_state_weights(
+    state_proportions_source_path: str,
+    model_year: int,
+    scenario: str
+) -> pd.Series:
+    """
+    Return each state's `% of Total Data Center Load` for `scenario`
+    (matched against `Run Name`) and `model_year`, indexed by `State`.
+    """
+    # NOTE(review): 2025 reads the Year 2024 rows; confirm this is intended.
+    data_center_year = 2024 if model_year == 2025 else model_year
+    allocations = pd.read_excel(state_proportions_source_path)
+    is_scenario = allocations['Run Name'] == scenario
+    is_year = allocations['Year'] == data_center_year
+    selected = allocations.loc[is_scenario & is_year].set_index('State')
+
+    return selected["% of Total Data Center Load"]
+
+
+def apply_state_and_subsector_weights(
+    national_demand: pd.Series,
+    state_weights: pd.Series,
+    subsector_weights: pd.Series,
+    subsector: str,
+) -> pd.DataFrame:
+    """
+    Split national hourly demand into per-state columns for one subsector.
+
+    `national_demand` is scaled by `subsector_weights` (aligned on index),
+    then spread over states via an outer product with `state_weights`.
+    The index becomes a regular column and constant `sector`, `subsector`
+    and `dispatch_feeder` label columns are appended.
+    """
+    subsector_demand = national_demand * subsector_weights
+    by_state = pd.DataFrame(
+        np.outer(subsector_demand, state_weights),
+        index=subsector_demand.index,
+        columns=state_weights.index
+    ).reset_index()
+    return by_state.assign(
+        sector='commercial', subsector=subsector, dispatch_feeder='Commercial'
+    ).rename_axis(columns='')
+
+def calculate_state_subsector_data_center_demand_hourly(
+    df_load: pd.DataFrame,
+    model_year: int,
+    scenario: str,
+    national_demand_source_path: str,
+    cooling_proportions_source_path: str,
+    propagation_source_path: str,
+    state_proportions_source_path: str
+) -> pd.DataFrame:
+    """
+    Build hourly state-level demand for both data center subsectors.
+
+    National hourly demand is split into a cooling share (the hourly
+    cooling weights) and an IT share (one minus those weights). Each share
+    is distributed over the states that appear as columns of `df_load`;
+    state weights are not rescaled after that filter.
+
+    The `*_source_path` arguments name the input files: CSVs for national
+    demand, cooling proportions and propagation factors, and an Excel
+    workbook for state proportions.
+
+    Returns the cooling rows followed by the IT rows, NaNs filled with 0.
+    """
+    national_hourly = calculate_national_data_center_demand_hourly(
+        df_load,
+        model_year,
+        scenario,
+        national_demand_source_path,
+        propagation_source_path
+    )
+    # Keep only the states that df_load has a column for
+    all_state_weights = get_data_center_state_weights(
+        state_proportions_source_path,
+        model_year,
+        scenario
+    )
+    in_load = all_state_weights.index.isin(df_load.columns)
+    state_weights = all_state_weights.loc[in_load]
+    cooling_share = get_data_center_cooling_weights(
+        cooling_proportions_source_path
+    )
+    # Same weighting for each subsector; only the hourly share differs
+    share_by_subsector = {
+        'data center cooling': cooling_share,
+        'data center it': 1 - cooling_share,
+    }
+    frames = [
+        apply_state_and_subsector_weights(
+            national_demand=national_hourly,
+            state_weights=state_weights,
+            subsector_weights=share,
+            subsector=name,
+        )
+        for name, share in share_by_subsector.items()
+    ]
+
+    return pd.concat(frames, ignore_index=True).fillna(0)
+
+def apply_custom_data_center_demand_projections(
+    df_load: pd.DataFrame,
+    model_year: int,
+    cf: dict
+):
+    """
+    Merge custom data center demand for `model_year` into `df_load`.
+
+    The custom demand is computed from the scenario and source paths held
+    in `cf`. When `cf`'s `replace_existing_data_center_demand` flag is
+    truthy, existing data center subsector rows are dropped and the custom
+    rows appended; otherwise the custom rows are added to `df_load` by
+    summing numeric columns over rows that share the same keys.
+    """
+    state_subsector_data_center_demand_hourly = (
+        calculate_state_subsector_data_center_demand_hourly(
+            df_load,
+            model_year,
+            cf.scenario,
+            cf.national_demand_source,
+            cf.cooling_proportions_source,
+            cf.propagation_source,
+            cf.state_proportions_source
+        )
+    )
+
+    if cf.replace_existing_data_center_demand:
+        data_center_subsectors = ['data center cooling', 'data center it']
+        df_load = pd.concat(
+            [
+                df_load.loc[~df_load.subsector.isin(data_center_subsectors)],
+                state_subsector_data_center_demand_hourly
+            ],
+            ignore_index=True
+        )
+    else:
+        # Stack both tables, then sum numeric columns over matching keys
+        keys = ['weather_datetime', 'sector', 'subsector', 'dispatch_feeder']
+        stacked = pd.concat(
+            [df_load, state_subsector_data_center_demand_hourly],
+            ignore_index=True
+        )
+        df_load = stacked.groupby(keys, as_index=False).sum(numeric_only=True)
+
+    return df_load
\ No newline at end of file

From 695470929f70ed6a6f5638160fee03b223fd022c Mon Sep 17 00:00:00 2001
From: kodiobika <kodiobika@gmail.com>
Date: Thu, 11 Jun 2026 10:41:19 -0400
Subject: [PATCH 2/3] Use existing sector_config.json instead of new
 reveal2reeds config

---
 hourlize/inputs/load/sector_config.json | 12 ++++----
 hourlize/load.py                        | 39 ++++++++++++++++++++-----
 2 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/hourlize/inputs/load/sector_config.json b/hourlize/inputs/load/sector_config.json
index 35997a25..5d012d28 100644
--- a/hourlize/inputs/load/sector_config.json
+++ b/hourlize/inputs/load/sector_config.json
@@ -45,10 +45,12 @@
     "subsectors": {
       "commercial": ["data center cooling", "data center it"]
     },
-    "model_years": [2021, 2025, 2030, 2035, 2040, 2045, 2050],
-    "filepaths": ["/kfs2/projects/eerload/challoran/eer_splice/dummy_agg_op_datacenters_by_state.csv"],
-    "unit_conversion_factor": 1,
-    "timezone": "Etc/GMT+6",
-    "regional_scope": "state"
+    "model_years": [2025, 2030, 2035, 2040, 2045, 2050],
+    "scenario": "central",
+    "national_demand_source": "/projects/largeload/geospatial/runs/random_forest_base_weights_01_09_2026/downscaling_2026-01-07_agg64/eer_national_central/eer_national_central_downscaled_projections.csv",
+    "cooling_proportions_source": "/projects/largeload/reVeal2ReEDS/files/{scenario}_dc_cooling_prop.csv",
+    "propagation_source": "/projects/largeload/reVeal2ReEDS/files/weather_year_propagation.csv",
+    "replace_existing_data_center_demand": true,
+    "state_proportions_source": "/projects/eerload/source_eer_load_profiles/20250512_eer_download/shape_outputs_2025-05-12/annual_files/data center load allocation ADP 2024.xlsx"
   }
 }
\ No newline at end of file
diff --git a/hourlize/load.py b/hourlize/load.py
index 97cbb4ed..0349b81c 100644
--- a/hourlize/load.py
+++ b/hourlize/load.py
@@ -283,15 +283,38 @@ def create_hourly_state_load_for_model_year(
         parse_dates=['weather_datetime']
     )
 
-    # If applicable, replace data center cooling and IT projections with
-    # custom projections specified in reveal2reeds/config.json
-    if model_year in cf.custom_data_center_projection_years:
-        reveal2reeds_config = get_reveal2reeds_config()
-        df_load = reveal2reeds.apply_custom_data_center_demand_projections(
-            df_load,
-            model_year,
-            reveal2reeds_config
+    # # If applicable, replace data center cooling and IT projections with
+    # # custom projections specified in reveal2reeds/config.json
+    # if model_year in cf.custom_data_center_projection_years:
+    #     reveal2reeds_config = get_reveal2reeds_config()
+    #     df_load = reveal2reeds.apply_custom_data_center_demand_projections(
+    #         df_load,
+    #         model_year,
+    #         reveal2reeds_config
+    #     )
+
+    # If applicable, replace or add to data center cooling and IT projections,
+    # as specified in inputs/load/sector_config.json
+    # Note this is handled differently and separately from the other
+    # 'replace_sectors'. The logic for handling those is below.
+    if 'Data Centers' in replace_sectors:
+        data_center_config = sector_config['Data Centers']
+        data_center_config.cooling_proportions_source = (
+            data_center_config.cooling_proportions_source
+            .format(scenario=data_center_config.scenario)
         )
+        if model_year in data_center_config['model_years']:
+            df_load = reveal2reeds.apply_custom_data_center_demand_projections(
+                df_load,
+                model_year,
+                data_center_config
+            )
+        else:
+            pass
+
+        # Remove 'data centers' from 'replace_sectors' because we traverse
+        # through the list later to handle the other sectors.
+        replace_sectors.remove('Data Centers')
 
     # Downselect to specified weather years
     df_load = df_load.loc[df_load.weather_datetime.dt.year.isin(weather_years)]

From 6ca8f679a2cabbef3996efa66faa2e1df82b7b91 Mon Sep 17 00:00:00 2001
From: kodiobika <kodiobika@gmail.com>
Date: Thu, 11 Jun 2026 12:03:01 -0400
Subject: [PATCH 3/3] Move reveal2reeds config to sector_config.json

---
 hourlize/inputs/configs/config_base.json |  1 -
 hourlize/load.py                         | 45 ++++++------------------
 hourlize/reveal2reeds/config.json        |  8 -----
 hourlize/reveal2reeds/reveal2reeds.py    | 14 ++++----
 4 files changed, 18 insertions(+), 50 deletions(-)
 delete mode 100644 hourlize/reveal2reeds/config.json

diff --git a/hourlize/inputs/configs/config_base.json b/hourlize/inputs/configs/config_base.json
index 3f840d3f..1409d39a 100644
--- a/hourlize/inputs/configs/config_base.json
+++ b/hourlize/inputs/configs/config_base.json
@@ -51,7 +51,6 @@
       "2045": 1,
       "2050": 1
     },
-    "custom_data_center_projection_years": [2025, 2030, 2035, 2040, 2045, 2050],
     "scenarios": ["IRA cons", "central", "baseline"],
     "sector_config_file": "{hourlize_path}/inputs/load/sector_config.json",
     "weather_years": [2007,2008,2009,2010,2011,2012,2013,2016,2017,2018,2019,2020,2021,2022,2023]
diff --git a/hourlize/load.py b/hourlize/load.py
index 0349b81c..96280c6c 100644
--- a/hourlize/load.py
+++ b/hourlize/load.py
@@ -11,18 +11,6 @@
 from types import SimpleNamespace
 from reveal2reeds import reveal2reeds
 
-def get_reveal2reeds_config() -> dict:
-    configpath = "reveal2reeds/config.json"
-    with open(configpath, "r") as f:
-        config = json.load(f, object_pairs_hook=OrderedDict)
-    reveal2reeds_config = SimpleNamespace(**config)
-    reveal2reeds_config.cooling_proportions_source = (
-        reveal2reeds_config.cooling_proportions_source
-        .format(scenario=reveal2reeds_config.scenario)
-    )
-
-    return reveal2reeds_config
-
 def get_state_name_code_map(reeds_path: str) -> dict:
     """
     Read from the ReEDS directory a file containing the mapping from state
@@ -283,25 +271,13 @@ def create_hourly_state_load_for_model_year(
         parse_dates=['weather_datetime']
     )
 
-    # # If applicable, replace data center cooling and IT projections with
-    # # custom projections specified in reveal2reeds/config.json
-    # if model_year in cf.custom_data_center_projection_years:
-    #     reveal2reeds_config = get_reveal2reeds_config()
-    #     df_load = reveal2reeds.apply_custom_data_center_demand_projections(
-    #         df_load,
-    #         model_year,
-    #         reveal2reeds_config
-    #     )
-
     # If applicable, replace or add to data center cooling and IT projections,
     # as specified in inputs/load/sector_config.json
-    # Note this is handled differently and separately from the other
-    # 'replace_sectors'. The logic for handling those is below.
     if 'Data Centers' in replace_sectors:
         data_center_config = sector_config['Data Centers']
-        data_center_config.cooling_proportions_source = (
-            data_center_config.cooling_proportions_source
-            .format(scenario=data_center_config.scenario)
+        data_center_config['cooling_proportions_source'] = (
+            data_center_config['cooling_proportions_source']
+            .format(scenario=data_center_config['scenario'])
         )
         if model_year in data_center_config['model_years']:
             df_load = reveal2reeds.apply_custom_data_center_demand_projections(
@@ -312,10 +288,6 @@ def create_hourly_state_load_for_model_year(
         else:
             pass
 
-        # Remove 'data centers' from 'replace_sectors' because we traverse
-        # through the list later to handle the other sectors.
-        replace_sectors.remove('Data Centers')
-
     # Downselect to specified weather years
     df_load = df_load.loc[df_load.weather_datetime.dt.year.isin(weather_years)]
 
@@ -330,6 +302,10 @@ def create_hourly_state_load_for_model_year(
     # sectoral load from the raw load profiles
     replacement_load_list = []
     for sector in replace_sectors:
+        # Skip the 'Data Centers' sector, as it was already processed above
+        if sector == 'Data Centers':
+            continue
+
         print(f"Removing endogenous load for '{sector}' sector...")
         if sector not in sector_config:
             raise NotImplementedError(
@@ -395,6 +371,8 @@ def create_hourly_state_load_for_model_year(
             model_year
         )
         for sector in replace_sectors
+        # Skip the 'Data Centers' sector, as it was already processed above
+        if sector != 'Data Centers'
     ]
 
     # Aggregate the exogenous sectoral load to the state level and
@@ -490,9 +468,8 @@ def main(
             )
 
         output_fpath = os.path.join(
-            reeds_path,
-            "inputs",
-            "load",
+            cf.outpath,
+            'results',
             f"demand_{scenario_outfile_prefix_map[scenario]}.h5"
         )
         for model_year in model_years:
diff --git a/hourlize/reveal2reeds/config.json b/hourlize/reveal2reeds/config.json
deleted file mode 100644
index 6eedc7c0..00000000
--- a/hourlize/reveal2reeds/config.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-    "national_demand_source": "/projects/largeload/geospatial/runs/random_forest_base_weights_01_09_2026/downscaling_2026-01-07_agg64/eer_national_central/eer_national_central_downscaled_projections.csv",
-    "cooling_proportions_source": "/projects/largeload/reVeal2ReEDS/files/{scenario}_dc_cooling_prop.csv",
-    "propagation_source": "/projects/largeload/reVeal2ReEDS/files/weather_year_propagation.csv",
-    "replace_existing_data_center_demand": true,
-    "scenario": "central",
-    "state_proportions_source": "/projects/eerload/source_eer_load_profiles/20250512_eer_download/shape_outputs_2025-05-12/annual_files/data center load allocation ADP 2024.xlsx"
-}
\ No newline at end of file
diff --git a/hourlize/reveal2reeds/reveal2reeds.py b/hourlize/reveal2reeds/reveal2reeds.py
index 03331fd6..651e0273 100644
--- a/hourlize/reveal2reeds/reveal2reeds.py
+++ b/hourlize/reveal2reeds/reveal2reeds.py
@@ -201,16 +201,16 @@ def apply_custom_data_center_demand_projections(
         calculate_state_subsector_data_center_demand_hourly(
             df_load,
             model_year,
-            cf.scenario,
-            cf.national_demand_source,
-            cf.cooling_proportions_source,
-            cf.propagation_source,
-            cf.state_proportions_source
+            cf['scenario'],
+            cf['national_demand_source'],
+            cf['cooling_proportions_source'],
+            cf['propagation_source'],
+            cf['state_proportions_source']
         )
     )
 
-    if cf.replace_existing_data_center_demand:
-        data_center_subsectors = ['data center cooling', 'data center it']
+    if cf['replace_existing_data_center_demand']:
+        data_center_subsectors = cf['subsectors']['commercial']
         df_load = pd.concat(
             [
                 df_load.loc[~df_load.subsector.isin(data_center_subsectors)],