From 24bc66c018f566ec11c8035c34b4f5546a3e236e Mon Sep 17 00:00:00 2001 From: kodiobika Date: Fri, 24 Apr 2026 16:31:13 -0400 Subject: [PATCH 1/3] Migrate reVeal2ReEDS pipeline to hourlize --- hourlize/inputs/configs/config_base.json | 1 + hourlize/load.py | 24 +++ hourlize/reveal2reeds/config.json | 8 + hourlize/reveal2reeds/reveal2reeds.py | 239 +++++++++++++++++++++++ 4 files changed, 272 insertions(+) create mode 100644 hourlize/reveal2reeds/config.json create mode 100644 hourlize/reveal2reeds/reveal2reeds.py diff --git a/hourlize/inputs/configs/config_base.json b/hourlize/inputs/configs/config_base.json index 1409d39a..3f840d3f 100644 --- a/hourlize/inputs/configs/config_base.json +++ b/hourlize/inputs/configs/config_base.json @@ -51,6 +51,7 @@ "2045": 1, "2050": 1 }, + "custom_data_center_projection_years": [2025, 2030, 2035, 2040, 2045, 2050], "scenarios": ["IRA cons", "central", "baseline"], "sector_config_file": "{hourlize_path}/inputs/load/sector_config.json", "weather_years": [2007,2008,2009,2010,2011,2012,2013,2016,2017,2018,2019,2020,2021,2022,2023] diff --git a/hourlize/load.py b/hourlize/load.py index 0bc27802..97cbb4ed 100644 --- a/hourlize/load.py +++ b/hourlize/load.py @@ -9,6 +9,19 @@ import pandas as pd import site from types import SimpleNamespace +from reveal2reeds import reveal2reeds + +def get_reveal2reeds_config() -> dict: + configpath = "reveal2reeds/config.json" + with open(configpath, "r") as f: + config = json.load(f, object_pairs_hook=OrderedDict) + reveal2reeds_config = SimpleNamespace(**config) + reveal2reeds_config.cooling_proportions_source = ( + reveal2reeds_config.cooling_proportions_source + .format(scenario=reveal2reeds_config.scenario) + ) + + return reveal2reeds_config def get_state_name_code_map(reeds_path: str) -> dict: """ @@ -269,6 +282,17 @@ def create_hourly_state_load_for_model_year( compression='gzip', parse_dates=['weather_datetime'] ) + + # If applicable, replace data center cooling and IT projections with + # custom projections specified in reveal2reeds/config.json + if model_year in cf.custom_data_center_projection_years: + reveal2reeds_config = get_reveal2reeds_config() + df_load = reveal2reeds.apply_custom_data_center_demand_projections( + df_load, + model_year, + reveal2reeds_config + ) + # Downselect to specified weather years df_load = df_load.loc[df_load.weather_datetime.dt.year.isin(weather_years)] diff --git a/hourlize/reveal2reeds/config.json b/hourlize/reveal2reeds/config.json new file mode 100644 index 00000000..6eedc7c0 --- /dev/null +++ b/hourlize/reveal2reeds/config.json @@ -0,0 +1,8 @@ +{ + "national_demand_source": "/projects/largeload/geospatial/runs/random_forest_base_weights_01_09_2026/downscaling_2026-01-07_agg64/eer_national_central/eer_national_central_downscaled_projections.csv", + "cooling_proportions_source": "/projects/largeload/reVeal2ReEDS/files/{scenario}_dc_cooling_prop.csv", + "propagation_source": "/projects/largeload/reVeal2ReEDS/files/weather_year_propagation.csv", + "replace_existing_data_center_demand": true, + "scenario": "central", + "state_proportions_source": "/projects/eerload/source_eer_load_profiles/20250512_eer_download/shape_outputs_2025-05-12/annual_files/data center load allocation ADP 2024.xlsx" +} \ No newline at end of file diff --git a/hourlize/reveal2reeds/reveal2reeds.py b/hourlize/reveal2reeds/reveal2reeds.py new file mode 100644 index 00000000..03331fd6 --- /dev/null +++ b/hourlize/reveal2reeds/reveal2reeds.py @@ -0,0 +1,239 @@ +import numpy as np +import pandas as pd + +def get_national_model_year_data_center_demand( + national_demand_source_path: str, + model_year: int +) -> int: + data_center_demand = pd.read_csv(national_demand_source_path) + model_year_data_center_demand = ( + data_center_demand.loc[( + data_center_demand.year == model_year + )] + .copy() + ) + national_model_year_data_center_demand = ( + model_year_data_center_demand['total_data_center_mw'].sum() + ) + + return national_model_year_data_center_demand + +def get_propagation_by_weather_year( + propagation_source_path: str, + scenario: str +) -> pd.Series: + propagation_by_weather_year = pd.read_csv(propagation_source_path) + propagation_by_weather_year = ( + propagation_by_weather_year.loc[( + propagation_by_weather_year.scenario == scenario + )] + .set_index('year') + ['avg_prop'] + ) + + return propagation_by_weather_year + + +def calculate_national_data_center_demand_hourly( + df_load: pd.DataFrame, + model_year: int, + scenario: str, + national_demand_source_path: str, + propagation_source_path: str +): + # Calculate national projected data center demand for the model year + national_data_center_demand = get_national_model_year_data_center_demand( + national_demand_source_path, + model_year + ) + + # Get propagation factors by weather year for the given scenario. + # Propagation factors represent the percentage of projected national + # data center demand for the model year that is expected to be + # realized during each hour of each weather year. + propagation_by_weather_year = get_propagation_by_weather_year( + propagation_source_path, + scenario + ) + + # Estimate national hourly load values for each weather year + # by multiplying the propagation factors by national data + # center demand for the model year. + national_data_center_demand_hourly = pd.DataFrame( + index=df_load['weather_datetime'].drop_duplicates() + ) + national_data_center_demand_hourly['propagation_factor'] = ( + national_data_center_demand_hourly.index.year + .map(propagation_by_weather_year) + ) + national_data_center_demand_hourly['demand_MW'] = ( + national_data_center_demand_hourly['propagation_factor'] + * national_data_center_demand + ) + national_data_center_demand_hourly = ( + national_data_center_demand_hourly['demand_MW'] + ) + + return national_data_center_demand_hourly + +def get_data_center_cooling_weights( + cooling_proportions_source_path: str +) -> pd.DataFrame: + state_cooling_weights = pd.read_csv(cooling_proportions_source_path) + state_cooling_weights["weather_datetime"] = ( + pd.to_datetime(state_cooling_weights["weather_datetime"]) + ) + national_cooling_weights = ( + state_cooling_weights.groupby("weather_datetime") + ["cooling_prop"] + .mean() + ) + + return national_cooling_weights + +def get_data_center_state_weights( + state_proportions_source_path: str, + model_year: int, + scenario: str +) -> pd.DataFrame: + data_center_year = 2024 if model_year == 2025 else model_year + state_weights = pd.read_excel(state_proportions_source_path) + state_weights = ( + state_weights.loc[ + (state_weights['Run Name'] == scenario) + & (state_weights['Year'] == data_center_year) + ] + .set_index('State') + ["% of Total Data Center Load"] + ) + + return state_weights + + +def apply_state_and_subsector_weights( + national_demand: pd.DataFrame, + state_weights: pd.Series, + subsector_weights: pd.Series, + subsector: str, +): + national_subsector_demand = national_demand * subsector_weights + state_subsector_demand = pd.DataFrame( + np.outer(national_subsector_demand, state_weights), + index=national_subsector_demand.index, + columns=state_weights.index + ) + state_subsector_demand = ( + state_subsector_demand.reset_index() + .assign( + sector='commercial', + subsector=subsector, + dispatch_feeder='Commercial' + ) + .rename_axis(columns='') + ) + + return state_subsector_demand + +def calculate_state_subsector_data_center_demand_hourly( + df_load: pd.DataFrame, + model_year: int, + scenario: str, + national_demand_source_path: str, + cooling_proportions_source_path: str, + propagation_source_path: str, + state_proportions_source_path: str +) -> pd.DataFrame: + # Calculate hourly national data center demand + national_data_center_demand_hourly = ( + calculate_national_data_center_demand_hourly( + df_load, + model_year, + scenario, + national_demand_source_path, + propagation_source_path + ) + ) + # Calculate proportion of national demand attributable to each state + state_weights = get_data_center_state_weights( + state_proportions_source_path, + model_year, + scenario + ) + state_weights = state_weights.loc[state_weights.index.isin(df_load.columns)] + # Get proportion of hourly demand attributable to cooling + data_center_cooling_weights = get_data_center_cooling_weights( + cooling_proportions_source_path + ) + # Calculate state-by-state hourly demand for data center cooling subsector + state_data_center_cooling_demand_hourly = apply_state_and_subsector_weights( + national_demand=national_data_center_demand_hourly, + state_weights=state_weights, + subsector_weights=data_center_cooling_weights, + subsector='data center cooling', + ) + # Calculate state-by-state hourly demand for data center IT subsector + data_center_it_weights = 1 - data_center_cooling_weights + state_data_center_it_demand_hourly = apply_state_and_subsector_weights( + national_demand=national_data_center_demand_hourly, + state_weights=state_weights, + subsector_weights=data_center_it_weights, + subsector='data center it', + ) + # Concatenate all state subsector-level demand + state_subsector_data_center_demand_hourly = ( + pd.concat( + [ + state_data_center_cooling_demand_hourly, + state_data_center_it_demand_hourly + ], + ignore_index=True + ) + .fillna(0) + ) + return state_subsector_data_center_demand_hourly + +def apply_custom_data_center_demand_projections( + df_load: pd.DataFrame, + model_year: int, + cf: dict +): + state_subsector_data_center_demand_hourly = ( + calculate_state_subsector_data_center_demand_hourly( + df_load, + model_year, + cf.scenario, + cf.national_demand_source, + cf.cooling_proportions_source, + cf.propagation_source, + cf.state_proportions_source + ) + ) + + if cf.replace_existing_data_center_demand: + data_center_subsectors = ['data center cooling', 'data center it'] + df_load = pd.concat( + [ + df_load.loc[~df_load.subsector.isin(data_center_subsectors)], + state_subsector_data_center_demand_hourly + ], + ignore_index=True + ) + else: + df_load = ( + pd.concat( + [df_load, state_subsector_data_center_demand_hourly], + ignore_index=True + ) + .groupby( + [ + 'weather_datetime', + 'sector', + 'subsector', + 'dispatch_feeder' + ], + as_index=False + ) + .sum(numeric_only=True) + ) + + return df_load \ No newline at end of file From 695470929f70ed6a6f5638160fee03b223fd022c Mon Sep 17 00:00:00 2001 From: kodiobika Date: Thu, 11 Jun 2026 10:41:19 -0400 Subject: [PATCH 2/3] Use existing sector_config.json instead of new reveal2reeds config --- hourlize/inputs/load/sector_config.json | 12 ++++---- hourlize/load.py | 39 ++++++++++++++++++++----- 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/hourlize/inputs/load/sector_config.json b/hourlize/inputs/load/sector_config.json index 35997a25..5d012d28 100644 --- a/hourlize/inputs/load/sector_config.json +++ b/hourlize/inputs/load/sector_config.json @@ -45,10 +45,12 @@ "subsectors": { "commercial": ["data center cooling", "data center it"] }, - "model_years": [2021, 2025, 2030, 2035, 2040, 2045, 2050], - "filepaths": ["/kfs2/projects/eerload/challoran/eer_splice/dummy_agg_op_datacenters_by_state.csv"], - "unit_conversion_factor": 1, - "timezone": "Etc/GMT+6", - "regional_scope": "state" + "model_years": [2025, 2030, 2035, 2040, 2045, 2050], + "scenario": "central", + "national_demand_source": "/projects/largeload/geospatial/runs/random_forest_base_weights_01_09_2026/downscaling_2026-01-07_agg64/eer_national_central/eer_national_central_downscaled_projections.csv", + "cooling_proportions_source": "/projects/largeload/reVeal2ReEDS/files/{scenario}_dc_cooling_prop.csv", + "propagation_source": "/projects/largeload/reVeal2ReEDS/files/weather_year_propagation.csv", + "replace_existing_data_center_demand": true, + "state_proportions_source": "/projects/eerload/source_eer_load_profiles/20250512_eer_download/shape_outputs_2025-05-12/annual_files/data center load allocation ADP 2024.xlsx" } } \ No newline at end of file diff --git a/hourlize/load.py b/hourlize/load.py index 97cbb4ed..0349b81c 100644 --- a/hourlize/load.py +++ b/hourlize/load.py @@ -283,15 +283,38 @@ def create_hourly_state_load_for_model_year( parse_dates=['weather_datetime'] ) - # If applicable, replace data center cooling and IT projections with - # custom projections specified in reveal2reeds/config.json - if model_year in cf.custom_data_center_projection_years: - reveal2reeds_config = get_reveal2reeds_config() - df_load = reveal2reeds.apply_custom_data_center_demand_projections( - df_load, - model_year, - reveal2reeds_config + # # If applicable, replace data center cooling and IT projections with + # # custom projections specified in reveal2reeds/config.json + # if model_year in cf.custom_data_center_projection_years: + # reveal2reeds_config = get_reveal2reeds_config() + # df_load = reveal2reeds.apply_custom_data_center_demand_projections( + # df_load, + # model_year, + # reveal2reeds_config + # ) + + # If applicable, replace or add to data center cooling and IT projections, + # as specified in inputs/load/sector_config.json + # Note this is handled differently and separately from the other + # 'replace_sectors'. The logic for handling those is below. + if 'Data Centers' in replace_sectors: + data_center_config = sector_config['Data Centers'] + data_center_config.cooling_proportions_source = ( + data_center_config.cooling_proportions_source + .format(scenario=data_center_config.scenario) ) + if model_year in data_center_config['model_years']: + df_load = reveal2reeds.apply_custom_data_center_demand_projections( + df_load, + model_year, + data_center_config + ) + else: + pass + + # Remove 'data centers' from 'replace_sectors' because we traverse + # through the list later to handle the other sectors. + replace_sectors.remove('Data Centers') # Downselect to specified weather years df_load = df_load.loc[df_load.weather_datetime.dt.year.isin(weather_years)] From 6ca8f679a2cabbef3996efa66faa2e1df82b7b91 Mon Sep 17 00:00:00 2001 From: kodiobika Date: Thu, 11 Jun 2026 12:03:01 -0400 Subject: [PATCH 3/3] Move reveal2reeds config to sector_config.json --- hourlize/inputs/configs/config_base.json | 1 - hourlize/load.py | 45 ++++++------------------ hourlize/reveal2reeds/config.json | 8 ----- hourlize/reveal2reeds/reveal2reeds.py | 14 ++++---- 4 files changed, 18 insertions(+), 50 deletions(-) delete mode 100644 hourlize/reveal2reeds/config.json diff --git a/hourlize/inputs/configs/config_base.json b/hourlize/inputs/configs/config_base.json index 3f840d3f..1409d39a 100644 --- a/hourlize/inputs/configs/config_base.json +++ b/hourlize/inputs/configs/config_base.json @@ -51,7 +51,6 @@ "2045": 1, "2050": 1 }, - "custom_data_center_projection_years": [2025, 2030, 2035, 2040, 2045, 2050], "scenarios": ["IRA cons", "central", "baseline"], "sector_config_file": "{hourlize_path}/inputs/load/sector_config.json", "weather_years": [2007,2008,2009,2010,2011,2012,2013,2016,2017,2018,2019,2020,2021,2022,2023] diff --git a/hourlize/load.py b/hourlize/load.py index 0349b81c..96280c6c 100644 --- a/hourlize/load.py +++ b/hourlize/load.py @@ -11,18 +11,6 @@ from types import SimpleNamespace from reveal2reeds import reveal2reeds -def get_reveal2reeds_config() -> dict: - configpath = "reveal2reeds/config.json" - with open(configpath, "r") as f: - config = json.load(f, object_pairs_hook=OrderedDict) - reveal2reeds_config = SimpleNamespace(**config) - reveal2reeds_config.cooling_proportions_source = ( - reveal2reeds_config.cooling_proportions_source - .format(scenario=reveal2reeds_config.scenario) - ) - - return reveal2reeds_config - def get_state_name_code_map(reeds_path: str) -> dict: """ Read from the ReEDS directory a file containing the mapping from state @@ -283,25 +271,13 @@ def create_hourly_state_load_for_model_year( parse_dates=['weather_datetime'] ) - # # If applicable, replace data center cooling and IT projections with - # # custom projections specified in reveal2reeds/config.json - # if model_year in cf.custom_data_center_projection_years: - # reveal2reeds_config = get_reveal2reeds_config() - # df_load = reveal2reeds.apply_custom_data_center_demand_projections( - # df_load, - # model_year, - # reveal2reeds_config - # ) - # If applicable, replace or add to data center cooling and IT projections, # as specified in inputs/load/sector_config.json - # Note this is handled differently and separately from the other - # 'replace_sectors'. The logic for handling those is below. if 'Data Centers' in replace_sectors: data_center_config = sector_config['Data Centers'] - data_center_config.cooling_proportions_source = ( - data_center_config.cooling_proportions_source - .format(scenario=data_center_config.scenario) + data_center_config['cooling_proportions_source'] = ( + data_center_config['cooling_proportions_source'] + .format(scenario=data_center_config['scenario']) ) if model_year in data_center_config['model_years']: df_load = reveal2reeds.apply_custom_data_center_demand_projections( @@ -312,10 +288,6 @@ def create_hourly_state_load_for_model_year( else: pass - # Remove 'data centers' from 'replace_sectors' because we traverse - # through the list later to handle the other sectors. - replace_sectors.remove('Data Centers') - # Downselect to specified weather years df_load = df_load.loc[df_load.weather_datetime.dt.year.isin(weather_years)] @@ -330,6 +302,10 @@ def create_hourly_state_load_for_model_year( # sectoral load from the raw load profiles replacement_load_list = [] for sector in replace_sectors: + # Skip 'data centers' sector, as it was already processed above + if sector == 'Data Centers': + continue + print(f"Removing endogenous load for '{sector}' sector...") if sector not in sector_config: raise NotImplementedError( @@ -395,6 +371,8 @@ def create_hourly_state_load_for_model_year( model_year ) for sector in replace_sectors + # Skip 'data centers' sector, as it was already processed above + if sector != 'Data Centers' ] # Aggregate the exogenous sectoral load to the state level and @@ -490,9 +468,8 @@ def main( ) output_fpath = os.path.join( - reeds_path, - "inputs", - "load", + cf.outpath, + 'results', f"demand_{scenario_outfile_prefix_map[scenario]}.h5" ) for model_year in model_years: diff --git a/hourlize/reveal2reeds/config.json b/hourlize/reveal2reeds/config.json deleted file mode 100644 index 6eedc7c0..00000000 --- a/hourlize/reveal2reeds/config.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "national_demand_source": "/projects/largeload/geospatial/runs/random_forest_base_weights_01_09_2026/downscaling_2026-01-07_agg64/eer_national_central/eer_national_central_downscaled_projections.csv", - "cooling_proportions_source": "/projects/largeload/reVeal2ReEDS/files/{scenario}_dc_cooling_prop.csv", - "propagation_source": "/projects/largeload/reVeal2ReEDS/files/weather_year_propagation.csv", - "replace_existing_data_center_demand": true, - "scenario": "central", - "state_proportions_source": "/projects/eerload/source_eer_load_profiles/20250512_eer_download/shape_outputs_2025-05-12/annual_files/data center load allocation ADP 2024.xlsx" -} \ No newline at end of file diff --git a/hourlize/reveal2reeds/reveal2reeds.py b/hourlize/reveal2reeds/reveal2reeds.py index 03331fd6..651e0273 100644 --- a/hourlize/reveal2reeds/reveal2reeds.py +++ b/hourlize/reveal2reeds/reveal2reeds.py @@ -201,16 +201,16 @@ def apply_custom_data_center_demand_projections( calculate_state_subsector_data_center_demand_hourly( df_load, model_year, - cf.scenario, - cf.national_demand_source, - cf.cooling_proportions_source, - cf.propagation_source, - cf.state_proportions_source + cf['scenario'], + cf['national_demand_source'], + cf['cooling_proportions_source'], + cf['propagation_source'], + cf['state_proportions_source'] ) ) - if cf.replace_existing_data_center_demand: - data_center_subsectors = ['data center cooling', 'data center it'] + if cf['replace_existing_data_center_demand']: + data_center_subsectors = cf['subsectors']['commercial'] df_load = pd.concat( [ df_load.loc[~df_load.subsector.isin(data_center_subsectors)],