-
Notifications
You must be signed in to change notification settings - Fork 10
Migrate reVeal2ReEDS pipeline to Hourlize #80
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
24bc66c
07cc55e
ab6f31e
4a0ddde
8bf8b37
70bd97e
6954709
6ca8f67
9c13715
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,10 +45,12 @@ | |
| "subsectors": { | ||
| "commercial": ["data center cooling", "data center it"] | ||
| }, | ||
| "model_years": [2021, 2025, 2030, 2035, 2040, 2045, 2050], | ||
| "filepaths": ["/kfs2/projects/eerload/challoran/eer_splice/dummy_agg_op_datacenters_by_state.csv"], | ||
| "unit_conversion_factor": 1, | ||
| "timezone": "Etc/GMT+6", | ||
| "regional_scope": "state" | ||
| "model_years": [2025, 2030, 2035, 2040, 2045, 2050], | ||
| "scenario": "central", | ||
| "national_demand_source": "/projects/largeload/geospatial/runs/random_forest_base_weights_01_09_2026/downscaling_2026-01-07_agg64/eer_national_central/eer_national_central_downscaled_projections.csv", | ||
| "cooling_proportions_source": "/projects/largeload/reVeal2ReEDS/files/{scenario}_dc_cooling_prop.csv", | ||
| "propagation_source": "/projects/largeload/reVeal2ReEDS/files/weather_year_propagation.csv", | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @SoLaraS2 do the original |
||
| "replace_existing_data_center_demand": true, | ||
| "state_proportions_source": "/projects/eerload/source_eer_load_profiles/20250512_eer_download/shape_outputs_2025-05-12/annual_files/data center load allocation ADP 2024.xlsx" | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,7 @@ | |
| import pandas as pd | ||
| import site | ||
| from types import SimpleNamespace | ||
| from reveal2reeds import reveal2reeds | ||
|
|
||
| def get_state_name_code_map(reeds_path: str) -> dict: | ||
| """ | ||
|
|
@@ -269,6 +270,24 @@ def create_hourly_state_load_for_model_year( | |
| compression='gzip', | ||
| parse_dates=['weather_datetime'] | ||
| ) | ||
|
|
||
| # If applicable, replace or add to data center cooling and IT projections, | ||
| # as specified in inputs/load/sector_config.json | ||
| if 'Data Centers' in replace_sectors: | ||
| data_center_config = sector_config['Data Centers'] | ||
| data_center_config['cooling_proportions_source'] = ( | ||
| data_center_config['cooling_proportions_source'] | ||
| .format(scenario=data_center_config['scenario']) | ||
| ) | ||
| if model_year in data_center_config['model_years']: | ||
| df_load = reveal2reeds.apply_custom_data_center_demand_projections( | ||
| df_load, | ||
| model_year, | ||
| data_center_config | ||
| ) | ||
| else: | ||
| pass | ||
|
|
||
| # Downselect to specified weather years | ||
| df_load = df_load.loc[df_load.weather_datetime.dt.year.isin(weather_years)] | ||
|
|
||
|
|
@@ -283,6 +302,10 @@ def create_hourly_state_load_for_model_year( | |
| # sectoral load from the raw load profiles | ||
| replacement_load_list = [] | ||
| for sector in replace_sectors: | ||
| # Skip 'data centers' sector, as it was already processed above | ||
| if sector == 'Data Centers': | ||
| continue | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't understand why the DC replacement can't happen inside this preexisting loop; is this a coding convention? I just don't see a reason on the processing side: it's at the same level as the existing loop, so we aren't skipping or double calculating anything. Ignore if this is just how it has to be :) |
||
|
|
||
| print(f"Removing endogenous load for '{sector}' sector...") | ||
| if sector not in sector_config: | ||
| raise NotImplementedError( | ||
|
|
@@ -348,6 +371,8 @@ def create_hourly_state_load_for_model_year( | |
| model_year | ||
| ) | ||
| for sector in replace_sectors | ||
| # Skip 'data centers' sector, as it was already processed above | ||
| if sector != 'Data Centers' | ||
| ] | ||
|
|
||
| # Aggregate the exogenous sectoral load to the state level and | ||
|
|
@@ -443,9 +468,8 @@ def main( | |
| ) | ||
|
|
||
| output_fpath = os.path.join( | ||
| reeds_path, | ||
| "inputs", | ||
| "load", | ||
| cf.outpath, | ||
| 'results', | ||
| f"demand_{scenario_outfile_prefix_map[scenario]}.h5" | ||
| ) | ||
| for model_year in model_years: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,239 @@ | ||
| import numpy as np | ||
| import pandas as pd | ||
|
|
||
def get_national_model_year_data_center_demand(
    national_demand_source_path: str,
    model_year: int
) -> float:
    """Return the summed national data center demand for one model year.

    Reads the CSV at ``national_demand_source_path``, keeps the rows whose
    ``year`` column equals ``model_year``, and sums their
    ``total_data_center_mw`` values.

    Parameters
    ----------
    national_demand_source_path : str
        Path to a CSV file with (at least) the columns ``year`` and
        ``total_data_center_mw``.
    model_year : int
        Model year to select.

    Returns
    -------
    float
        Sum of ``total_data_center_mw`` over all rows for ``model_year``.

    Raises
    ------
    ValueError
        If the file contains no rows for ``model_year``. Without this check
        the sum over zero rows would silently be 0.
    """
    data_center_demand = pd.read_csv(national_demand_source_path)
    is_model_year = data_center_demand['year'] == model_year
    if not is_model_year.any():
        raise ValueError(
            f"No data center demand found for model year {model_year} "
            f"in '{national_demand_source_path}'"
        )

    return data_center_demand.loc[is_model_year, 'total_data_center_mw'].sum()
|
|
||
def get_propagation_by_weather_year(
    propagation_source_path: str,
    scenario: str
) -> pd.Series:
    """Return the propagation factor of each weather year for a scenario.

    Reads the CSV at ``propagation_source_path``, keeps the rows whose
    ``scenario`` column equals ``scenario``, and returns their ``avg_prop``
    values indexed by the ``year`` column.

    Parameters
    ----------
    propagation_source_path : str
        Path to a CSV file with (at least) the columns ``scenario``,
        ``year`` and ``avg_prop``.
    scenario : str
        Scenario name to select; compared for exact equality.

    Returns
    -------
    pd.Series
        ``avg_prop`` values indexed by ``year``.

    Raises
    ------
    ValueError
        If no row matches ``scenario``. An empty result would otherwise map
        every weather year to NaN in the caller.
    """
    propagation = pd.read_csv(propagation_source_path)
    scenario_rows = propagation.loc[propagation['scenario'] == scenario]
    if scenario_rows.empty:
        raise ValueError(
            f"Scenario '{scenario}' not found in "
            f"'{propagation_source_path}'"
        )

    return scenario_rows.set_index('year')['avg_prop']
|
|
||
|
|
||
def calculate_national_data_center_demand_hourly(
    df_load: pd.DataFrame,
    model_year: int,
    scenario: str,
    national_demand_source_path: str,
    propagation_source_path: str
) -> pd.Series:
    """Estimate national data center demand for every timestamp in df_load.

    Parameters
    ----------
    df_load : pd.DataFrame
        Load table; only its ``weather_datetime`` column is read here, to
        obtain the set of timestamps to produce values for.
    model_year : int
        Model year whose national demand total is used.
    scenario : str
        Scenario used to select the propagation factors.
    national_demand_source_path : str
        Path passed to ``get_national_model_year_data_center_demand``.
    propagation_source_path : str
        Path passed to ``get_propagation_by_weather_year``.

    Returns
    -------
    pd.Series
        Series named ``demand_MW`` indexed by the unique
        ``weather_datetime`` values of ``df_load``. Timestamps whose year
        has no propagation factor get NaN.
    """
    # Calculate national projected data center demand for the model year
    national_data_center_demand = get_national_model_year_data_center_demand(
        national_demand_source_path,
        model_year
    )

    # Get propagation factors by weather year for the given scenario.
    # Propagation factors represent the percentage of projected national
    # data center demand for the model year that is expected to be
    # realized during each hour of each weather year.
    propagation_by_weather_year = get_propagation_by_weather_year(
        propagation_source_path,
        scenario
    )

    # Estimate national hourly load values for each weather year
    # by multiplying the propagation factors by national data
    # center demand for the model year.
    #
    # NOTE(review): a reviewer asked whether repeated timestamps in df_load
    # (likely one row per dispatch_feeder) should be summed rather than
    # dropped. Only the timestamps themselves are used here, to build the
    # hourly index; no load values are read from df_load in this function,
    # so de-duplicating does not discard any load.
    national_data_center_demand_hourly = pd.DataFrame(
        index=df_load['weather_datetime'].drop_duplicates()
    )
    national_data_center_demand_hourly['propagation_factor'] = (
        national_data_center_demand_hourly.index.year
        .map(propagation_by_weather_year)
    )
    national_data_center_demand_hourly['demand_MW'] = (
        national_data_center_demand_hourly['propagation_factor']
        * national_data_center_demand
    )

    return national_data_center_demand_hourly['demand_MW']
|
|
||
def get_data_center_cooling_weights(
    cooling_proportions_source_path: str
) -> pd.Series:
    """Return the mean cooling proportion for each timestamp.

    Reads the CSV at ``cooling_proportions_source_path``, parses its
    ``weather_datetime`` column as datetimes, and averages ``cooling_prop``
    over all rows that share a timestamp. The average is a plain
    (unweighted) mean.

    Parameters
    ----------
    cooling_proportions_source_path : str
        Path to a CSV file with (at least) the columns ``weather_datetime``
        and ``cooling_prop``.

    Returns
    -------
    pd.Series
        Mean ``cooling_prop`` indexed by ``weather_datetime``.
    """
    # NOTE(review): presumably one row per state and timestamp, so the mean
    # is taken across states -- confirm against the source file.
    state_cooling_weights = pd.read_csv(cooling_proportions_source_path)
    state_cooling_weights["weather_datetime"] = (
        pd.to_datetime(state_cooling_weights["weather_datetime"])
    )

    return (
        state_cooling_weights.groupby("weather_datetime")
        ["cooling_prop"]
        .mean()
    )
|
|
||
def get_data_center_state_weights(
    state_proportions_source_path: str,
    model_year: int,
    scenario: str
) -> pd.Series:
    """Return each state's share of total data center load.

    Reads the Excel workbook at ``state_proportions_source_path``, keeps the
    rows whose ``Run Name`` equals ``scenario`` and whose ``Year`` equals the
    lookup year, and returns the ``% of Total Data Center Load`` column
    indexed by ``State``.

    Parameters
    ----------
    state_proportions_source_path : str
        Path to an Excel file with (at least) the columns ``Run Name``,
        ``Year``, ``State`` and ``% of Total Data Center Load``.
    model_year : int
        Model year. Model year 2025 is looked up as 2024.
    scenario : str
        Value to match against the ``Run Name`` column.

    Returns
    -------
    pd.Series
        ``% of Total Data Center Load`` values indexed by ``State``.

    Raises
    ------
    ValueError
        If no row matches the scenario and lookup year. An empty result
        would otherwise produce demand rows without any state values.
    """
    # NOTE(review): 2025 is mapped to 2024, presumably because the workbook
    # has no 2025 entries -- confirm against the source file.
    data_center_year = 2024 if model_year == 2025 else model_year
    state_weights = pd.read_excel(state_proportions_source_path)
    selected = state_weights.loc[
        (state_weights['Run Name'] == scenario)
        & (state_weights['Year'] == data_center_year)
    ]
    if selected.empty:
        raise ValueError(
            f"No state weights found for scenario '{scenario}' and year "
            f"{data_center_year} in '{state_proportions_source_path}'"
        )

    return selected.set_index('State')["% of Total Data Center Load"]
|
|
||
|
|
||
def apply_state_and_subsector_weights(
    national_demand: pd.Series,
    state_weights: pd.Series,
    subsector_weights: pd.Series,
    subsector: str,
    sector: str = 'commercial',
    dispatch_feeder: str = 'Commercial',
) -> pd.DataFrame:
    """Split national demand into per-state demand for one subsector.

    The national demand is first multiplied by ``subsector_weights``
    (pandas aligns the two Series on their index labels), then spread over
    states with an outer product against ``state_weights``.

    Parameters
    ----------
    national_demand : pd.Series
        National demand, one value per index label (the caller in this
        module passes an hourly Series indexed by ``weather_datetime``).
    state_weights : pd.Series
        Weight of each state; its index labels become the state columns.
    subsector_weights : pd.Series
        Share of ``national_demand`` attributed to this subsector, indexed
        like ``national_demand``.
    subsector : str
        Value written to the ``subsector`` column.
    sector : str, optional
        Value written to the ``sector`` column.
    dispatch_feeder : str, optional
        Value written to the ``dispatch_feeder`` column.

    Returns
    -------
    pd.DataFrame
        One row per index label of the weighted demand. The index is moved
        into a column, followed by one column per state and the ``sector``,
        ``subsector`` and ``dispatch_feeder`` label columns.
    """
    # Labels present in only one of the two Series yield NaN after alignment.
    national_subsector_demand = national_demand * subsector_weights
    state_subsector_demand = pd.DataFrame(
        np.outer(national_subsector_demand, state_weights),
        index=national_subsector_demand.index,
        columns=state_weights.index
    )

    return (
        state_subsector_demand.reset_index()
        .assign(
            sector=sector,
            subsector=subsector,
            dispatch_feeder=dispatch_feeder
        )
        # Clear the column-axis name inherited from state_weights' index.
        .rename_axis(columns='')
    )
|
|
||
def calculate_state_subsector_data_center_demand_hourly(
    df_load: pd.DataFrame,
    model_year: int,
    scenario: str,
    national_demand_source_path: str,
    cooling_proportions_source_path: str,
    propagation_source_path: str,
    state_proportions_source_path: str
) -> pd.DataFrame:
    """Build hourly state-level demand for both data center subsectors.

    National hourly demand is split into a cooling share and an IT share
    (``1 - cooling share``), and each share is distributed across states.
    The two resulting tables are stacked, cooling first, and remaining NaN
    values are replaced with 0.

    Parameters
    ----------
    df_load : pd.DataFrame
        Load table; supplies the timestamps and, through its column names,
        the set of states to keep.
    model_year : int
        Model year of the projection.
    scenario : str
        Scenario passed to the national-demand and state-weight lookups.
    national_demand_source_path : str
        Source of the national demand totals.
    cooling_proportions_source_path : str
        Source of the cooling proportions.
    propagation_source_path : str
        Source of the weather-year propagation factors.
    state_proportions_source_path : str
        Source of the state weights.

    Returns
    -------
    pd.DataFrame
        Stacked 'data center cooling' and 'data center it' demand rows.
    """
    # Hourly national demand
    national_hourly = calculate_national_data_center_demand_hourly(
        df_load,
        model_year,
        scenario,
        national_demand_source_path,
        propagation_source_path
    )

    # Share of national demand per state, restricted to the states that
    # appear as columns of df_load.
    # NOTE(review): the remaining weights are not re-normalised after this
    # filter -- confirm that is intended.
    weights_by_state = get_data_center_state_weights(
        state_proportions_source_path,
        model_year,
        scenario
    )
    keep_state = weights_by_state.index.isin(df_load.columns)
    weights_by_state = weights_by_state.loc[keep_state]

    # Share of hourly demand going to cooling; the remainder goes to IT
    cooling_share = get_data_center_cooling_weights(
        cooling_proportions_source_path
    )
    share_by_subsector = {
        'data center cooling': cooling_share,
        'data center it': 1 - cooling_share,
    }

    # State-by-state hourly demand for each subsector, cooling first
    subsector_frames = [
        apply_state_and_subsector_weights(
            national_demand=national_hourly,
            state_weights=weights_by_state,
            subsector_weights=share,
            subsector=subsector_name,
        )
        for subsector_name, share in share_by_subsector.items()
    ]

    stacked = pd.concat(subsector_frames, ignore_index=True)
    return stacked.fillna(0)
|
|
||
def apply_custom_data_center_demand_projections(
    df_load: pd.DataFrame,
    model_year: int,
    cf: dict
) -> pd.DataFrame:
    """Replace or augment data center load in df_load with custom projections.

    Parameters
    ----------
    df_load : pd.DataFrame
        Load table with ``weather_datetime``, ``sector``, ``subsector`` and
        ``dispatch_feeder`` columns.
    model_year : int
        Model year of the projection.
    cf : dict
        Data center configuration. Keys read here: ``scenario``,
        ``national_demand_source``, ``cooling_proportions_source``,
        ``propagation_source``, ``state_proportions_source``,
        ``replace_existing_data_center_demand`` and
        ``subsectors['commercial']``.

    Returns
    -------
    pd.DataFrame
        If ``cf['replace_existing_data_center_demand']`` is truthy: df_load
        without the rows of the configured data center subsectors, followed
        by the projected rows. Otherwise: df_load and the projected rows
        summed per (weather_datetime, sector, subsector, dispatch_feeder).
    """
    state_subsector_data_center_demand_hourly = (
        calculate_state_subsector_data_center_demand_hourly(
            df_load,
            model_year,
            cf['scenario'],
            cf['national_demand_source'],
            cf['cooling_proportions_source'],
            cf['propagation_source'],
            cf['state_proportions_source']
        )
    )

    if cf['replace_existing_data_center_demand']:
        # Drop the existing data center rows, then append the projections.
        data_center_subsectors = cf['subsectors']['commercial']
        is_data_center = df_load['subsector'].isin(data_center_subsectors)
        return pd.concat(
            [
                df_load.loc[~is_data_center],
                state_subsector_data_center_demand_hourly
            ],
            ignore_index=True
        )

    # Additive mode: stack existing and projected rows, then sum rows that
    # share the same keys. Non-numeric columns that are not keys are dropped
    # by sum(numeric_only=True).
    #
    # NOTE(review): a reviewer pointed out that 'dispatch_feeder' is
    # generally summed over elsewhere and could be dropped as a grouping
    # level to match the existing scripts. It is kept as a key here to
    # preserve the current output -- revisit with the maintainers.
    combined = pd.concat(
        [df_load, state_subsector_data_center_demand_hourly],
        ignore_index=True
    )
    return (
        combined.groupby(
            [
                'weather_datetime',
                'sector',
                'subsector',
                'dispatch_feeder'
            ],
            as_index=False
        )
        .sum(numeric_only=True)
    )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be good to add a README in hourlize/inputs/load that explains these config options. In particular, does scenario here refer to EER baseline vs. IRA low vs. 100by2050 (which EER calls "central") or central vs. high data center demand?