Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions hourlize/inputs/load/sector_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,12 @@
"subsectors": {
"commercial": ["data center cooling", "data center it"]
},
"model_years": [2021, 2025, 2030, 2035, 2040, 2045, 2050],
"filepaths": ["/kfs2/projects/eerload/challoran/eer_splice/dummy_agg_op_datacenters_by_state.csv"],
"unit_conversion_factor": 1,
"timezone": "Etc/GMT+6",
"regional_scope": "state"
"model_years": [2025, 2030, 2035, 2040, 2045, 2050],
"scenario": "central",

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to add a README in hourlize/inputs/load that explains these config options. In particular, does scenario here refer to EER baseline vs. IRA low vs. 100by2050 (which EER calls "central") or central vs. high data center demand?

"national_demand_source": "/projects/largeload/geospatial/runs/random_forest_base_weights_01_09_2026/downscaling_2026-01-07_agg64/eer_national_central/eer_national_central_downscaled_projections.csv",
"cooling_proportions_source": "/projects/largeload/reVeal2ReEDS/files/{scenario}_dc_cooling_prop.csv",
"propagation_source": "/projects/largeload/reVeal2ReEDS/files/weather_year_propagation.csv",

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@SoLaraS2 do the original cooling_proportions_source and propagation_source files exist on the HPC in the /projects/eerload directory? It would be better to point to those locations than to a location in the largeload project.

"replace_existing_data_center_demand": true,
"state_proportions_source": "/projects/eerload/source_eer_load_profiles/20250512_eer_download/shape_outputs_2025-05-12/annual_files/data center load allocation ADP 2024.xlsx"
}
}
30 changes: 27 additions & 3 deletions hourlize/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pandas as pd
import site
from types import SimpleNamespace
from reveal2reeds import reveal2reeds

def get_state_name_code_map(reeds_path: str) -> dict:
"""
Expand Down Expand Up @@ -269,6 +270,24 @@ def create_hourly_state_load_for_model_year(
compression='gzip',
parse_dates=['weather_datetime']
)

# If applicable, replace or add to data center cooling and IT projections,
# as specified in inputs/load/sector_config.json
if 'Data Centers' in replace_sectors:
data_center_config = sector_config['Data Centers']
data_center_config['cooling_proportions_source'] = (
data_center_config['cooling_proportions_source']
.format(scenario=data_center_config['scenario'])
)
if model_year in data_center_config['model_years']:
df_load = reveal2reeds.apply_custom_data_center_demand_projections(
df_load,
model_year,
data_center_config
)
else:
pass

# Downselect to specified weather years
df_load = df_load.loc[df_load.weather_datetime.dt.year.isin(weather_years)]

Expand All @@ -283,6 +302,10 @@ def create_hourly_state_load_for_model_year(
# sectoral load from the raw load profiles
replacement_load_list = []
for sector in replace_sectors:
# Skip 'data centers' sector, as it was already processed above
if sector == 'Data Centers':
continue

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why the DC replacement can't happen inside this preexisting loop. Is this a coding convention? I don't see a reason on the processing side: it sits at the same level as the existing loop, so we aren't skipping or double-calculating anything. Ignore this if it's just how it has to be :)


print(f"Removing endogenous load for '{sector}' sector...")
if sector not in sector_config:
raise NotImplementedError(
Expand Down Expand Up @@ -348,6 +371,8 @@ def create_hourly_state_load_for_model_year(
model_year
)
for sector in replace_sectors
# Skip 'data centers' sector, as it was already processed above
if sector != 'Data Centers'
]

# Aggregate the exogenous sectoral load to the state level and
Expand Down Expand Up @@ -443,9 +468,8 @@ def main(
)

output_fpath = os.path.join(
reeds_path,
"inputs",
"load",
cf.outpath,
'results',
f"demand_{scenario_outfile_prefix_map[scenario]}.h5"
)
for model_year in model_years:
Expand Down
239 changes: 239 additions & 0 deletions hourlize/reveal2reeds/reveal2reeds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
import numpy as np
import pandas as pd

def get_national_model_year_data_center_demand(
    national_demand_source_path: str,
    model_year: int
) -> float:
    """
    Return the total projected national data center demand for a model year.

    Reads the CSV at ``national_demand_source_path``, keeps the rows whose
    ``year`` column equals ``model_year``, and sums their
    ``total_data_center_mw`` values.

    Parameters
    ----------
    national_demand_source_path : str
        Path to a CSV with at least the columns ``year`` and
        ``total_data_center_mw``.
    model_year : int
        Model year to select.

    Returns
    -------
    float
        Sum of ``total_data_center_mw`` over the matching rows. The sum is
        zero when no row matches ``model_year``.
    """
    data_center_demand = pd.read_csv(national_demand_source_path)
    is_model_year = data_center_demand['year'] == model_year
    # The selection is only summed, never modified, so no defensive copy
    # of the filtered rows is needed.
    return data_center_demand.loc[is_model_year, 'total_data_center_mw'].sum()

def get_propagation_by_weather_year(
    propagation_source_path: str,
    scenario: str
) -> pd.Series:
    """
    Load the propagation factors for one scenario, keyed by year.

    Reads the CSV at ``propagation_source_path``, keeps the rows whose
    ``scenario`` column equals ``scenario``, and returns the ``avg_prop``
    column indexed by the ``year`` column.

    Parameters
    ----------
    propagation_source_path : str
        Path to a CSV with the columns ``scenario``, ``year`` and
        ``avg_prop``.
    scenario : str
        Scenario label to select.

    Returns
    -------
    pd.Series
        ``avg_prop`` values indexed by ``year`` for the selected scenario.
    """
    propagation_table = pd.read_csv(propagation_source_path)
    matches_scenario = propagation_table['scenario'] == scenario
    scenario_rows = propagation_table[matches_scenario]
    return scenario_rows.set_index('year')['avg_prop']


def calculate_national_data_center_demand_hourly(
    df_load: pd.DataFrame,
    model_year: int,
    scenario: str,
    national_demand_source_path: str,
    propagation_source_path: str
):
    """
    Build the national data center demand for every timestamp in ``df_load``.

    The national total for ``model_year`` is multiplied by the propagation
    factor of the weather year each timestamp falls in.

    Parameters
    ----------
    df_load : pd.DataFrame
        Load table; only its ``weather_datetime`` column is read here.
    model_year : int
        Model year whose national projection is used.
    scenario : str
        Scenario label used to select the propagation factors.
    national_demand_source_path : str
        Path to the CSV of national data center demand projections.
    propagation_source_path : str
        Path to the CSV of propagation factors.

    Returns
    -------
    pd.Series
        The ``demand_MW`` column, indexed by the unique ``weather_datetime``
        values of ``df_load``.
    """
    # National projected data center demand for the model year.
    annual_national_demand = get_national_model_year_data_center_demand(
        national_demand_source_path,
        model_year
    )

    # Propagation factors by weather year for the given scenario. They
    # represent the percentage of projected national data center demand
    # for the model year that is expected to be realized during each hour
    # of each weather year.
    factor_by_weather_year = get_propagation_by_weather_year(
        propagation_source_path,
        scenario
    )

    # NOTE(review): a reviewer asked whether repeated timestamps in df_load
    # (possibly from the dispatch_feeder level) should be summed rather than
    # dropped. Only the timestamps themselves are used here, to build an
    # index; no load values from df_load are read or discarded.
    unique_timestamps = df_load['weather_datetime'].drop_duplicates()
    hourly = pd.DataFrame(index=unique_timestamps)

    # Look up each timestamp's weather year in the propagation factors and
    # scale the national total by it.
    hourly['propagation_factor'] = hourly.index.year.map(
        factor_by_weather_year
    )
    hourly['demand_MW'] = (
        hourly['propagation_factor'] * annual_national_demand
    )

    return hourly['demand_MW']

def get_data_center_cooling_weights(
    cooling_proportions_source_path: str
) -> pd.Series:
    """
    Return the mean cooling proportion for each timestamp.

    Reads the CSV at ``cooling_proportions_source_path``, parses its
    ``weather_datetime`` column as datetimes, and averages ``cooling_prop``
    over all rows that share a timestamp. The average is unweighted.

    Parameters
    ----------
    cooling_proportions_source_path : str
        Path to a CSV with at least the columns ``weather_datetime`` and
        ``cooling_prop``.

    Returns
    -------
    pd.Series
        Mean ``cooling_prop`` indexed by ``weather_datetime``. Selecting a
        single column from the groupby yields a Series, not a DataFrame.
    """
    state_cooling_weights = pd.read_csv(cooling_proportions_source_path)
    state_cooling_weights["weather_datetime"] = (
        pd.to_datetime(state_cooling_weights["weather_datetime"])
    )
    national_cooling_weights = (
        state_cooling_weights.groupby("weather_datetime")
        ["cooling_prop"]
        .mean()
    )

    return national_cooling_weights

def get_data_center_state_weights(
    state_proportions_source_path: str,
    model_year: int,
    scenario: str
) -> pd.Series:
    """
    Return each state's share of total data center load for a model year.

    Reads the Excel workbook at ``state_proportions_source_path`` and keeps
    the rows whose ``Run Name`` equals ``scenario`` and whose ``Year``
    equals the lookup year.

    Parameters
    ----------
    state_proportions_source_path : str
        Path to an Excel workbook with the columns ``Run Name``, ``Year``,
        ``State`` and ``% of Total Data Center Load``.
    model_year : int
        Model year to look up. 2025 is looked up as 2024.
    scenario : str
        Value of ``Run Name`` to select.

    Returns
    -------
    pd.Series
        ``% of Total Data Center Load`` indexed by ``State``. Selecting a
        single column after ``set_index`` yields a Series, not a DataFrame.
    """
    # Model year 2025 is served by the 2024 rows of the workbook
    # (presumably because the source has no 2025 entry - TODO confirm).
    data_center_year = 2024 if model_year == 2025 else model_year
    state_weights = pd.read_excel(state_proportions_source_path)
    is_selected = (
        (state_weights['Run Name'] == scenario)
        & (state_weights['Year'] == data_center_year)
    )

    return (
        state_weights.loc[is_selected]
        .set_index('State')
        ["% of Total Data Center Load"]
    )


def apply_state_and_subsector_weights(
    national_demand: pd.Series,
    state_weights: pd.Series,
    subsector_weights: pd.Series,
    subsector: str,
) -> pd.DataFrame:
    """
    Split national hourly demand into per-state demand for one subsector.

    Parameters
    ----------
    national_demand : pd.Series
        National demand indexed by timestamp.
    state_weights : pd.Series
        Share of national demand for each state, indexed by state.
    subsector_weights : pd.Series
        Share of demand attributable to ``subsector``, indexed like
        ``national_demand``. The two are multiplied with pandas index
        alignment, so timestamps missing from either side yield NaN.
    subsector : str
        Label written to the ``subsector`` column of the result.

    Returns
    -------
    pd.DataFrame
        One row per timestamp, with the former index as a column, one
        column per state in ``state_weights``, and the constant columns
        ``sector`` ('commercial'), ``subsector`` and ``dispatch_feeder``
        ('Commercial').
    """
    national_subsector_demand = national_demand * subsector_weights

    # The outer product gives a (timestamps x states) matrix in which each
    # cell is the national subsector demand scaled by that state's weight.
    state_subsector_demand = pd.DataFrame(
        np.outer(national_subsector_demand, state_weights),
        index=national_subsector_demand.index,
        columns=state_weights.index,
    )

    return (
        state_subsector_demand.reset_index()
        .assign(
            sector='commercial',
            subsector=subsector,
            dispatch_feeder='Commercial',
        )
        # Blank out the columns-axis name inherited from state_weights.index.
        .rename_axis(columns='')
    )

def calculate_state_subsector_data_center_demand_hourly(
    df_load: pd.DataFrame,
    model_year: int,
    scenario: str,
    national_demand_source_path: str,
    cooling_proportions_source_path: str,
    propagation_source_path: str,
    state_proportions_source_path: str
) -> pd.DataFrame:
    """
    Build hourly state-level demand for the two data center subsectors.

    National hourly demand is split into 'data center cooling' and
    'data center it' using the cooling weights (IT is ``1 - cooling``),
    then allocated to states using the state weights.

    Parameters
    ----------
    df_load : pd.DataFrame
        Load table; supplies the timestamps and, through its column names,
        the set of states that are kept.
    model_year : int
        Model year of the projection.
    scenario : str
        Scenario label used for the propagation factors and state weights.
    national_demand_source_path : str
        Path to the CSV of national data center demand projections.
    cooling_proportions_source_path : str
        Path to the CSV of cooling proportions.
    propagation_source_path : str
        Path to the CSV of propagation factors.
    state_proportions_source_path : str
        Path to the Excel workbook of state shares.

    Returns
    -------
    pd.DataFrame
        Cooling rows followed by IT rows, with a fresh integer index and
        every missing value replaced by 0.
    """
    # Hourly national data center demand.
    national_hourly = calculate_national_data_center_demand_hourly(
        df_load,
        model_year,
        scenario,
        national_demand_source_path,
        propagation_source_path
    )

    # Share of national demand per state, restricted to states that appear
    # as columns of df_load. The remaining weights are not renormalized.
    all_state_weights = get_data_center_state_weights(
        state_proportions_source_path,
        model_year,
        scenario
    )
    in_load_table = all_state_weights.index.isin(df_load.columns)
    state_weights = all_state_weights.loc[in_load_table]

    # Share of hourly demand attributable to cooling; the rest is IT.
    cooling_weights = get_data_center_cooling_weights(
        cooling_proportions_source_path
    )
    weights_by_subsector = {
        'data center cooling': cooling_weights,
        'data center it': 1 - cooling_weights,
    }

    # State-by-state hourly demand per subsector, cooling first and then IT.
    subsector_frames = [
        apply_state_and_subsector_weights(
            national_demand=national_hourly,
            state_weights=state_weights,
            subsector_weights=weights,
            subsector=subsector_name,
        )
        for subsector_name, weights in weights_by_subsector.items()
    ]

    return pd.concat(subsector_frames, ignore_index=True).fillna(0)

def apply_custom_data_center_demand_projections(
    df_load: pd.DataFrame,
    model_year: int,
    cf: dict
):
    """
    Replace or augment the data center load in ``df_load``.

    Parameters
    ----------
    df_load : pd.DataFrame
        Load table with at least the columns ``weather_datetime``,
        ``sector``, ``subsector`` and ``dispatch_feeder``.
    model_year : int
        Model year of the projection.
    cf : dict
        Data center configuration. The keys read here are ``scenario``,
        ``national_demand_source``, ``cooling_proportions_source``,
        ``propagation_source``, ``state_proportions_source``,
        ``replace_existing_data_center_demand`` and
        ``subsectors`` -> ``commercial``.

    Returns
    -------
    pd.DataFrame
        If ``cf['replace_existing_data_center_demand']`` is truthy, the
        rows of ``df_load`` outside the configured data center subsectors
        followed by the projected rows. Otherwise the projected rows are
        appended and numeric columns are summed per
        (``weather_datetime``, ``sector``, ``subsector``,
        ``dispatch_feeder``).
    """
    projected_demand = calculate_state_subsector_data_center_demand_hourly(
        df_load,
        model_year,
        cf['scenario'],
        cf['national_demand_source'],
        cf['cooling_proportions_source'],
        cf['propagation_source'],
        cf['state_proportions_source']
    )

    if cf['replace_existing_data_center_demand']:
        # Drop the existing data center rows, then append the projections.
        is_data_center = df_load.subsector.isin(cf['subsectors']['commercial'])
        return pd.concat(
            [df_load.loc[~is_data_center], projected_demand],
            ignore_index=True
        )

    # Additive mode: stack the projections on the existing load and sum the
    # rows that share the same grouping keys.
    # NOTE(review): a reviewer noted that the 'dispatch_feeder' level is
    # generally summed over in other scripts. It is kept as a grouping key
    # here - confirm whether it should be dropped to match them.
    grouping_keys = [
        'weather_datetime',
        'sector',
        'subsector',
        'dispatch_feeder'
    ]
    stacked = pd.concat([df_load, projected_demand], ignore_index=True)

    return stacked.groupby(grouping_keys, as_index=False).sum(
        numeric_only=True
    )
Loading