Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
18bf25a
Add superfacility staging fallbacks
jmsexton03 Feb 13, 2026
e45bf8d
Add superfacility staging tests
jmsexton03 Feb 13, 2026
1e86156
Clarify SFAPI auth errors for remote executable lookup
jmsexton03 Feb 13, 2026
c8d1008
Use sfapi_client key paths for auth
jmsexton03 Feb 13, 2026
581b272
Fix sfapi_client file listing for executables
jmsexton03 Feb 13, 2026
5f33c68
Prefer remote_output_dir on Perlmutter
jmsexton03 Feb 13, 2026
91e6490
Drop local output_dir from remote config
jmsexton03 Feb 13, 2026
88239bd
Add benchmark case grid and runner scaffold
jmsexton03 Feb 16, 2026
8318355
Add metrics collection and JSONL summaries
jmsexton03 Feb 16, 2026
9aceb6e
Add benchmark runner outputs and compare_models test
jmsexton03 Feb 16, 2026
3dd3d06
Add metrics aggregation adapter for raw benchmark records
jmsexton03 Feb 16, 2026
9be57d1
Refactor benchmark runner into shared case and model modules
jmsexton03 Feb 16, 2026
429ccde
Add difficulty and novelty metadata to benchmark cases
jmsexton03 Feb 16, 2026
6b797bf
Add benchmark metrics context and CSV aggregation
jmsexton03 Feb 16, 2026
417e053
Add difficulty and novelty aggregates to metrics output
jmsexton03 Feb 16, 2026
2374c39
Remove env-var metrics context and refine concept density
jmsexton03 Feb 16, 2026
142e241
Use benchmark context sidecar for metrics
jmsexton03 Feb 16, 2026
28cd4fe
Skip REST mkdir when SFAPI creds are available
jmsexton03 Feb 16, 2026
188500c
Fix LLM client unwrapping and SFAPI test
jmsexton03 Feb 16, 2026
2d9fe1c
Document instructor usage map
jmsexton03 Feb 16, 2026
8fcde6d
Add shared LLM call helper skeleton
jmsexton03 Feb 16, 2026
cf0b780
Refactor LLM call sites to use helper
jmsexton03 Feb 16, 2026
4854ad0
Add filesystem write policy guard
jmsexton03 Feb 17, 2026
0cfaca2
Add write policy defaults to config
jmsexton03 Feb 17, 2026
226bb36
Add test enforcing LLM helper usage
jmsexton03 Feb 17, 2026
25f8e65
Add privacy scrubbing modes and hooks
jmsexton03 Feb 17, 2026
4ac9eb8
Add privacy persistence tests
jmsexton03 Feb 17, 2026
d8835e7
Make privacy scrubber pluggable
jmsexton03 Feb 17, 2026
b7e99b8
Add scrubber selection integration test
jmsexton03 Feb 17, 2026
57272c9
Add optional scrubadub dependency
jmsexton03 Feb 17, 2026
6165f08
Scrub sensitive content from logs
jmsexton03 Feb 17, 2026
4d4404a
Scrub benchmark artifacts for privacy
jmsexton03 Feb 17, 2026
15b5ede
Document benchmark privacy run_args
jmsexton03 Feb 17, 2026
bced396
Create remote run dirs via REST mkdir
jmsexton03 Feb 17, 2026
e85d598
Allow mkdir fallback when remote output missing
jmsexton03 Feb 17, 2026
0b7a580
Ensure run_benchmark.py sets PYTHONPATH
jmsexton03 Feb 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions benchmark/cases/erf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
---
# ERF benchmark manifest header: schema version, suite id, suite-level solver,
# and the key that opens the list of cases.
schema_version: '1.0'  # quoted so the value stays a string, not the float 1.0
suite_id: 'erf_scaling_v1'
solver: 'ERF'
cases:
# Strong-scaling entry for the ERF DensityCurrent case (tagged regtest).
# Three grid sizes (S/M/L); amr_levels is 0 for each.
# NOTE(review): `sizes` is assumed to nest under `scaling` - confirm with the loader.
# NOTE(review): inputs file is `inputs_amr` while every size sets amr_levels: 0 -
# confirm this pairing is intended.
- id: 'erf_density_current_strong'
  solver: 'ERF'
  case_name: 'DensityCurrent'
  case_dir: 'Exec/DryRegTests/DensityCurrent'
  inputs: 'inputs_amr'
  description: 'Classic density (gravity) current benchmark (Straka 1993).'
  dimension: 3
  physics: [density_current, stratification, atmosphere]
  difficulty_tier: 'medium'
  prompt_length_band: 'medium'
  concept_density: 'medium'
  specialized_knowledge: false
  novelty_tier: 'parameter-only'
  scaling:
    type: 'strong'
    sizes:
    - label: 'S'
      grid: '128x256x128'
      amr_levels: 0
    - label: 'M'
      grid: '256x384x256'
      amr_levels: 0
    - label: 'L'
      grid: '384x512x384'
      amr_levels: 0
  tags: [regtest, atmosphere]
  status: 'ready'

# Weak-scaling entry for the ERF ABL-Neutral case (inputs_smagorinsky in Exec/ABL).
# Three grid sizes (S/M/L), each dimension doubling per step; amr_levels is 0 for each.
# NOTE(review): `sizes` is assumed to nest under `scaling` - confirm with the loader.
- id: 'erf_abl_neutral_weak'
  solver: 'ERF'
  case_name: 'ABL-Neutral'
  case_dir: 'Exec/ABL'
  inputs: 'inputs_smagorinsky'
  description: 'Atmospheric boundary layer with turbulence scheme and MOST options.'
  dimension: 3
  physics: [abl, turbulence, atmosphere]
  difficulty_tier: 'medium'
  prompt_length_band: 'medium'
  concept_density: 'medium'
  specialized_knowledge: true
  novelty_tier: 'parameter-only'
  scaling:
    type: 'weak'
    sizes:
    - label: 'S'
      grid: '128x128x64'
      amr_levels: 0
    - label: 'M'
      grid: '256x256x128'
      amr_levels: 0
    - label: 'L'
      grid: '512x512x256'
      amr_levels: 0
  tags: [abl, turbulence]
  status: 'ready'

# Weak-scaling entry for the ERF ABL-Stable case (mrf_stable_gabls in Exec/ABL).
# Three grid sizes (S/M/L), each dimension doubling per step; amr_levels is 0 for each.
# Status is "draft".
# NOTE(review): `sizes` is assumed to nest under `scaling` - confirm with the loader.
- id: 'erf_abl_stable_weak'
  solver: 'ERF'
  case_name: 'ABL-Stable'
  case_dir: 'Exec/ABL'
  inputs: 'mrf_stable_gabls'
  description: 'Stable ABL configuration with hydrostatic sounding inputs.'
  dimension: 3
  physics: [abl, stable, atmosphere]
  difficulty_tier: 'medium'
  prompt_length_band: 'medium'
  concept_density: 'medium'
  specialized_knowledge: true
  novelty_tier: 'config-extension'
  scaling:
    type: 'weak'
    sizes:
    - label: 'S'
      grid: '128x128x64'
      amr_levels: 0
    - label: 'M'
      grid: '256x256x128'
      amr_levels: 0
    - label: 'L'
      grid: '512x512x256'
      amr_levels: 0
  tags: [abl, stable]
  status: 'draft'

# Weak-scaling entry for the ERF ABL-Convective case (mrf_unstable in Exec/ABL).
# Three grid sizes (S/M/L), each dimension doubling per step; amr_levels is 0 for each.
# Status is "draft".
# NOTE(review): `sizes` is assumed to nest under `scaling` - confirm with the loader.
- id: 'erf_abl_convective_weak'
  solver: 'ERF'
  case_name: 'ABL-Convective'
  case_dir: 'Exec/ABL'
  inputs: 'mrf_unstable'
  description: 'Unstable/convective ABL configuration with perturbations.'
  dimension: 3
  physics: [abl, convection, atmosphere]
  difficulty_tier: 'medium'
  prompt_length_band: 'medium'
  concept_density: 'medium'
  specialized_knowledge: true
  novelty_tier: 'config-extension'
  scaling:
    type: 'weak'
    sizes:
    - label: 'S'
      grid: '128x128x64'
      amr_levels: 0
    - label: 'M'
      grid: '256x256x128'
      amr_levels: 0
    - label: 'L'
      grid: '512x512x256'
      amr_levels: 0
  tags: [abl, convection]
  status: 'draft'

# Strong-scaling entry for the ERF TaylorGreenVortex case (tagged regtest).
# Three cubic grid sizes (S/M/L); amr_levels is 0 for each.
# NOTE(review): `sizes` is assumed to nest under `scaling` - confirm with the loader.
- id: 'erf_taylor_green_strong'
  solver: 'ERF'
  case_name: 'TaylorGreenVortex'
  case_dir: 'Exec/DryRegTests/TaylorGreenVortex'
  inputs: 'inputs_advdiff'
  description: 'Taylor-Green vortex benchmark for advection/diffusion terms.'
  dimension: 3
  physics: [turbulence, vortex, atmosphere]
  difficulty_tier: 'medium'
  prompt_length_band: 'medium'
  concept_density: 'medium'
  specialized_knowledge: false
  novelty_tier: 'parameter-only'
  scaling:
    type: 'strong'
    sizes:
    - label: 'S'
      grid: '128x128x128'
      amr_levels: 0
    - label: 'M'
      grid: '256x256x256'
      amr_levels: 0
    - label: 'L'
      grid: '384x384x384'
      amr_levels: 0
  tags: [regtest, turbulence]
  status: 'ready'
143 changes: 143 additions & 0 deletions benchmark/cases/pelec.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
---
# PeleC benchmark manifest header: schema version, suite id, suite-level solver,
# and the key that opens the list of cases.
schema_version: '1.0'  # quoted so the value stays a string, not the float 1.0
suite_id: 'pelec_scaling_v1'
solver: 'PeleC'
cases:
# Weak-scaling entry for the PeleC PMF case (tagged regtest and baseline).
# Three grid sizes (S/M/L), each dimension doubling per step; amr_levels is 0 for each.
# NOTE(review): `sizes` is assumed to nest under `scaling` - confirm with the loader.
- id: 'pelec_pmf_weak'
  solver: 'PeleC'
  case_name: 'PMF'
  case_dir: 'Exec/RegTests/PMF'
  inputs: 'pmf-lidryer-rk64.inp'
  description: 'Premixed methane flame; baseline reacting flow scaling case.'
  dimension: 3
  physics: [premixed, combustion, compressible]
  difficulty_tier: 'medium'
  prompt_length_band: 'medium'
  concept_density: 'medium'
  specialized_knowledge: true
  novelty_tier: 'parameter-only'
  scaling:
    type: 'weak'
    sizes:
    - label: 'S'
      grid: '128x128x256'
      amr_levels: 0
    - label: 'M'
      grid: '256x256x512'
      amr_levels: 0
    - label: 'L'
      grid: '512x512x1024'
      amr_levels: 0
  tags: [regtest, combustion, baseline]
  status: 'ready'

# Strong-scaling entry for the PeleC Sedov case (tagged regtest).
# Three cubic grid sizes (S/M/L); amr_levels is 0 for each.
# NOTE(review): `sizes` is assumed to nest under `scaling` - confirm with the loader.
- id: 'pelec_sedov_strong'
  solver: 'PeleC'
  case_name: 'Sedov'
  case_dir: 'Exec/RegTests/Sedov'
  inputs: 'sedov-1.inp'
  description: 'Sedov blast wave for compressible hydro strong scaling.'
  dimension: 3
  physics: [blast, hydro, compressible]
  difficulty_tier: 'easy'
  prompt_length_band: 'short'
  concept_density: 'low'
  specialized_knowledge: false
  novelty_tier: 'parameter-only'
  scaling:
    type: 'strong'
    sizes:
    - label: 'S'
      grid: '256x256x256'
      amr_levels: 0
    - label: 'M'
      grid: '384x384x384'
      amr_levels: 0
    - label: 'L'
      grid: '512x512x512'
      amr_levels: 0
  tags: [regtest, hydro, shock]
  status: 'ready'

# Strong-scaling entry for the PeleC TaylorGreen case (tagged regtest).
# Three cubic grid sizes (S/M/L); amr_levels is 0 for each.
# NOTE(review): `sizes` is assumed to nest under `scaling` - confirm with the loader.
- id: 'pelec_tg_strong'
  solver: 'PeleC'
  case_name: 'TaylorGreen'
  case_dir: 'Exec/RegTests/TG'
  inputs: 'tg-1.inp'
  description: 'Taylor-Green vortex (High-Order CFD workshop benchmark).'
  dimension: 3
  physics: [turbulence, vortex, compressible]
  difficulty_tier: 'medium'
  prompt_length_band: 'medium'
  concept_density: 'medium'
  specialized_knowledge: false
  novelty_tier: 'parameter-only'
  scaling:
    type: 'strong'
    sizes:
    - label: 'S'
      grid: '256x256x256'
      amr_levels: 0
    - label: 'M'
      grid: '384x384x384'
      amr_levels: 0
    - label: 'L'
      grid: '512x512x512'
      amr_levels: 0
  tags: [regtest, turbulence]
  status: 'ready'

# Weak-scaling entry for the PeleC TGReact case (tagged regtest).
# Three cubic grid sizes (S/M/L), each dimension doubling per step; amr_levels is 0 for each.
# NOTE(review): `sizes` is assumed to nest under `scaling` - confirm with the loader.
- id: 'pelec_tgreact_weak'
  solver: 'PeleC'
  case_name: 'TGReact'
  case_dir: 'Exec/RegTests/TGReact'
  inputs: 'tgreact.inp'
  description: 'Reacting Taylor-Green vortex (combustion DNS workshop setup).'
  dimension: 3
  physics: [turbulence, combustion, compressible]
  difficulty_tier: 'hard'
  prompt_length_band: 'long'
  concept_density: 'high'
  specialized_knowledge: true
  novelty_tier: 'config-extension'
  scaling:
    type: 'weak'
    sizes:
    - label: 'S'
      grid: '128x128x128'
      amr_levels: 0
    - label: 'M'
      grid: '256x256x256'
      amr_levels: 0
    - label: 'L'
      grid: '512x512x512'
      amr_levels: 0
  tags: [regtest, combustion, turbulence]
  status: 'ready'

# Weak-scaling entry for the PeleC JetFlame case (tagged production).
# Three grid sizes (S/M/L); amr_levels is 1 for each.
# NOTE(review): `sizes` is assumed to nest under `scaling` - confirm with the loader.
# NOTE(review): S -> M doubles every dimension, but L (384x384x768) is only 1.5x M
# per dimension - confirm the L grid is intended.
- id: 'pelec_jetflame_weak'
  solver: 'PeleC'
  case_name: 'JetFlame'
  case_dir: 'Exec/Production/JetFlame'
  inputs: 'inputs'
  description: 'Turbulent jet flame production case for weak scaling.'
  dimension: 3
  physics: [jet, combustion, compressible]
  difficulty_tier: 'hard'
  prompt_length_band: 'long'
  concept_density: 'high'
  specialized_knowledge: true
  novelty_tier: 'config-extension'
  scaling:
    type: 'weak'
    sizes:
    - label: 'S'
      grid: '128x128x256'
      amr_levels: 1
    - label: 'M'
      grid: '256x256x512'
      amr_levels: 1
    - label: 'L'
      grid: '384x384x768'
      amr_levels: 1
  tags: [production, combustion, jet]
  status: 'ready'
Loading
Loading