Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions docs/container_runs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Container Runs

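Container runs activate when either `container_image` or `container_runtime` is set to a non-empty value; setting only one of them is enough.

Default images:

- Image (default): `registry.nersc.gov/amsc014/superfacility/pele:latest`
- Image (ALCF, same image with the `docker://` prefix): `docker://registry.nersc.gov/amsc014/superfacility/pele:latest`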

## Local (Docker)

Example config override:

```yaml
environment: local
container_image: registry.nersc.gov/amsc014/superfacility/pele:latest
container_runtime: docker
```

## NERSC Perlmutter (podman-hpc)

Example config override:

```yaml
environment: perlmutter
container_image: registry.nersc.gov/amsc014/superfacility/pele:latest
container_runtime: podman-hpc
```

Reference: [NERSC podman-hpc overview](https://docs.nersc.gov/development/containers/podman-hpc/overview/)

## ALCF (Apptainer)

Example config override:

```yaml
environment: alcf
container_image: docker://registry.nersc.gov/amsc014/superfacility/pele:latest
container_runtime: apptainer
```
4 changes: 4 additions & 0 deletions docs/mcp.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ The MCP server exposes these tools:

Use `tools/list` to discover schemas and required parameters.

## Container runs

Container usage examples live in [`docs/container_runs.md`](container_runs.md).

## Example: apply_plan

Apply a previously generated plan to write inputs:
Expand Down
8 changes: 8 additions & 0 deletions mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from src.services.input_writer import InputWriterService
from src.services.knowledge import PeleKnowledgeService
from src.services.plan import SimulationPlan
from src.services.run_container import ContainerRunner
from src.services.run_local import LocalRunner
from src.services.run_superfacility import SuperfacilityRunner
from src.services.validation import ValidationService
Expand All @@ -62,6 +63,7 @@
from src.services.input_writer import InputWriterService
from src.services.knowledge import PeleKnowledgeService
from src.services.plan import SimulationPlan
from src.services.run_container import ContainerRunner
from src.services.run_local import LocalRunner
from src.services.run_superfacility import SuperfacilityRunner
from src.services.validation import ValidationService
Expand Down Expand Up @@ -130,6 +132,12 @@ def _persist_session_context(session_id: str, context: dict[str, Any]) -> None:
def _select_runner(active_config: AMReXAgentConfig):
"""Select execution runner based on config environment."""
environment = (active_config.environment or "").lower()
container_image = getattr(active_config, "container_image", None)
container_runtime = getattr(active_config, "container_runtime", None)
has_container_image = isinstance(container_image, (str, Path)) and str(container_image).strip()
has_container_runtime = isinstance(container_runtime, str) and container_runtime.strip()
if has_container_image or has_container_runtime:
return ContainerRunner(active_config)
if environment == "local":
return LocalRunner(active_config)
if environment in {"perlmutter", "mcp"}:
Expand Down
41 changes: 41 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,47 @@ class AMReXAgentConfig(BaseModel):
"Auto-detected from PODMAN_HPC or SHIFTER env variables. "
"When True, visualization uses extraction (headless) + rendering (local) workflow."
)
container_runtime: Optional[str] = Field(
default=None,
description="Container runtime override (docker, podman, podman-hpc, apptainer, singularity). "
"When unset, inferred from environment."
)
container_image: Optional[str] = Field(
default=None,
description="Container image reference (e.g., docker image or apptainer .sif or docker:// ref)."
)
container_entrypoint: Optional[str] = Field(
default=None,
description="Container entrypoint or executable path to run (default: /usr/local/bin/run_pelelmex)."
)
container_workdir: str = Field(
default="/work",
description="Workdir inside the container for mounted run directory."
)
container_inputs_name: str = Field(
default="inputs",
description="Inputs filename passed to the container entrypoint."
)
container_extra_args: Optional[List[str]] = Field(
default_factory=list,
description="Extra runtime args passed to the container (e.g., --env, --network)."
)
container_account: Optional[str] = Field(
default=None,
description="Slurm account for container jobs (used for non-NERSC slurm submission)."
)
container_qos: Optional[str] = Field(
default=None,
description="Slurm QoS for container jobs (overrides default when set)."
)
container_constraint: Optional[str] = Field(
default=None,
description="Slurm constraint for container jobs (overrides default when set)."
)
container_walltime: Optional[str] = Field(
default=None,
description="Slurm walltime for container jobs (overrides default when set)."
)

analysis_always_enabled: bool = Field(
default=True,
Expand Down
13 changes: 11 additions & 2 deletions src/nodes/runner_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,17 @@ def runner_node(state: GraphState) -> dict[str, Any]:
if compile_selection.get("value") == "compile_only":
run_after_compile = False

# Select runner based on environment
if config.environment == "local":
# Select runner based on environment or container settings
container_image = getattr(config, "container_image", None)
container_runtime = getattr(config, "container_runtime", None)
has_container_image = isinstance(container_image, (str, Path)) and str(container_image).strip()
has_container_runtime = isinstance(container_runtime, str) and container_runtime.strip()

if has_container_image or has_container_runtime:
from src.services.run_container import ContainerRunner
runner = ContainerRunner(config)
logger.info("Using ContainerRunner for containerized execution")
elif config.environment == "local":
from src.services.run_local import LocalRunner
runner = LocalRunner(config)
logger.info("Using LocalRunner for local execution")
Expand Down
Loading
Loading