diff --git a/.claude/skills/spawn-agent/SKILL.md b/.claude/skills/spawn-agent/SKILL.md index d20bd79..af3dd7e 100644 --- a/.claude/skills/spawn-agent/SKILL.md +++ b/.claude/skills/spawn-agent/SKILL.md @@ -255,6 +255,135 @@ git -C "${GIT_ROOT}" worktree remove --force "${AGENTS_HOME}/${BRANCH}" rm -rf "${AGENTS_HOME}/${BRANCH}" ``` +## PI agents (local mlx_lm backend) + +PI agents are a **separate class of agent** that use the pi.dev SDK with a +LOCAL mlx_lm.server (managed via `/iac`) as their OpenAI-compatible backend, +instead of the Anthropic cloud API. They are useful when you want agent work +without consuming Claude API credits, or when the task is well-served by a +local Gemma-class model. + +### When to use a PI agent (detection) + +Use a PI agent when the user says any of: +- "spawn a PI agent" / "lanza un agente PI" +- "use the local model" / "local LLM" +- "use mlx_lm" / "use the local server" +- "no Claude credits" / "without using the API" + +Otherwise, default to a regular Claude agent. + +### Required setup (one-time) + +```bash +# 1. Build the PI image +cd /config && make build-pi + +# 2. Start the local model server (from /iac) +cd /iac && uv sync && uv run iac server start +uv run iac server status # verify it is reachable +``` + +### Spawning a PI agent + +PI agents do NOT need `CLAUDE_CONTAINER_OAUTH_TOKEN`. They authenticate +against the local server via `PI_BASE_URL` (default +`http://192.168.100.1:8080/v1` — the **gateway IP** of the default bridge +subnet; `host.containers.internal` is NOT implemented in Apple Container +CLI, see apple/container#346). + +Preferred: use the CLI wrapper. + +```bash +q pi spawn --branch pi/refactor --task "rename ambiguous helpers" +``` + +Equivalent Makefile invocation: + +```bash +cd /config && make spawn-pi \ + BRANCH=pi/refactor TASK="rename ambiguous helpers" +``` + +Container name pattern: `{project}-pi-{sanitized-branch}` (note the +`-pi-` segment that distinguishes them from Claude agents). 
+ +If the user customised the bridge subnet, pass `--base-url`: + +```bash +q pi spawn --branch pi/x --task "..." --base-url http://{gateway-ip}:8080/v1 +``` + +### Memory ceiling — MAX_PI_AGENTS=1 + +The model + 6 GB prompt cache leaves little RAM headroom on Apple Silicon. +The Makefile enforces `MAX_PI_AGENTS=1` by default — `spawn-pi` will refuse +to launch a second PI agent while one is still running. If the user asks +for multiple PI agents in parallel, **warn them** and recommend stopping +the existing one first. + +### Listing, monitoring, stopping PI agents + +```bash +q pi list # only PI agents +q pi follow --branch pi/refactor # live logs +q pi status --branch pi/refactor # status.json from worktree +q pi stop --branch pi/refactor # stop the container +``` + +The status.json for PI agents includes `"agent_kind": "pi"`, used to filter +PI worktrees from Claude worktrees in `list-pi-agents`. + +### Important — do not mix targets + +- Use `spawn-pi` / `q pi spawn` for PI agents — never the regular `spawn`. +- Use `stop-pi-agent` / `q pi stop` for PI agents — never `stop-agent`. +- The two agent classes share `AGENTS_HOME` and the bridge network, but + their containers, images, and entrypoints are independent. + +### Formulating tasks for PI agents + +A Gemma-class local model is much more literal than Claude. Three rules +when writing the `--task` string: + +1. **Use only relative paths.** `iac/main.py`, not `/workspace/iac/main.py`. + The agent already `cd`s into the worktree; absolute paths cause it to + escape the worktree and write to the main repo's working copy. +2. **Bound the scope explicitly.** End the task with + `Modify ONLY {file}. Do not create any other files.` Without this, the + model often invents extra files ("just in case" tests, READMEs, etc.). +3. 
**Ask for a commit verification line.** Add + `After the edit, commit and include 'git log -1 --oneline' at the end of your response.` + This gives the orchestrator a string-level handle for "did the agent + actually commit?" beyond just checking `status.json`. + +`entrypoint-pi.sh` already prepends a structural preamble with rules 1–3 +to **every** PI task — so even tasks crafted by hand or by a different +orchestrator inherit the discipline. Restating the rules in the user-facing +task wording still helps reinforce them with the model. + +### Verifying a PI agent completed successfully + +`exit_code == 0` is not enough. The model can produce a confident-sounding +final response while having made no actual changes. Always check: + +```bash +q pi status --branch +# → status.json must show: +# "phase": "completed" +# "commits": N where N >= 1 (or 0 only if the task was read-only) +``` + +Plus, before merging: + +```bash +git diff --name-only main.. # files actually changed +git log -1 --oneline # commit message + sha +``` + +If `commits == 0` but the task asked for code changes, report this to the +user as a failure regardless of `exit_code`. Do NOT merge the empty branch. + ## Apple Container CLI reference (key commands) ``` diff --git a/.claude/skills/spawn-agent/evals/evals.json b/.claude/skills/spawn-agent/evals/evals.json index 4623dd5..76e2b60 100644 --- a/.claude/skills/spawn-agent/evals/evals.json +++ b/.claude/skills/spawn-agent/evals/evals.json @@ -102,6 +102,43 @@ "Stops and reports to user if a conflict occurs during any merge", "Does NOT delete original agent branches unless user explicitly asks" ] + }, + { + "id": 9, + "prompt": "Spawn a PI agent (local mlx_lm backend) to investigate the auth module and propose refactors. Branch pi/auth-debug.", + "expected_output": "Claude detects this is a PI agent (uses the local mlx_lm.server), runs `make spawn-pi` (or `q pi spawn`) with BRANCH=pi/auth-debug and a feature-style task prompt. 
Reminds the user to ensure the local server is running via `uv run iac server status`. Does NOT use the Claude `spawn` target, does NOT pass CLAUDE_CODE_OAUTH_TOKEN.", + "files": [], + "expectations": [ + "Uses the PI-specific target: `spawn-pi` (Makefile) or `q pi spawn` (CLI)", + "Container name pattern includes `-pi-` (sanitized branch under PI namespace)", + "Does NOT pass CLAUDE_CODE_OAUTH_TOKEN — PI agents authenticate against the local model", + "Mentions or checks that mlx_lm.server is running (e.g. `uv run iac server status`)", + "Warns the user about MAX_PI_AGENTS=1 if they ask for more than one PI agent at once" + ] + }, + { + "id": 10, + "prompt": "Show me the PI agents currently running. I want to know which ones are using the local model.", + "expected_output": "Claude lists only PI agents (containers matching `*-pi-*` or worktrees with agent_kind=pi in status.json). Uses `make list-pi-agents` or `q pi list`. Does NOT include regular Claude agents in the output.", + "files": [], + "expectations": [ + "Uses `list-pi-agents` target or `q pi list` command (not the generic `list-agents`)", + "Filters by PI containers (name includes `-pi-`) or PI worktrees (agent_kind=pi)", + "Does NOT spawn a new container", + "Output clearly distinguishes PI agents from Claude agents" + ] + }, + { + "id": 11, + "prompt": "The pi/auth-debug PI agent finished. Stop the container and tell me what it did.", + "expected_output": "Claude stops the PI container with `make stop-pi-agent BRANCH=pi/auth-debug` (or `q pi stop --branch pi/auth-debug`), then reads the persisted status.json from the worktree to summarize phase, exit code, commits. 
Does NOT call `stop-agent` (the Claude target).", + "files": [], + "expectations": [ + "Uses `stop-pi-agent` target (not `stop-agent`)", + "Reads `.agent/status.json` from the worktree to summarize results", + "Reports phase, exit code, and commit count", + "Does NOT attempt `container logs` on a stopped container" + ] } ] } diff --git a/app/cli/src/container_cli/commands/pi_agents.py b/app/cli/src/container_cli/commands/pi_agents.py new file mode 100644 index 0000000..04fef27 --- /dev/null +++ b/app/cli/src/container_cli/commands/pi_agents.py @@ -0,0 +1,143 @@ +"""PI agent lifecycle commands. + +PI agents are an extension of the agent system that use the pi.dev SDK with +a LOCAL mlx_lm.server backend (managed via /iac) instead of the Anthropic +cloud API. They run in separate containers built from Dockerfile.pi. + +Open/Closed: this module is a pure extension. The existing agents.py and +build.py are not modified — pi commands live under their own subapp. +""" + +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Annotated + +import typer + +from container_cli.utils import find_git_root, run_make + +app = typer.Typer(help="PI agent lifecycle (local mlx_lm.server backend)") + + +def _agents_home() -> Path: + """Resolve AGENTS_HOME, falling back to sibling .worktrees/ directory.""" + env_val = os.environ.get("AGENTS_HOME") + if env_val: + return Path(env_val) + return find_git_root().parent / ".worktrees" + + +@app.command() +def build( + image: Annotated[ + str | None, typer.Option("--image", help="PI image tag") + ] = None, + dockerfile: Annotated[ + str | None, typer.Option("--dockerfile", help="Path to PI Dockerfile") + ] = None, +) -> None: + """Build the PI agent image (Ubuntu 26.04 + PI SDK).""" + vars: dict[str, str] = {} + if image: + vars["PI_IMAGE"] = image + if dockerfile: + vars["PI_DOCKERFILE"] = dockerfile + run_make("build-pi", vars) + + +@app.command() +def spawn( + branch: Annotated[ + str, 
typer.Option("--branch", help="Git branch for the PI agent worktree") + ], + task: Annotated[ + str, typer.Option("--task", help="Task description for the PI agent") + ], + cpus: Annotated[int | None, typer.Option("--cpus", help="CPU count")] = None, + memory: Annotated[ + str | None, typer.Option("--memory", help="Memory limit (e.g. 3G)") + ] = None, + image: Annotated[str | None, typer.Option("--image", help="PI image tag")] = None, + base_url: Annotated[ + str | None, + typer.Option( + "--base-url", + help="Override the OpenAI-compatible base URL for the local LLM", + ), + ] = None, + model_id: Annotated[ + str | None, + typer.Option( + "--model-id", + help="Override the model id served by mlx_lm.server", + ), + ] = None, +) -> None: + """Spawn a detached headless PI agent (local mlx_lm.server backend). + + The mlx_lm.server must be running on the host. Check with: + uv run iac server status + """ + typer.echo( + "[pi] reminder: ensure mlx_lm.server is running " + "(`uv run iac server status` from /iac)" + ) + vars: dict[str, str] = {"BRANCH": branch, "TASK": task} + if cpus is not None: + vars["CPUS"] = str(cpus) + if memory: + vars["MEMORY"] = memory + if image: + vars["PI_IMAGE"] = image + if base_url: + vars["PI_BASE_URL"] = base_url + if model_id: + vars["PI_MODEL_ID"] = model_id + run_make("spawn-pi", vars) + + +@app.command(name="list") +def list_agents() -> None: + """List active PI agent containers and PI worktrees.""" + run_make("list-pi-agents") + + +@app.command() +def logs( + branch: Annotated[str, typer.Option("--branch", help="PI agent branch name")], +) -> None: + """Show logs for a PI agent (live container or persisted log).""" + run_make("logs-pi-agent", {"BRANCH": branch}) + + +@app.command() +def follow( + branch: Annotated[str, typer.Option("--branch", help="PI agent branch name")], +) -> None: + """Follow live streaming logs for a PI agent.""" + run_make("follow-pi-agent", {"BRANCH": branch}, tty=True) + + +@app.command() +def stop( + 
branch: Annotated[str, typer.Option("--branch", help="PI agent branch name")], +) -> None: + """Stop a PI agent container.""" + run_make("stop-pi-agent", {"BRANCH": branch}) + + +@app.command() +def status( + branch: Annotated[str, typer.Option("--branch", help="PI agent branch name")], +) -> None: + """Show PI agent status from persisted status.json file.""" + status_file = _agents_home() / branch / ".agent" / "status.json" + if not status_file.exists(): + typer.echo(f"[pi-status] No status file found for branch '{branch}'.") + typer.echo(f"[pi-status] Expected at: {status_file}") + raise typer.Exit(1) + data = json.loads(status_file.read_text()) + typer.echo(json.dumps(data, indent=2)) diff --git a/app/cli/src/container_cli/main.py b/app/cli/src/container_cli/main.py index bd5e1d5..80efbf9 100644 --- a/app/cli/src/container_cli/main.py +++ b/app/cli/src/container_cli/main.py @@ -1,6 +1,6 @@ import typer -from container_cli.commands import agents, build, network, run +from container_cli.commands import agents, build, network, pi_agents, run app = typer.Typer(name="q", help="Container management CLI for Claude agent containers") agents_app = agents.app @@ -24,6 +24,9 @@ # Register agents sub-app app.add_typer(agents_app, name="agents") +# Register PI agent sub-app (extension — local mlx_lm backend, no Claude token) +app.add_typer(pi_agents.app, name="pi") + if __name__ == "__main__": app() diff --git a/app/cli/tests/acceptance/conftest.py b/app/cli/tests/acceptance/conftest.py index e19a09e..b62d508 100644 --- a/app/cli/tests/acceptance/conftest.py +++ b/app/cli/tests/acceptance/conftest.py @@ -40,7 +40,9 @@ def invocation_context( patch("container_cli.commands.build.run_make") as m_build, \ patch("container_cli.commands.run.run_make") as m_run, \ patch("container_cli.commands.network.run_make") as m_network, \ - patch("container_cli.commands.agents.find_git_root", return_value=repo): + patch("container_cli.commands.pi_agents.run_make") as m_pi, \ + 
patch("container_cli.commands.agents.find_git_root", return_value=repo), \ + patch("container_cli.commands.pi_agents.find_git_root", return_value=repo): ctx = InvocationContext( runner=CliRunner(), mocks={ @@ -48,6 +50,7 @@ def invocation_context( "build": m_build, "run": m_run, "network": m_network, + "pi": m_pi, }, git_root=repo, agents_home=agents_home, diff --git a/app/cli/tests/acceptance/features/pi_agents.feature b/app/cli/tests/acceptance/features/pi_agents.feature new file mode 100644 index 0000000..8a8241b --- /dev/null +++ b/app/cli/tests/acceptance/features/pi_agents.feature @@ -0,0 +1,63 @@ +Feature: PI agent lifecycle + As a user of the q CLI + I want to spawn and manage PI agents backed by the local mlx_lm.server + So that I can run agents without using cloud LLM credits + + Background: + Given the make runner is ready + + Scenario: Spawn a PI agent + When I run "q pi spawn --branch pi/refactor --task rename-helpers" + Then the command exits successfully + And the make runner was invoked with target "spawn-pi" + And the make vars include BRANCH="pi/refactor" and TASK="rename-helpers" + + Scenario: Spawn does not require CLAUDE_CONTAINER_OAUTH_TOKEN + Given the CLAUDE_CONTAINER_OAUTH_TOKEN is not set + When I run "q pi spawn --branch pi/refactor --task rename-helpers" + Then the command exits successfully + And the make runner was invoked with target "spawn-pi" + + Scenario: Spawn with custom resources and backend URL + When I run "q pi spawn --branch pi/refactor --task work --cpus 4 --memory 8G --base-url http://10.0.0.5:9000/v1" + Then the command exits successfully + And the make vars include CPUS="4" and MEMORY="8G" and PI_BASE_URL="http://10.0.0.5:9000/v1" + + Scenario: Spawn with custom model id + When I run "q pi spawn --branch pi/refactor --task work --model-id mlx-community/llama-3.1-8b" + Then the command exits successfully + And the make vars include PI_MODEL_ID="mlx-community/llama-3.1-8b" + + Scenario: List PI agents + When I run "q pi 
@given(parsers.parse(
    'a PI status file exists for branch "{branch}" with payload {payload}'
))
def _pi_status_file_exists(invocation_context, branch: str, payload: str) -> None:
    """Write the raw payload to the branch's .agent/status.json under agents_home.

    The payload text is stored verbatim (no JSON validation), and the
    ``.agent`` directory is created on demand.
    """
    # assumes invocation_context.agents_home is a pathlib.Path — confirm in conftest
    target = invocation_context.agents_home.joinpath(branch, ".agent", "status.json")
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(payload)
+scenarios("../features/pi_agents.feature") diff --git a/app/cli/tests/conftest.py b/app/cli/tests/conftest.py index 6746d76..281af0e 100644 --- a/app/cli/tests/conftest.py +++ b/app/cli/tests/conftest.py @@ -19,12 +19,14 @@ def mock_run_make(): with patch("container_cli.commands.agents.run_make") as m_agents, \ patch("container_cli.commands.build.run_make") as m_build, \ patch("container_cli.commands.run.run_make") as m_run, \ - patch("container_cli.commands.network.run_make") as m_network: + patch("container_cli.commands.network.run_make") as m_network, \ + patch("container_cli.commands.pi_agents.run_make") as m_pi: yield { "agents": m_agents, "build": m_build, "run": m_run, "network": m_network, + "pi": m_pi, } diff --git a/app/cli/tests/test_main.py b/app/cli/tests/test_main.py index ad7bc61..57a1248 100644 --- a/app/cli/tests/test_main.py +++ b/app/cli/tests/test_main.py @@ -25,6 +25,10 @@ def test_has_agents_subapp(self): group_names = [g.name for g in app.registered_groups] assert "agents" in group_names + def test_has_pi_subapp(self): + group_names = [g.name for g in app.registered_groups] + assert "pi" in group_names + class TestHelp: def test_help_exits_zero(self): diff --git a/app/cli/tests/test_pi_agents.py b/app/cli/tests/test_pi_agents.py new file mode 100644 index 0000000..87c34e3 --- /dev/null +++ b/app/cli/tests/test_pi_agents.py @@ -0,0 +1,135 @@ +"""Unit tests for the pi_agents command module. + +Mirrors test_agents.py at the unit level. PI agents do NOT use +CLAUDE_CONTAINER_OAUTH_TOKEN — they hit the local mlx_lm.server. 
+""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import patch + +import pytest +import typer + +from container_cli.commands.pi_agents import ( + _agents_home, + build, + follow, + list_agents, + logs, + spawn, + status, + stop, +) + + +class TestSpawn: + def test_basic_spawn(self, mock_run_make): + spawn(branch="pi/a", task="rename", cpus=None, memory=None, image=None, base_url=None, model_id=None) + mock_run_make["pi"].assert_called_once_with( + "spawn-pi", {"BRANCH": "pi/a", "TASK": "rename"} + ) + + def test_spawn_with_cpus(self, mock_run_make): + spawn(branch="b", task="t", cpus=4, memory=None, image=None, base_url=None, model_id=None) + call_vars = mock_run_make["pi"].call_args[0][1] + assert call_vars["CPUS"] == "4" + + def test_spawn_with_memory(self, mock_run_make): + spawn(branch="b", task="t", cpus=None, memory="8G", image=None, base_url=None, model_id=None) + call_vars = mock_run_make["pi"].call_args[0][1] + assert call_vars["MEMORY"] == "8G" + + def test_spawn_with_image(self, mock_run_make): + spawn(branch="b", task="t", cpus=None, memory=None, image="claude-pi:custom", base_url=None, model_id=None) + call_vars = mock_run_make["pi"].call_args[0][1] + assert call_vars["PI_IMAGE"] == "claude-pi:custom" + + def test_spawn_with_base_url(self, mock_run_make): + spawn(branch="b", task="t", cpus=None, memory=None, image=None, base_url="http://10.0.0.5:9000/v1", model_id=None) + call_vars = mock_run_make["pi"].call_args[0][1] + assert call_vars["PI_BASE_URL"] == "http://10.0.0.5:9000/v1" + + def test_spawn_with_model_id(self, mock_run_make): + spawn(branch="b", task="t", cpus=None, memory=None, image=None, base_url=None, model_id="mlx-community/gemma-4-26b-a4b-it-4bit") + call_vars = mock_run_make["pi"].call_args[0][1] + assert call_vars["PI_MODEL_ID"] == "mlx-community/gemma-4-26b-a4b-it-4bit" + + def test_spawn_does_not_require_token(self, mock_run_make, monkeypatch): + 
monkeypatch.delenv("CLAUDE_CONTAINER_OAUTH_TOKEN", raising=False) + spawn(branch="b", task="t", cpus=None, memory=None, image=None, base_url=None, model_id=None) + mock_run_make["pi"].assert_called_once() + + +class TestBuild: + def test_basic_build(self, mock_run_make): + build(image=None, dockerfile=None) + mock_run_make["pi"].assert_called_once_with("build-pi", {}) + + def test_build_with_overrides(self, mock_run_make): + build(image="claude-pi:custom", dockerfile="Dockerfile.pi.custom") + mock_run_make["pi"].assert_called_once_with( + "build-pi", + {"PI_IMAGE": "claude-pi:custom", "PI_DOCKERFILE": "Dockerfile.pi.custom"}, + ) + + +class TestListAgents: + def test_calls_run_make(self, mock_run_make): + list_agents() + mock_run_make["pi"].assert_called_once_with("list-pi-agents") + + +class TestLogs: + def test_passes_branch(self, mock_run_make): + logs(branch="pi/x") + mock_run_make["pi"].assert_called_once_with( + "logs-pi-agent", {"BRANCH": "pi/x"} + ) + + +class TestFollow: + def test_passes_branch_with_tty(self, mock_run_make): + follow(branch="pi/x") + mock_run_make["pi"].assert_called_once_with( + "follow-pi-agent", {"BRANCH": "pi/x"}, tty=True + ) + + +class TestStop: + def test_passes_branch(self, mock_run_make): + stop(branch="pi/x") + mock_run_make["pi"].assert_called_once_with( + "stop-pi-agent", {"BRANCH": "pi/x"} + ) + + +class TestStatus: + def test_missing_status_file(self, tmp_path, monkeypatch): + monkeypatch.setenv("AGENTS_HOME", str(tmp_path)) + with pytest.raises(typer.Exit) as exc_info: + status(branch="pi/x") + assert exc_info.value.exit_code == 1 + + def test_reads_status_file(self, tmp_path, monkeypatch): + monkeypatch.setenv("AGENTS_HOME", str(tmp_path)) + status_file = tmp_path / "pi/x" / ".agent" / "status.json" + status_file.parent.mkdir(parents=True) + status_file.write_text('{"phase": "completed", "agent_kind": "pi"}') + status(branch="pi/x") + + +class TestAgentsHome: + def test_uses_env_var(self, tmp_path, monkeypatch): + 
monkeypatch.setenv("AGENTS_HOME", str(tmp_path)) + assert _agents_home() == tmp_path + + def test_fallback_path_name(self, monkeypatch): + monkeypatch.delenv("AGENTS_HOME", raising=False) + with patch( + "container_cli.commands.pi_agents.find_git_root", + return_value=Path("/home/user/repo"), + ): + result = _agents_home() + assert result == Path("/home/user/.worktrees") diff --git a/config/Dockerfile.pi b/config/Dockerfile.pi new file mode 100644 index 0000000..f1bca8a --- /dev/null +++ b/config/Dockerfile.pi @@ -0,0 +1,109 @@ +# ============================================================================= +# PI Agent container — Ubuntu 26.04 ARM64 + pi-coding-agent + local mlx_lm +# +# - Base: ubuntu:26.04 (linux/arm64, kernel 7.x) +# - Agent CLI: `pi` (@earendil-works/pi-coding-agent) — headless: `pi -p "..."` +# - Backend: local mlx_lm.server on the host, reached at PI_BASE_URL +# - Auth: none (no Claude credentials required) +# - Runs as: non-root `agent` user (su-exec drops from root at runtime) +# +# Operator guide and troubleshooting: docs/agents/pi-agent.md +# ============================================================================= + +FROM --platform=linux/arm64 ubuntu:26.04 + +LABEL maintainer="pi-agent" \ + description="PI agent — Ubuntu 26.04 + local mlx_lm backend" \ + org.opencontainers.image.base.name="ubuntu:26.04" + +ENV DEBIAN_FRONTEND=noninteractive \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + TERM=xterm-256color \ + PATH="/root/.local/bin:/usr/local/bin:$PATH" \ + BAT_PAGER="" \ + BAT_STYLE="numbers,changes,header" \ + PI_BASE_URL=http://192.168.100.1:8080/v1 \ + PI_MODEL_ID=mlx-community/gemma-4-26b-a4b-it-4bit \ + PI_PROVIDER_NAME=local \ + PI_DISABLE_AUTOUPDATE=1 + +# System packages + Rust CLIs prebuilt by Ubuntu (no cargo build needed). +# fd-find and bat ship with renamed binaries to avoid Debian collisions; +# we symlink them back to the upstream names for ergonomics. 
# su-exec equivalent: drops to the specified user, then execs the command.
# Needed because the entrypoint must do root-only work (chown of the host-mounted
# worktree, models.json for the agent user) before handing off to `pi` as agent.
# Order matters: initgroups() and setgid() need root, so both run before
# setuid(). initgroups() replaces root's supplementary groups with the target
# user's; without it the "dropped" process would still carry root's groups.
RUN printf '#!/usr/bin/env python3\nimport os, sys, pwd\npw = pwd.getpwnam(sys.argv[1])\nos.initgroups(pw.pw_name, pw.pw_gid)\nos.setgid(pw.pw_gid)\nos.setuid(pw.pw_uid)\nos.execvp(sys.argv[2], sys.argv[2:])\n' \
    > /usr/local/bin/su-exec \
    && chmod +x /usr/local/bin/su-exec
+# MAX_PI_AGENTS=1 by default. Override only when you understand the cost. +# +# Open/Closed: this section adds new targets; no existing target is modified. +# +# Usage: +# make build-pi +# uv run iac server start # in /iac, FIRST — start the local LLM +# make spawn-pi BRANCH=pi/refactor TASK="rename ambiguous helpers" +# make list-pi-agents +# make follow-pi-agent BRANCH=pi/refactor +# make stop-pi-agent BRANCH=pi/refactor + +PI_IMAGE ?= claude-pi:ubuntu +PI_DOCKERFILE ?= Dockerfile.pi +MAX_PI_AGENTS ?= 1 +# Apple Container CLI does not implement host.containers.internal +# (apple/container#346). Containers reach the host via the bridge gateway IP, +# which matches the .1 address of the configured SUBNET (default +# 192.168.100.0/24 → gateway 192.168.100.1). +PI_BASE_URL ?= http://192.168.100.1:8080/v1 +PI_MODEL_ID ?= mlx-community/gemma-4-26b-a4b-it-4bit + +build-pi: + container build --no-cache -f $(PI_DOCKERFILE) -t $(PI_IMAGE) . + +spawn-pi: network + @RUNNING=$$(container list 2>/dev/null | grep -c "$(PROJECT_NAME)-pi-" || true); \ + if [ "$$RUNNING" -ge "$(MAX_PI_AGENTS)" ]; then \ + echo "[error] MAX_PI_AGENTS=$(MAX_PI_AGENTS) reached ($$RUNNING PI agents running)."; \ + echo "[error] Stop one with 'make stop-pi-agent BRANCH=' before spawning another."; \ + echo "[error] Reason: the local model + cache leaves little RAM for concurrent agents."; \ + exit 1; \ + fi + @mkdir -p "$(WORKTREES_DIR)" + @echo "[pi-spawn] Launching PI agent: $(PROJECT_NAME)-pi-$(CONTAINER_BRANCH)" + @echo "[pi-spawn] Worktree: $(WORKTREES_DIR)/$(BRANCH)" + @echo "[pi-spawn] Backend: $(PI_BASE_URL)" + @echo "[pi-spawn] Task: $(TASK)" + container run -d --rm \ + --name $(PROJECT_NAME)-pi-$(CONTAINER_BRANCH) \ + --network $(NETWORK) \ + --cpus $(CPUS) \ + --memory $(MEMORY) \ + --dns 1.1.1.1 \ + -v $(GIT_ROOT):/workspace \ + -v $(WORKTREES_DIR):/worktrees \ + -e PI_BASE_URL=$(PI_BASE_URL) \ + -e PI_MODEL_ID=$(PI_MODEL_ID) \ + $(PI_IMAGE) \ + --worktree "$(BRANCH)" --task "$(TASK)" + 
@echo "[pi-spawn] Agent started. View logs: make follow-pi-agent BRANCH=$(BRANCH)" + +list-pi-agents: + @echo "[pi-agents] Active PI containers for project '$(PROJECT_NAME)':" + @container list 2>/dev/null | grep "$(PROJECT_NAME)-pi-" || echo " (none)" + @echo "" + @echo "[pi-agents] Worktrees in $(WORKTREES_DIR):" + @if [ -d "$(WORKTREES_DIR)" ]; then \ + for dir in $(WORKTREES_DIR)/*/; do \ + [ -d "$$dir" ] || continue; \ + status_file="$$dir/.agent/status.json"; \ + if [ -f "$$status_file" ] && command -v jq >/dev/null 2>&1; then \ + kind=$$(jq -r '.agent_kind // ""' "$$status_file" 2>/dev/null || echo ""); \ + [ "$$kind" = "pi" ] || continue; \ + branch=$$(basename "$$dir"); \ + phase=$$(jq -r '.phase // "unknown"' "$$status_file" 2>/dev/null || echo "unknown"); \ + printf " %-30s %s\n" "$$branch" "$$phase"; \ + fi; \ + done; \ + else \ + echo " (none yet)"; \ + fi + +logs-pi-agent: + @[ -n "$(BRANCH)" ] || (echo "[error] BRANCH required"; exit 1) + @container logs $(PROJECT_NAME)-pi-$(CONTAINER_BRANCH) 2>/dev/null \ + || { \ + LOG_FILE="$(WORKTREES_DIR)/$(BRANCH)/.agent/agent.log"; \ + if [ -f "$$LOG_FILE" ]; then \ + echo "[pi-logs] Container $(PROJECT_NAME)-pi-$(CONTAINER_BRANCH) no longer running (PI agent finished)."; \ + echo "[pi-logs] Showing saved logs from $$LOG_FILE"; \ + echo "---"; \ + cat "$$LOG_FILE"; \ + else \ + echo "[pi-logs] Container $(PROJECT_NAME)-pi-$(CONTAINER_BRANCH) not found and no saved logs at $$LOG_FILE"; \ + fi; \ + } + +follow-pi-agent: + @[ -n "$(BRANCH)" ] || (echo "[error] BRANCH required"; exit 1) + @container logs -f $(PROJECT_NAME)-pi-$(CONTAINER_BRANCH) 2>/dev/null \ + || { \ + LOG_FILE="$(WORKTREES_DIR)/$(BRANCH)/.agent/agent.log"; \ + if [ -f "$$LOG_FILE" ]; then \ + echo "[pi-logs] Container $(PROJECT_NAME)-pi-$(CONTAINER_BRANCH) no longer running (PI agent finished)."; \ + echo "[pi-logs] Showing saved logs from $$LOG_FILE"; \ + echo "---"; \ + cat "$$LOG_FILE"; \ + else \ + echo "[pi-logs] Container 
$(PROJECT_NAME)-pi-$(CONTAINER_BRANCH) not found and no saved logs at $$LOG_FILE"; \ + fi; \ + } + +status-pi-agent: + @STATUS_FILE="$(WORKTREES_DIR)/$(BRANCH)/.agent/status.json"; \ + if [ -f "$$STATUS_FILE" ]; then \ + if command -v jq >/dev/null 2>&1; then \ + jq '.' "$$STATUS_FILE"; \ + else \ + cat "$$STATUS_FILE"; \ + fi; \ + else \ + echo "[pi-status] No status file found for branch '$(BRANCH)'."; \ + echo "[pi-status] Expected at: $$STATUS_FILE"; \ + fi + +stop-pi-agent: + @[ -n "$(BRANCH)" ] || (echo "[error] BRANCH required"; exit 1) + @container stop $(PROJECT_NAME)-pi-$(CONTAINER_BRANCH) 2>/dev/null \ + && echo "[pi-stop] PI agent $(PROJECT_NAME)-pi-$(CONTAINER_BRANCH) stopped." \ + || echo "[pi-stop] PI agent $(PROJECT_NAME)-pi-$(CONTAINER_BRANCH) not found or already stopped." + +clean-pi: + container image delete $(PI_IMAGE) 2>/dev/null || true + # ── Help ────────────────────────────────────────────────────────────────────── help: @@ -234,6 +368,15 @@ help: @echo " clean-network Remove network $(NETWORK)" @echo " clean-all Remove image + network" @echo "" + @echo " PI agent targets (local mlx_lm backend):" + @echo " build-pi Build $(PI_IMAGE) from $(PI_DOCKERFILE)" + @echo " spawn-pi Spawn PI agent (MAX_PI_AGENTS=$(MAX_PI_AGENTS) guard)" + @echo " list-pi-agents List active PI agents" + @echo " follow-pi-agent Follow live logs of PI agent (BRANCH=...)" + @echo " status-pi-agent Show status.json of PI agent (BRANCH=...)" + @echo " stop-pi-agent Stop a PI agent (BRANCH=...)" + @echo " clean-pi Remove the PI image" + @echo "" @echo " Agent targets (examples):" @echo " make spawn BRANCH=feat/auth TASK=\"implement OAuth flow\"" @echo " make list-agents" diff --git a/config/entrypoint-pi.sh b/config/entrypoint-pi.sh new file mode 100644 index 0000000..903c6e2 --- /dev/null +++ b/config/entrypoint-pi.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# Entrypoint for PI agent containers — local mlx_lm.server backend +# +# Interactive mode (default): +# entrypoint-pi.sh → 
interactive bash +# entrypoint-pi.sh [args...] → exec [args...] +# +# Headless agent mode: +# entrypoint-pi.sh --worktree --task "" +# +# Expected volumes: +# -v :/workspace → main repository (read/write) +# -v /.worktrees:/worktrees → worktrees directory +# +# Expected env vars: +# PI_BASE_URL → OpenAI-compatible base URL of mlx_lm.server on host. +# Apple Container CLI does NOT implement +# host.containers.internal (apple/container#346), so +# the default is the bridge gateway IP: 192.168.100.1 +# (gateway of the default 192.168.100.0/24 subnet). +# PI_MODEL_ID → model id served by mlx_lm.server (matches the +# --model flag passed when starting the server). +# PI_PROVIDER_NAME → provider key written into ~/.pi/agent/models.json +# (default: "local"). pi addresses the model as +# "/". +# +# Unlike entrypoint.sh (Claude), this entrypoint does NOT copy any +# Claude credentials — PI agents authenticate against the local model +# via the OpenAI-compatible HTTP API, no cloud token needed. + +set -euo pipefail + +WORKTREE_BRANCH="" +AGENT_TASK="" +PASSTHROUGH_ARGS=() + +# ── Functions ───────────────────────────────────────────────────────────────── + +parse_args() { + while [[ $# -gt 0 ]]; do + case "$1" in + --worktree) WORKTREE_BRANCH="$2"; shift 2 ;; + --task) AGENT_TASK="$2"; shift 2 ;; + *) PASSTHROUGH_ARGS+=("$1"); shift ;; + esac + done +} + +create_worktree() { + WORKTREE_PATH="/worktrees/${WORKTREE_BRANCH}" + + echo "[pi-entrypoint] Creating worktree: ${WORKTREE_BRANCH} → ${WORKTREE_PATH}" + + mkdir -p "$(dirname "$WORKTREE_PATH")" + + WORKTREE_BASE_SHA=$(git -C /workspace rev-parse HEAD 2>/dev/null || echo "") + + if git -C /workspace worktree add "$WORKTREE_PATH" -b "$WORKTREE_BRANCH" 2>/dev/null; then + echo "[pi-entrypoint] Worktree created on new branch: ${WORKTREE_BRANCH}" + elif git -C /workspace worktree add "$WORKTREE_PATH" "$WORKTREE_BRANCH" 2>/dev/null; then + echo "[pi-entrypoint] Worktree created on existing branch: ${WORKTREE_BRANCH}" + else + 
echo "[pi-entrypoint] ERROR: could not create worktree for '${WORKTREE_BRANCH}'" >&2 + exit 1 + fi + + cd "$WORKTREE_PATH" + echo "[pi-entrypoint] Working directory: $(pwd)" +} + +setup_agent_perms() { + echo "[pi-entrypoint] Preparing PI agent runtime..." + chown -R agent:agent "$WORKTREE_PATH" +} + +write_pi_models_config() { + # Materialise ~/.pi/agent/models.json from env vars so the `pi` CLI knows + # how to reach the local mlx_lm.server. Written to the agent user's home + # since that is where `pi` looks for it. + local pi_dir="/home/agent/.pi/agent" + local base_url="${PI_BASE_URL:-http://192.168.100.1:8080/v1}" + local model_id="${PI_MODEL_ID:-mlx-community/gemma-4-26b-a4b-it-4bit}" + local provider="${PI_PROVIDER_NAME:-local}" + + mkdir -p "$pi_dir" + cat > "$pi_dir/models.json" < "${AGENT_DIR}/status.json" ) 2>/dev/null || true +} + +wrap_task_with_discipline() { + # Prepends a structural preamble to AGENT_TASK so the model inherits + # discipline regardless of how the orchestrator phrased the task. + # Three problems this addresses (from the format_bytes test run): + # 1. Models follow absolute paths literally, escaping the worktree. + # 2. pi -p has no postcondition that verifies an actual commit happened. + # 3. With temp > 0 the model invents extra files (tests, README, etc.). + local cwd + cwd=$(pwd) + cat <" + git log -1 --oneline + Include that last "git log -1 --oneline" line at the end of your response. +3. If you cannot complete the task, DO NOT commit. Briefly explain why instead. + +Task: +${AGENT_TASK} +EOF +} + +emit_marker() { + local phase="$1"; shift + echo "[agent:status] PHASE=${phase} BRANCH=${WORKTREE_BRANCH} KIND=pi $*" +} + +run_agent() { + AGENT_DIR="${WORKTREE_PATH}/.agent" + mkdir -p "$AGENT_DIR" + chown -R agent:agent "$AGENT_DIR" + + if ! 
grep -qxF '.agent/' "${WORKTREE_PATH}/.gitignore" 2>/dev/null; then + echo '.agent/' >> "${WORKTREE_PATH}/.gitignore" 2>/dev/null || true + fi + + AGENT_STARTED_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + local start_epoch + start_epoch=$(date +%s) + + # Single phase write — there is no meaningful initialization gap between + # entering run_agent and invoking pi, so we go straight to "working". + write_status "working" + emit_marker "working" + + local provider="${PI_PROVIDER_NAME:-local}" + local model_id="${PI_MODEL_ID:-mlx-community/gemma-4-26b-a4b-it-4bit}" + + echo "[pi-entrypoint] Task: ${AGENT_TASK} (wrapped with discipline preamble)" + echo "---" + + local wrapped_task + wrapped_task=$(wrap_task_with_discipline) + + # su-exec drops privileges from root to `agent` so `pi` runs unprivileged. + # Root is needed for the setup steps before this (chown the host-mounted + # worktree, write /home/agent/.pi/agent/models.json); `pi` itself does not. + set +e + su-exec agent env HOME=/home/agent \ + pi -p "$wrapped_task" --model "${provider}/${model_id}" \ + 2>&1 | tee "$AGENT_DIR/agent.log" + local exit_code=${PIPESTATUS[0]} + set -e + + local commit_count last_commit finished_at end_epoch duration_secs + commit_count=$(git -C "$WORKTREE_PATH" -c "safe.directory=$WORKTREE_PATH" \ + rev-list --count "${WORKTREE_BASE_SHA:-HEAD}..HEAD" 2>/dev/null || echo 0) + last_commit=$(git -C "$WORKTREE_PATH" -c "safe.directory=$WORKTREE_PATH" \ + log --oneline -1 2>/dev/null || echo "none") + finished_at=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + end_epoch=$(date +%s) + duration_secs=$((end_epoch - start_epoch)) + + local final_phase="completed" + [ "$exit_code" -ne 0 ] && final_phase="errored" + + ( printf '{ + "phase": "%s", + "branch": "%s", + "task": "%s", + "agent_kind": "pi", + "started_at": "%s", + "finished_at": "%s", + "duration_secs": %d, + "exit_code": %d, + "commits": %s, + "last_commit": "%s" +}\n' "$final_phase" "$WORKTREE_BRANCH" "$AGENT_TASK" \ + "$AGENT_STARTED_AT" 
"$finished_at" "$duration_secs" \ + "$exit_code" "$commit_count" "$last_commit" \ + > "$AGENT_DIR/status.json" ) 2>/dev/null || true + + emit_marker "$final_phase" "EXIT_CODE=${exit_code}" "COMMITS=${commit_count}" "DURATION=${duration_secs}s" + + exit "$exit_code" +} + +run_interactive() { + if [[ ${#PASSTHROUGH_ARGS[@]} -eq 0 ]]; then + exec /bin/bash --login + else + exec "${PASSTHROUGH_ARGS[@]}" + fi +} + +# ── Main ────────────────────────────────────────────────────────────────────── + +main() { + parse_args "$@" + + if [[ -n "$WORKTREE_BRANCH" ]]; then + create_worktree + if [[ -n "$AGENT_TASK" ]]; then + setup_agent_perms + write_pi_models_config + run_agent + else + write_pi_models_config + run_interactive + fi + else + write_pi_models_config + run_interactive + fi +} + +main "$@" diff --git a/docs/agents/cli.md b/docs/agents/cli.md index b8f4b8a..e91bc61 100644 --- a/docs/agents/cli.md +++ b/docs/agents/cli.md @@ -146,6 +146,64 @@ directory with a contextual message. This avoids confusing error output. --- +### `q pi` — PI agent lifecycle sub-commands + +PI agents are a separate agent class backed by a local `mlx_lm.server` +(managed via `/iac`). They do **not** require `CLAUDE_CONTAINER_OAUTH_TOKEN` +— authentication is local. See [pi-agent.md](./pi-agent.md) for the full +architecture. + +#### `q pi build` + +Builds the `claude-pi:ubuntu` image (`Dockerfile.pi`). + +```bash +q pi build +q pi build --image claude-pi:custom --dockerfile Dockerfile.pi.custom +``` + +#### `q pi spawn` + +Spawns a detached headless PI agent (local LLM backend). 
+ +```bash +q pi spawn --branch pi/refactor --task "rename ambiguous helpers" +q pi spawn --branch pi/explore --task "explore the auth module" \ + --cpus 4 --memory 8G \ + --base-url http://192.168.100.1:8080/v1 \ + --model-id mlx-community/llama-3.1-8b +``` + +| Option | Required | Description | +|---|---|---| +| `--branch` | yes | Git branch for the PI worktree | +| `--task` | yes | Task description for the PI agent | +| `--cpus` | no | CPU count | +| `--memory` | no | Memory limit (e.g. `3G`) | +| `--image` | no | PI image tag override | +| `--base-url` | no | OpenAI-compatible base URL of the local LLM (default: the bridge gateway IP; `host.containers.internal` is NOT supported by Apple Container CLI) | +| `--model-id` | no | Model id served by `mlx_lm.server` (must match `/v1/models` response) | + +The Makefile enforces **`MAX_PI_AGENTS=1`** by default — `q pi spawn` (Makefile target `spawn-pi`) will refuse +to launch a second PI agent while one is still running. The model + 6 GB +prompt cache leaves little RAM headroom on M-series machines. + +#### `q pi list`, `q pi logs`, `q pi follow`, `q pi status`, `q pi stop` + +```bash +q pi list # PI agents only +q pi logs --branch pi/refactor # snapshot logs +q pi follow --branch pi/refactor # live logs (TTY hand-off) +q pi status --branch pi/refactor # status.json (works post-exit) +q pi stop --branch pi/refactor # stop the container +``` + +The list command filters by `agent_kind=pi` in `status.json`, so it +returns only PI worktrees — Claude agent worktrees in the same +`AGENTS_HOME` are excluded.
+ +--- + ### Cleanup commands ```bash @@ -183,7 +241,8 @@ app/cli/ ├── build.py ← build, clean, clean-network, clean-all ├── network.py ← network ├── run.py ← run, shell - └── agents.py ← spawn, list, logs, follow, stop + ├── agents.py ← spawn, list, logs, follow, stop + └── pi_agents.py ← pi build/spawn/list/logs/follow/stop/status ``` --- @@ -204,3 +263,10 @@ app/cli/ | `q clean` | `clean` | | | `q clean-network` | `clean-network` | | | `q clean-all` | `clean-all` | | +| `q pi build` | `build-pi` | optional `--image`, `--dockerfile` | +| `q pi spawn` | `spawn-pi` | requires `--branch`, `--task`; no Claude token needed | +| `q pi list` | `list-pi-agents` | filters by `agent_kind=pi` | +| `q pi logs` | `logs-pi-agent` | requires `--branch` | +| `q pi follow` | `follow-pi-agent` | TTY hand-off | +| `q pi stop` | `stop-pi-agent` | requires `--branch` | +| `q pi status` | _(local read)_ | reads `$AGENTS_HOME//.agent/status.json` | diff --git a/docs/agents/container-agent.md b/docs/agents/container-agent.md index cca325b..1af685c 100644 --- a/docs/agents/container-agent.md +++ b/docs/agents/container-agent.md @@ -428,3 +428,32 @@ The Alpine CI image is sufficient for verifying that the Dockerfile syntax is co | Base image | Chainguard Wolfi (glibc) | Alpine (musl) | | Purpose | Run headless Claude agents | Build validation | | Claude Code compatible | Yes (`glibc` + `posix_getdents`) | No (musl lacks `posix_getdents`) | + +--- + +## Dockerfile.pi — PI agent variant (Ubuntu 26.04, kernel 7.x) + +`config/Dockerfile.pi` builds a **separate image** (`claude-pi:ubuntu`) for +the PI agent class. It mirrors the security hardening of `Dockerfile.wolfi` +(non-root `agent` user, `su-exec` shim, minimal package set) but uses +Ubuntu 26.04 LTS as its base instead of Chainguard Wolfi because PI agent +workloads benefit from Linux kernel 7.x runtime behaviour +(`io_uring`, memory accounting). 
+ +| Aspect | `Dockerfile.wolfi` (Claude) | `Dockerfile.pi` (PI) | +|---|---|---| +| Base | `cgr.dev/chainguard/wolfi-base` | `ubuntu:26.04` | +| Kernel target | glibc on whatever kernel the host provides | Linux 7.x (matches Ubuntu 26.04 LTS) | +| Cloud auth tooling | Claude CLI + OAuth token | none — uses local `mlx_lm.server` | +| Backend | Anthropic API | host's `mlx_lm.server` via the bridge gateway IP (default `http://192.168.100.1:8080/v1`; `host.containers.internal` is not implemented by Apple Container CLI) | +| Entrypoint | `entrypoint.sh` (copies credentials) | `entrypoint-pi.sh` (no credentials) | +| Env vars | `CLAUDE_CODE_OAUTH_TOKEN` | `PI_BASE_URL`, `PI_MODEL_ID`, `PI_PROVIDER_NAME` | +| Memory note | 3 GB default; safe to scale in parallel | 3 GB default, but **MAX_PI_AGENTS=1** by default | + +The two images do not share layers (different base), but they share the +non-root `agent` user pattern, the `su-exec` Python shim, and the +`status.json` lifecycle format — which means `list-agents`-style tooling +can read both seamlessly via the `agent_kind` field. + +For the full operational guide on PI agents (setup, spawn, monitor, +troubleshoot), see [pi-agent.md](./pi-agent.md). diff --git a/docs/agents/pi-agent.md b/docs/agents/pi-agent.md new file mode 100644 index 0000000..8cfb570 --- /dev/null +++ b/docs/agents/pi-agent.md @@ -0,0 +1,237 @@ +# PI Agent (local mlx_lm backend) + +PI agents are a **second class of agent** in stackai, complementary to the +default Claude agents documented in [container-agent.md](./container-agent.md). +They use the [pi.dev](https://pi.dev/) SDK as their intelligence layer, +backed by a **local `mlx_lm.server`** running on the host (managed from +`/iac`) — no Anthropic API calls.
+ +## When to use a PI agent + +| Scenario | Use | +|---|---| +| You want agent help on a long-running, exploratory task without spending Claude credits | PI agent | +| Air-gapped or offline machine | PI agent | +| Task requires frontier reasoning (large refactors, security review) | Claude agent | +| You haven't installed `mlx_lm` or don't have a model downloaded | Claude agent | + +## Architecture at a glance + +``` +┌──────────────────────────────────────────────────────────────────────┐ +│ Host (macOS 26, Apple Silicon) │ +│ │ +│ ┌─────────────────┐ │ +│ │ iac CLI │ mlx_lm.server (Gemma-4-26b, 4-bit MLX) │ +│ │ uv run iac │──▶ 0.0.0.0:8080 (OpenAI-compatible) │ +│ │ server start │ │ +│ └─────────────────┘ │ +│ ▲ │ +│ │ HTTP │ +│ │ http://192.168.100.1:8080/v1 (bridge gateway IP) │ +│ │ │ +│ ┌────────┴─────────────────────────────────────────┐ │ +│ │ Apple Container: claude-pi:ubuntu │ │ +│ │ (Ubuntu 26.04, linux/arm64, kernel 7.x) │ │ +│ │ │ │ +│ │ pi -p "" --model local/ │ │ +│ │ │ │ │ +│ │ ▼ │ │ +│ │ ~/.pi/agent/models.json (generated at startup) │ │ +│ │ provider "local" → baseUrl PI_BASE_URL │ │ +│ └──────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────────┘ +``` + +The PI container does NOT carry its own model. It reaches the host's +`mlx_lm.server` via the **gateway IP** of the bridge network created by +Apple Container CLI (`192.168.100.1` for the default +`192.168.100.0/24` subnet). + +### Why not `host.containers.internal`? + +Apple Container CLI does **not** implement `host.containers.internal` +(see [apple/container#346](https://github.com/apple/container/issues/346)). +DNS lookups for that hostname either fail immediately or time out. The +workaround is to use the bridge gateway IP, which the host owns. 
If you +override the default subnet via `make network SUBNET=10.20.0.0/24`, +remember to also override `PI_BASE_URL` accordingly +(`make spawn-pi PI_BASE_URL=http://10.20.0.1:8080/v1 …`). + +## Open/Closed extension model + +Existing Claude agent infrastructure is **not modified** to add PI agents. +Every piece is additive: + +| Component | Existing (Claude) | New (PI) | +|---|---|---| +| Image | `claude-agent:wolfi` | `claude-pi:ubuntu` | +| Dockerfile | `config/Dockerfile.wolfi` | `config/Dockerfile.pi` | +| Entrypoint | `config/entrypoint.sh` | `config/entrypoint-pi.sh` | +| Makefile targets | `spawn`, `list-agents`, `stop-agent`, … | `spawn-pi`, `list-pi-agents`, `stop-pi-agent`, … | +| CLI command | `q spawn`, `q agents …` | `q pi spawn`, `q pi …` | +| Skill | `spawn-agent` (Claude block) | same skill, dedicated **PI agents** section | + +Adding more agent classes in the future (e.g. a different local backend) +follows the same pattern — a new Dockerfile, a new entrypoint, a new +Makefile section, a new CLI module. + +## Setup (one-time) + +```bash +# 1) Sync the iac project +cd iac && uv sync + +# 2) Start the local model server (downloads on first run; takes minutes) +uv run iac server start +uv run iac server status # should report phase=running + +# 3) Build the PI container image +cd ../config && make build-pi +``` + +## Spawning a PI agent + +Preferred (via the CLI wrapper): + +```bash +q pi spawn --branch pi/refactor --task "rename ambiguous helpers in src/utils.py" +``` + +Equivalent direct Makefile call: + +```bash +cd config && make spawn-pi \ + BRANCH=pi/refactor TASK="rename ambiguous helpers in src/utils.py" +``` + +The container is detached, runs to completion, and removes itself with `--rm`. +The branch worktree (and its `.agent/status.json` + `.agent/agent.log`) persists +under `$AGENTS_HOME/pi/refactor/` for review. 
+ +## Memory safety — `MAX_PI_AGENTS=1` + +The default model (Gemma-4-26b, 4-bit) + 6 GB prompt cache leaves little RAM +headroom on M-series machines. The `spawn-pi` target counts running PI +containers and refuses to launch a new one if it would exceed +`MAX_PI_AGENTS` (default `1`). + +To override (only if you know your machine can absorb the load): + +```bash +make spawn-pi MAX_PI_AGENTS=2 BRANCH=pi/second TASK="..." +``` + +## Monitoring and lifecycle + +| Action | CLI | Makefile | +|---|---|---| +| List active PI agents | `q pi list` | `make list-pi-agents` | +| Live logs | `q pi follow --branch ` | `make follow-pi-agent BRANCH=` | +| Status JSON | `q pi status --branch ` | `make status-pi-agent BRANCH=` | +| Stop | `q pi stop --branch ` | `make stop-pi-agent BRANCH=` | +| Saved logs (post-exit) | reads `$AGENTS_HOME//.agent/agent.log` | same | + +The `status.json` written by `entrypoint-pi.sh` includes +`"agent_kind": "pi"`, which `list-pi-agents` uses to filter PI worktrees +from regular Claude worktrees that share the same `AGENTS_HOME`. + +## The `pi` CLI and `models.json` + +The PI container ships the [pi-coding-agent](https://pi.dev/) (`pi` CLI) +installed via `npm install -g @earendil-works/pi-coding-agent`. Headless +invocation: `pi -p "" --model /`. 
+ +`entrypoint-pi.sh` generates `~/.pi/agent/models.json` on every start +from these env vars: + +| Env var | Purpose | Default | +|---|---|---| +| `PI_BASE_URL` | OpenAI-compatible base URL of the local server | `http://192.168.100.1:8080/v1` | +| `PI_MODEL_ID` | Model id served by `mlx_lm.server` (also the `id` field in `/v1/models`) | `mlx-community/gemma-4-26b-a4b-it-4bit` | +| `PI_PROVIDER_NAME` | Provider key in `models.json` | `local` | + +Generated file: + +```json +{ + "providers": { + "local": { + "baseUrl": "http://192.168.100.1:8080/v1", + "api": "openai-completions", + "apiKey": "none", + "compat": { "supportsDeveloperRole": false }, + "models": [ { "id": "mlx-community/gemma-4-26b-a4b-it-4bit" } ] + } + } +} +``` + +`compat.supportsDeveloperRole: false` is required because `mlx_lm.server` +does not understand OpenAI's `developer` role — pi sends a regular `system` +message instead. + +### Discipline preamble (every task is wrapped) + +Local models follow instructions much more literally than Claude. +`entrypoint-pi.sh` prepends a structural preamble to **every** task before +invoking `pi -p`, regardless of how the orchestrator phrased it. The model +actually sees: + +``` +You are running inside a git worktree at . Every file path in this task +must be interpreted relative to that directory — never use absolute paths +beginning with /workspace or any other absolute prefix. + +Rules: +1. Modify ONLY the files explicitly named in the task. Do not create test + files, documentation, or auxiliary files unless the task asks for them. +2. After making your changes you MUST run, in this exact order: + git add -A + git commit -m "" + git log -1 --oneline + Include that last "git log -1 --oneline" line at the end of your response. +3. If you cannot complete the task, DO NOT commit. Briefly explain why instead. 
+ +Task: + +``` + +This is structural, not advisory — every PI agent run inherits these rules, +so a user spawning directly via `make spawn-pi TASK="..."` still gets them. + +### Sampling defaults + +The iac CLI starts `mlx_lm.server` with sampling parameters tuned for +coding tasks (low temperature, narrow nucleus). Override if needed: + +| Parameter | Default | Override | +|---|---|---| +| `temp` | `0.2` | `uv run iac server start --temp 0.4` | +| `top_p` | `0.9` | `uv run iac server start --top-p 0.95` | + +Pi-coding-agent does **not** send `temperature` or `top_p` on per-request +basis (its `models.json` schema doesn't expose them, nor does its CLI), so +the server defaults are what every PI agent actually uses. Lowering temp +markedly reduces the "model invents extra files" failure mode observed +during the format_bytes test. + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| `spawn-pi` fails with `MAX_PI_AGENTS=1 reached` | Another PI agent is still running | `make stop-pi-agent BRANCH=` | +| Container starts but PI agent immediately exits with HTTP error | Local server not running | `uv run iac server status` then `iac server start` | +| Container can't reach `192.168.100.1:8080` | Custom `SUBNET` or post-restart Apple Container bug | Pass `--base-url http://:8080/v1` to `q pi spawn`, or `make network SUBNET=192.168.100.0/24` to reset to the default | +| `pi` errors with "model not found" | `PI_MODEL_ID` does not match what mlx_lm.server reports at `/v1/models` | `curl http://localhost:8080/v1/models` on the host, then `q pi spawn --model-id ` | +| `iac server start` says "already running" but `status` shows stopped | Stale PID file | `rm ~/.iac/server.pid && uv run iac server start` | +| Connection from container hangs indefinitely after macOS restart | Known Apple Container CLI bug — bridge gateway not always reachable post-restart | `container network delete claude-agent-net && make network` | +| First curl from container right after 
`iac server start` times out, but localhost works | Warm-up gap — `mlx_lm.server` accepts on `127.0.0.1:8080` before the bridge IP is fully reachable (the model still loading into RAM) | Wait ~5-15 s after `iac server status` first reports reachable; or curl from the container until success before spawning the PI agent | + +## Why Ubuntu 26.04 (not Chainguard Wolfi) + +The PI image targets a distribution that ships **Linux kernel 7.x** for +better `io_uring` and memory-accounting behaviour under sustained LLM +streaming. Chainguard Wolfi remains the right base for Claude agents +(smaller attack surface, faster pulls) — but it lags slightly on kernel +versions. Each agent class can choose the base that fits its workload. diff --git a/iac/main.py b/iac/main.py index 2e946ba..044890c 100644 --- a/iac/main.py +++ b/iac/main.py @@ -1,5 +1,243 @@ -def main(): - print("Hello from iac!") +""" +iac — local mlx_lm.server lifecycle manager. + +Provides a small Typer CLI to start, stop, and check the status of the +local Gemma model served via `mlx_lm.server`. The server exposes an +OpenAI-compatible HTTP API on the configured port and is consumed by +PI agent containers running on the host. + +Warning: the configured model + 6GB prompt cache leaves little RAM +headroom. Do not start more than one server instance, and limit the +number of concurrent PI agents that talk to it (MAX_PI_AGENTS=1). 
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import signal
+import subprocess
+import sys
+import time
+import urllib.error
+import urllib.request
+from dataclasses import dataclass, replace
+from pathlib import Path
+
+import typer
+from rich.console import Console
+from rich.table import Table
+
+app = typer.Typer(name="iac", help="Local mlx_lm.server lifecycle manager")
+server_app = typer.Typer(help="mlx_lm.server lifecycle commands")
+app.add_typer(server_app, name="server")
+
+console = Console()
+
+
+@dataclass(frozen=True)
+class ServerConfig:
+    """Defaults for the mlx_lm.server invocation. Overridable via env vars."""
+
+    # IAC_MODEL / IAC_HOST / IAC_PORT are read once, when this class body is
+    # executed (i.e. at import time), not on every ServerConfig() call.
+    model: str = os.environ.get(
+        "IAC_MODEL", "mlx-community/gemma-4-26b-a4b-it-4bit"
+    )
+    host: str = os.environ.get("IAC_HOST", "0.0.0.0")
+    port: int = int(os.environ.get("IAC_PORT", "8080"))
+    prompt_cache_size: int = 5
+    prompt_cache_bytes: str = "6GB"
+    decode_concurrency: int = 4
+    prompt_concurrency: int = 2
+    prefill_step_size: int = 1024
+    temp: float = 0.2
+    top_p: float = 0.9
+    top_k: int = 40
+    min_p: float = 0.0
+    max_tokens: int = 2048
+    log_level: str = "INFO"
+
+    def command(self) -> list[str]:
+        """Return the argv list that launches mlx_lm.server with this config."""
+        return [
+            "mlx_lm.server",
+            "--model", self.model,
+            "--host", self.host,
+            "--port", str(self.port),
+            "--prompt-cache-size", str(self.prompt_cache_size),
+            "--prompt-cache-bytes", self.prompt_cache_bytes,
+            "--decode-concurrency", str(self.decode_concurrency),
+            "--prompt-concurrency", str(self.prompt_concurrency),
+            "--prefill-step-size", str(self.prefill_step_size),
+            "--temp", str(self.temp),
+            "--top-p", str(self.top_p),
+            "--top-k", str(self.top_k),
+            "--min-p", str(self.min_p),
+            "--max-tokens", str(self.max_tokens),
+            "--use-default-chat-template",
+            "--log-level", self.log_level,
+        ]
+
+
+def _state_dir() -> Path:
+    """Return the state directory (IAC_STATE_DIR or ~/.iac), creating it if needed."""
+    base = Path(os.environ.get("IAC_STATE_DIR", str(Path.home() / ".iac")))
+    base.mkdir(parents=True, exist_ok=True)
+    return base
+
+
+def _pid_file() -> Path:
+    """Return the path of the pid file inside the state directory."""
+    return _state_dir() / "server.pid"
+
+
+def _log_file() -> Path:
+    """Return the path of the detached server's log file inside the state directory."""
+    return _state_dir() / "server.log"
+
+
+def _read_pid() -> int | None:
+    """Return the PID stored in the pid file, or None if missing, unreadable or invalid."""
+    pf = _pid_file()
+    if not pf.exists():
+        return None
+    try:
+        pid = int(pf.read_text().strip())
+    except (ValueError, OSError):
+        # Corrupt or unreadable pid file: treat as "no known server".
+        return None
+    # os.kill() interprets pid <= 0 as a process group (0, < -1) or as every
+    # process the user may signal (-1); a corrupt pid file must never become
+    # such a signal target in `server stop`.
+    return pid if pid > 0 else None
+
+
+def _process_alive(pid: int) -> bool:
+    """Return True if signal 0 can be delivered to *pid*.
+
+    Signal 0 only performs the existence/permission check. PermissionError
+    (the PID exists but this user may not signal it) is reported as not alive,
+    so callers never try to manage a process they cannot signal.
+    """
+    try:
+        os.kill(pid, 0)
+        return True
+    except (ProcessLookupError, PermissionError):
+        return False
+
+
+def _http_ok(url: str, timeout: float = 1.5) -> bool:
+    """Return True if urlopen(*url*) succeeds with a status in [200, 500).
+
+    Any URL, timeout, connection or OS-level error yields False.
+    """
+    try:
+        with urllib.request.urlopen(url, timeout=timeout) as resp:  # noqa: S310
+            return 200 <= resp.status < 500
+    except (urllib.error.URLError, TimeoutError, ConnectionError, OSError):
+        return False
+
+
+@server_app.command("start")
+def server_start(
+    detach: bool = typer.Option(
+        True,
+        "--detach/--foreground",
+        help="Run server in background (default) or foreground.",
+    ),
+    temp: float | None = typer.Option(
+        None,
+        "--temp",
+        help="Override sampling temperature (default: ServerConfig.temp = 0.2).",
+    ),
+    top_p: float | None = typer.Option(
+        None,
+        "--top-p",
+        help="Override nucleus sampling p (default: ServerConfig.top_p = 0.9).",
+    ),
+) -> None:
+    """Start mlx_lm.server with the project defaults."""
+    # NOTE: "\\[" escapes the bracket for Rich; an unescaped "[server]" is
+    # parsed as a markup tag and silently dropped from the output.
+    existing_pid = _read_pid()
+    if existing_pid and _process_alive(existing_pid):
+        console.print(
+            f"[yellow]\\[server][/] already running (pid={existing_pid})"
+        )
+        raise typer.Exit(0)
+
+    # Apply CLI overrides on top of the (frozen) defaults.
+    cfg = ServerConfig()
+    if temp is not None:
+        cfg = replace(cfg, temp=temp)
+    if top_p is not None:
+        cfg = replace(cfg, top_p=top_p)
+    cmd = cfg.command()
+
+    console.print(f"[cyan]\\[server][/] starting {cfg.model}")
+    console.print(f"[cyan]\\[server][/] listening on http://{cfg.host}:{cfg.port}")
+    console.print(f"[cyan]\\[server][/] sampling: temp={cfg.temp} top_p={cfg.top_p}")
+
+    if detach:
+        # The child inherits the log descriptor, so the parent's handle can be
+        # closed as soon as Popen returns.
+        with _log_file().open("ab") as log:
+            proc = subprocess.Popen(  # noqa: S603
+                cmd,
+                stdout=log,
+                stderr=subprocess.STDOUT,
+                stdin=subprocess.DEVNULL,
+                start_new_session=True,
+            )
+        _pid_file().write_text(str(proc.pid))
+        # NOTE: "\\[" escapes the bracket for Rich; an unescaped "[server]" is
+        # parsed as a markup tag and silently dropped from the output.
+        console.print(f"[green]\\[server][/] started (pid={proc.pid})")
+        console.print(f"\\[server] logs: {_log_file()}")
+    else:
+        # Foreground: replace this CLI process with the server itself
+        # (no pid file is written on this path).
+        os.execvp(cmd[0], cmd)
+
+
+@server_app.command("stop")
+def server_stop() -> None:
+    """Stop a running mlx_lm.server started via `iac server start`."""
+    pid = _read_pid()
+    if pid is None:
+        console.print("[yellow]\\[server][/] no pid file — not running")
+        raise typer.Exit(0)
+
+    if not _process_alive(pid):
+        console.print(
+            f"[yellow]\\[server][/] pid {pid} not alive — clearing pid file"
+        )
+        _pid_file().unlink(missing_ok=True)
+        raise typer.Exit(0)
+
+    try:
+        os.kill(pid, signal.SIGTERM)
+        # Give the server up to 20 x 0.25 s = 5 s to exit before escalating.
+        for _ in range(20):
+            if not _process_alive(pid):
+                break
+            time.sleep(0.25)
+        else:
+            os.kill(pid, signal.SIGKILL)
+    except ProcessLookupError:
+        # The process exited between the liveness check and the signal;
+        # that is the outcome we wanted, so fall through to the cleanup.
+        pass
+
+    _pid_file().unlink(missing_ok=True)
+    console.print(f"[green]\\[server][/] stopped (pid={pid})")
+
+
+@server_app.command("status")
+def server_status(
+    as_json: bool = typer.Option(False, "--json", help="Emit machine-readable JSON."),
+) -> None:
+    """Report whether the server is running and reachable."""
+    cfg = ServerConfig()
+    pid = _read_pid()
+    alive = pid is not None and _process_alive(pid)
+    # Probe over loopback; the HTTP check is skipped when the process is gone.
+    health_url = f"http://127.0.0.1:{cfg.port}/v1/models"
+    reachable = _http_ok(health_url) if alive else False
+
+    payload = {
+        # "running" requires both a live process and a reachable endpoint.
+        "phase": "running" if alive and reachable else "stopped",
+        "pid": pid,
+        "process_alive": alive,
+        "endpoint_reachable": reachable,
+        "model": cfg.model,
+        "base_url": f"http://{cfg.host}:{cfg.port}/v1",
+    }
+
+    if as_json:
+        console.print_json(json.dumps(payload))
+        return
+
+    table = Table(title="iac server status", show_header=False)
+    table.add_column("field")
+    table.add_column("value")
+    for k, v in payload.items():
+        table.add_row(k, str(v))
+    console.print(table)
+
+
+@app.command()
+def info() -> None:
+    """Print the resolved server config without starting anything."""
+    cfg = ServerConfig()
+    console.print_json(json.dumps({"command": cfg.command()}))
+
+
+def main() -> None:
+    # Entry point used by the `if __name__ == "__main__"` guard: run the Typer app.
+    app()
 
 
 if __name__ == "__main__":
diff
--git a/iac/pyproject.toml b/iac/pyproject.toml index de2be5f..fdf2930 100644 --- a/iac/pyproject.toml +++ b/iac/pyproject.toml @@ -1,10 +1,23 @@ [project] name = "iac" version = "0.1.0" -description = "vLLM for Mac" +description = "vLLM for Mac — mlx_lm.server lifecycle manager for local PI agents" readme = "README.md" requires-python = ">=3.13" dependencies = [ "mlx-lm>=0.30.0", "mlx>=0.30.4", + "typer>=0.12", + "rich>=13", ] + +[project.scripts] +iac = "main:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["."] +only-include = ["main.py"] diff --git a/iac/uv.lock b/iac/uv.lock index 30cede2..0ce8c0d 100644 --- a/iac/uv.lock +++ b/iac/uv.lock @@ -163,16 +163,20 @@ wheels = [ [[package]] name = "iac" version = "0.1.0" -source = { virtual = "." } +source = { editable = "." } dependencies = [ { name = "mlx" }, { name = "mlx-lm" }, + { name = "rich" }, + { name = "typer" }, ] [package.metadata] requires-dist = [ { name = "mlx", specifier = ">=0.30.4" }, { name = "mlx-lm", specifier = ">=0.30.0" }, + { name = "rich", specifier = ">=13" }, + { name = "typer", specifier = ">=0.12" }, ] [[package]]