diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 740a4438..91f28b2c 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -12,7 +12,7 @@ "name": "PACT", "source": "./pact-plugin", "description": "Orchestration harness that turns Claude Code into a coordinated team of specialist AI agents", - "version": "4.1.2", + "version": "4.1.3", "author": { "name": "Synaptic-Labs-AI" }, diff --git a/README.md b/README.md index 49b0bc51..7730b772 100644 --- a/README.md +++ b/README.md @@ -505,7 +505,7 @@ When installed as a plugin, PACT lives in your plugin cache: │ └── cache/ │ └── pact-plugin/ │ └── PACT/ -│ └── 4.1.2/ # Plugin version +│ └── 4.1.3/ # Plugin version │ ├── agents/ │ ├── commands/ │ ├── skills/ diff --git a/pact-plugin/.claude-plugin/plugin.json b/pact-plugin/.claude-plugin/plugin.json index 588a1234..cb7c8fae 100644 --- a/pact-plugin/.claude-plugin/plugin.json +++ b/pact-plugin/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "PACT", - "version": "4.1.2", + "version": "4.1.3", "description": "Orchestration harness that turns Claude Code into a coordinated team of specialist AI agents", "author": { "name": "Synaptic-Labs-AI", diff --git a/pact-plugin/README.md b/pact-plugin/README.md index 42b6d1ad..894cdb89 100644 --- a/pact-plugin/README.md +++ b/pact-plugin/README.md @@ -1,6 +1,6 @@ # PACT — Orchestration Harness for Claude Code -> **Version**: 4.1.2 +> **Version**: 4.1.3 Turn a single Claude Code session into a managed team of specialist AI agents that prepare, design, build, and test your code systematically. @@ -68,6 +68,14 @@ Then restart Claude Code. Requires [Agent Teams enabled](https://github.com/Syna - **Restored session-start ritual** (v4.1): Scaled-down `/PACT:bootstrap` command + `bootstrap_gate.py` injection re-establish the first-turn ritual under the new delivery model - **Communication Charter**: Async-at-idle-boundary delivery model formalized for inter-agent SendMessage mechanics +## Configuration + +Environment variables that tune hook behavior: + +| Variable | Default | Allowed values | Effect | +|---|---|---|---| +| `PACT_DISPATCH_INLINE_MISSION_MODE` | `warn` | `warn` / `deny` / `shadow` | Disposition of the dispatch-gate inline-mission heuristic (flags dispatchers inlining mission text into `prompt=` instead of using the canonical "check TaskList" form; F7 in the #662 failure-mode index). `warn` emits an advisory `additionalContext`; `deny` blocks the spawn (flip after the F22 counter-test in `tests/runbooks/662-dispatch-gate.md` confirms `additionalContext` is silently dropped under PreToolUse); `shadow` journals only — the trigger is observable but neither WARNs nor DENYs (calibration / first-session safety net). F1-F6, F14, F15 are unaffected. Unknown values fall back to `warn`. | + ## Full Documentation For installation options, detailed features, examples, and technical reference: diff --git a/pact-plugin/agents/pact-orchestrator.md b/pact-plugin/agents/pact-orchestrator.md index 11ba7ca9..5d39bc75 100644 --- a/pact-plugin/agents/pact-orchestrator.md +++ b/pact-plugin/agents/pact-orchestrator.md @@ -200,7 +200,7 @@ Create a feature branch before any new workstream begins. **Checkpoint**: Reaching for **Edit**/**Write** on application code (`.py`, `.ts`, `.js`, `.rb`, etc.)? **DELEGATE**. -**Checkpoint**: Reaching for `Task(subagent_type=...)` without `team_name`? **Create a team first.** Every specialist dispatch uses Agent Teams — no exceptions. +**Checkpoint**: Reaching for `Agent(subagent_type=...)` without `team_name`? **Create a team first.** Every specialist dispatch uses Agent Teams — no exceptions. Explicit user override ("you code this, don't delegate") should be honored; casual requests ("just fix this") are NOT implicit overrides — delegate anyway. @@ -357,17 +357,27 @@ For full detail, `Read(file_path="../protocols/pact-variety.md")` when calibrati ## 11. Agent Teams Dispatch -> ⚠️ **MANDATORY**: Specialists are spawned as teammates via `Task(name=..., team_name="{team_name}", subagent_type=...)`. The session team is created at session start per INSTRUCTIONS step 1. The `session_init` hook provides the specific team name in your session context. +> ⚠️ **MANDATORY**: Specialists are spawned as teammates via `Agent(name=..., team_name="{team_name}", subagent_type=...)`. The session team is created at session start per INSTRUCTIONS step 1. The `session_init` hook provides the specific team name in your session context. > -> ⚠️ **NEVER** use plain `Task(subagent_type=...)` without `name` and `team_name` for specialist agents. This bypasses team coordination, task tracking, and `SendMessage` communication. +> ⚠️ **NEVER** use plain `Agent(subagent_type=...)` without `name` and `team_name` for specialist agents. This bypasses team coordination, task tracking, and `SendMessage` communication. **Dispatch pattern**: 1. `TaskCreate(subject, description)` — create the tracking task with full mission 2. `TaskUpdate(taskId, owner="{name}")` — assign ownership -3. `Task(name="{name}", team_name="{team_name}", subagent_type="pact-{type}", prompt="YOUR PACT ROLE: teammate ({name}).\n\nYou are joining team {team_name}. Check `TaskList` for tasks assigned to you.")` — spawn the teammate +3. `Agent(name="{name}", team_name="{team_name}", subagent_type="pact-{type}", prompt="YOUR PACT ROLE: teammate ({name}).\n\nYou are joining team {team_name}. Check `TaskList` for tasks assigned to you.")` — spawn the teammate -> ⚠️ **`{name}` constraint (SECURITY)**: the `name=` parameter you pass to `Task()` is interpolated verbatim into the `YOUR PACT ROLE: teammate ({name}).` marker line. To prevent marker spoofing via injected newlines or close-parens, the `name` value MUST match the pattern `^[a-z0-9-]+$` — lowercase alphanumerics and hyphens only, no spaces, no newlines, no parentheses. Examples of valid names: `backend-coder-1`, `review-test-engineer-7`, `secretary`. Examples of invalid names: `backend coder 1` (spaces), `backend-coder)evil` (close-paren), any name containing newlines. +> ⚠️ **`{name}` constraint (SECURITY)**: the `name=` parameter you pass to `Agent()` is interpolated verbatim into the `YOUR PACT ROLE: teammate ({name}).` marker line. To prevent marker spoofing via injected newlines or close-parens, the `name` value MUST match the pattern `^[a-z0-9-]+$` — lowercase alphanumerics and hyphens only, no spaces, no newlines, no parentheses. Examples of valid names: `backend-coder-1`, `review-test-engineer-7`, `secretary`. Examples of invalid names: `backend coder 1` (spaces), `backend-coder)evil` (close-paren), any name containing newlines. + +#### First-spawn verification (HARD-RULE) + +After your first specialist spawn in a session — and after any subsequent spawn where you suspect dispatch tooling may be misconfigured — verify the teammate received the full PACT protocol surface. The teammate's first message MUST demonstrate access to `TaskList`, `TaskUpdate`, and `SendMessage`. If the teammate reports any of those tools "not available", "not loaded", or otherwise missing: + +> ⚠️ **HARD STOP — DISPATCH PROTOCOL VIOLATION**. This is **NOT** degraded mode. **NOT** something to "work around". The dispatch was malformed (almost always: spawn shape used `Task(...)` instead of `Agent(...)`, or omitted `name=` / `team_name=`). Stop the teammate, correct the dispatch shape, and re-spawn with the canonical `Agent(name=..., team_name=..., subagent_type=...)` form documented above. Do **not** instruct the teammate to "make do" — they cannot self-recover from a malformed spawn. + +#### Hook WARN signals are STOP signals + +When a PreToolUse hook (`bootstrap_gate`, `dispatch_gate`, `team_guard`, etc.) emits a WARN-shaped advisory or a `permissionDecision: deny` rationale, treat it as a HARD STOP. **WARN means STOP and re-dispatch correctly** — not "note the warning and proceed". Rationalizing past a WARN ("the gate is overly cautious", "this case doesn't apply") is the failure mode the WARN exists to prevent. If a gate fires unexpectedly on a dispatch you believe is correct, the dispatch is likely subtly wrong; investigate before retrying. ### Reuse vs. Spawn Decision diff --git a/pact-plugin/commands/bootstrap.md b/pact-plugin/commands/bootstrap.md index bb5b2190..5934f263 100644 --- a/pact-plugin/commands/bootstrap.md +++ b/pact-plugin/commands/bootstrap.md @@ -53,15 +53,36 @@ Command files use `{team_name}`, `{session_dir}`, and `{plugin_root}` as literal This step unlocks code-editing tools (`Edit`, `Write`) and agent spawning (`Agent`, `NotebookEdit`), which are blocked by the `bootstrap_gate` PreToolUse hook until the bootstrap-complete marker exists. -Find the `PACT_SESSION_DIR=` line in your context (injected by `bootstrap_prompt_gate` at every prompt while the marker is absent). Run: +Find the `PACT_SESSION_DIR=` line in your context (injected by `bootstrap_prompt_gate` at every prompt while the marker is absent). Run the marker-write command below, substituting `` with the `PACT_SESSION_DIR=` value and `` with the `{plugin_root}` value (from the Current Session block): ``` -mkdir -p "" && touch "/bootstrap-complete" +mkdir -p "" && python3 - "" "" <<'PY' +import hashlib, json, os, sys +session_dir = sys.argv[1].rstrip("/") +plugin_root = sys.argv[2].rstrip("/") +sid = os.path.basename(session_dir) +plugin_version = json.loads( + open(os.path.join(plugin_root, ".claude-plugin", "plugin.json"), + encoding="utf-8").read() +)["version"] +v = 1 +sig = hashlib.sha256( + f"{sid}|{plugin_root}|{plugin_version}|{v}".encode("utf-8") +).hexdigest() +marker = os.path.join(session_dir, "bootstrap-complete") +with open(marker, "w", encoding="utf-8") as f: + f.write(json.dumps({"v": v, "sid": sid, "sig": sig})) +PY ``` -Substitute `` with the value from `PACT_SESSION_DIR=`. The marker name `bootstrap-complete` is the load-bearing literal that `bootstrap_gate.is_marker_set` checks; do not rename it. +The marker is a JSON sentinel `{"v": 1, "sid": , "sig": SHA256("|||")}` (#662). The marker name `bootstrap-complete` is the load-bearing literal that `bootstrap_gate.is_marker_set` checks; do not rename it. The signature is a marker-content fingerprint that closes the trivial `Bash("touch /bootstrap-complete")` bypass. It is NOT cryptographic provenance: all four signature inputs are readable from the same-user filesystem, so a same-user attacker with Python execution can recompute the digest. The fingerprint raises attacker effort and creates a detection surface; it does not make the marker unforgeable. diff --git a/pact-plugin/commands/comPACT.md b/pact-plugin/commands/comPACT.md index efb8afa8..3ed7f429 100644 --- a/pact-plugin/commands/comPACT.md +++ b/pact-plugin/commands/comPACT.md @@ -168,7 +168,7 @@ Every specialist dispatch creates **two tasks**, not one: - **Task A** — TEACHBACK gate. `subject = "{specialist}: TEACHBACK for {sub-task}"`, owner = teammate. Description: teachback expectations + dispatch context. - **Task B** — primary work. `subject = "{specialist}: {sub-task}"`, owner = teammate, `blockedBy = []`. -Both are created BEFORE the `Task(...)` spawn call so the teammate sees them on first `TaskList`. The teammate claims A, submits teachback metadata, idles on `awaiting_lead_completion`. You review and accept via the two-call atomic pair (`TaskUpdate(A, status="completed")` + paired wake-signal SendMessage — see [Teachback Review](../protocols/pact-completion-authority.md#teachback-review)). On accept, the teammate wakes to claim B. +Both are created BEFORE the `Agent(...)` spawn call so the teammate sees them on first `TaskList`. The teammate claims A, submits teachback metadata, idles on `awaiting_lead_completion`. You review and accept via the two-call atomic pair (`TaskUpdate(A, status="completed")` + paired wake-signal SendMessage — see [Teachback Review](../protocols/pact-completion-authority.md#teachback-review)). On accept, the teammate wakes to claim B. **Dispatch sequence (replaces single-task dispatch)**: @@ -190,10 +190,10 @@ B_id = TaskCreate(subject="{specialist}: {sub-task}", description="]`. -Both are created BEFORE the `Task(...)` spawn call so the teammate sees them on first `TaskList`. The teammate claims A, submits teachback metadata, idles on `awaiting_lead_completion`. You review the teachback, accept via the two-call atomic pair (`TaskUpdate(A, status="completed")` + paired wake-signal SendMessage — see [Teachback Review](../protocols/pact-completion-authority.md#teachback-review)), and the teammate wakes to claim B. +Both are created BEFORE the `Agent(...)` spawn call so the teammate sees them on first `TaskList`. The teammate claims A, submits teachback metadata, idles on `awaiting_lead_completion`. You review the teachback, accept via the two-call atomic pair (`TaskUpdate(A, status="completed")` + paired wake-signal SendMessage — see [Teachback Review](../protocols/pact-completion-authority.md#teachback-review)), and the teammate wakes to claim B. **Dispatch sequence (replaces single-task dispatch)**: @@ -98,10 +98,10 @@ B_id = TaskCreate( TaskUpdate(B_id, owner="{teammate-name}", addBlockedBy=[A_id]) TaskUpdate(A_id, addBlocks=[B_id]) -# 3. Spawn the teammate via the canonical Task() form above. +# 3. Spawn the teammate via the canonical Agent() form above. ``` -The `Task()` `prompt` does NOT change shape — the two-task dispatch is encoded in the surrounding TaskCreate sequence, not in the `Task()` call. The teammate discovers Task A + Task B via `TaskList` and follows pact-agent-teams §On Start. +The `Agent()` `prompt` does NOT change shape — the two-task dispatch is encoded in the surrounding TaskCreate sequence, not in the `Agent()` call. The teammate discovers Task A + Task B via `TaskList` and follows pact-agent-teams §On Start. **Carve-outs** — single-task dispatch still applies for: @@ -463,7 +463,7 @@ JSON 4. Spawn the preparer with the canonical dispatch form: ``` -Task( +Agent( name="preparer", team_name="{team_name}", subagent_type="pact-preparer", @@ -555,7 +555,7 @@ JSON 4. Spawn the architect with the canonical dispatch form: ``` -Task( +Agent( name="architect", team_name="{team_name}", subagent_type="pact-architect", @@ -678,7 +678,7 @@ JSON 4. Spawn each coder with the canonical dispatch form: ``` -Task( +Agent( name="{coder-name}", team_name="{team_name}", subagent_type="pact-{coder-type}", @@ -706,7 +706,7 @@ Valid skip reasons: single coder on familiar pattern, variety reassessed below 7 3. Spawn the auditor with the canonical dispatch form: ``` -Task( +Agent( name="auditor", team_name="{team_name}", subagent_type="pact-auditor", @@ -805,7 +805,7 @@ JSON 4. Spawn the test engineer with the canonical dispatch form: ``` -Task( +Agent( name="test-engineer", team_name="{team_name}", subagent_type="pact-test-engineer", @@ -868,9 +868,9 @@ When a blocker is resolved, prefer resuming the original agent over spawning fre **Resume pattern**: 1. Read agent ID from task metadata: `TaskGet(taskId).metadata.agent_id` -2. Resume with blocker context: `Task(resume="{agent_id}", prompt="Blocker resolved: {details}. Continue your task.")` +2. Resume with blocker context: `Agent(resume="{agent_id}", prompt="Blocker resolved: {details}. Continue your task.")` -**Fresh spawn pattern** (when resume is inappropriate): Follow the standard dispatch pattern (`TaskCreate` + `TaskUpdate` + Task with name/team_name/subagent_type). +**Fresh spawn pattern** (when resume is inappropriate): Follow the standard dispatch pattern (`TaskCreate` + `TaskUpdate` + Agent with name/team_name/subagent_type). --- diff --git a/pact-plugin/commands/peer-review.md b/pact-plugin/commands/peer-review.md index 754f9097..cdb724b5 100644 --- a/pact-plugin/commands/peer-review.md +++ b/pact-plugin/commands/peer-review.md @@ -149,7 +149,7 @@ Each reviewer dispatch creates **two tasks**, not one: - **Task A** — TEACHBACK gate. `subject = "{reviewer-type}: TEACHBACK for review of {feature}"`, owner = reviewer. Description: state which review angle the reviewer is taking (consistency check vs adversarial vs design coherence) before reading the diff. - **Task B** — primary review work. `subject = "{reviewer-type}: review {feature}"`, owner = reviewer, `blockedBy = []`. -Both are created BEFORE the `Task(...)` spawn call. The reviewer claims A, submits teachback metadata, idles on `awaiting_lead_completion`. You review the teachback (does it state the review angle clearly?), accept via the two-call atomic pair (`TaskUpdate(A, status="completed")` + paired wake-signal SendMessage — see [Teachback Review](../protocols/pact-completion-authority.md#teachback-review)). On accept, the reviewer wakes to claim B and read the diff. +Both are created BEFORE the `Agent(...)` spawn call. The reviewer claims A, submits teachback metadata, idles on `awaiting_lead_completion`. You review the teachback (does it state the review angle clearly?), accept via the two-call atomic pair (`TaskUpdate(A, status="completed")` + paired wake-signal SendMessage — see [Teachback Review](../protocols/pact-completion-authority.md#teachback-review)). On accept, the reviewer wakes to claim B and read the diff. ``` A_id = TaskCreate( @@ -166,7 +166,7 @@ TaskUpdate(B_id, owner="{reviewer-name}", addBlockedBy=[A_id]) TaskUpdate(A_id, addBlocks=[B_id]) ``` -The `Task()` `prompt` does NOT change shape — the two-task dispatch is encoded in the surrounding TaskCreate sequence. +The `Agent()` `prompt` does NOT change shape — the two-task dispatch is encoded in the surrounding TaskCreate sequence. --- @@ -178,7 +178,7 @@ For each reviewer: 3. Spawn the reviewer with the canonical dispatch form. The `prompt` MUST lead with the `YOUR PACT ROLE: teammate ({reviewer-name})` marker on its own line so routing detects the teammate spawn (team protocol + teachback content arrive via spawn-time skills frontmatter, not a per-prompt directive): ``` -Task( +Agent( name="{reviewer-name}", team_name="{team_name}", subagent_type="pact-{reviewer-type}", diff --git a/pact-plugin/commands/plan-mode.md b/pact-plugin/commands/plan-mode.md index fec4512e..917c403e 100644 --- a/pact-plugin/commands/plan-mode.md +++ b/pact-plugin/commands/plan-mode.md @@ -194,7 +194,7 @@ Each consultant dispatch creates **two tasks**, not one: - **Task A** — TEACHBACK gate. `subject = "{specialist}: TEACHBACK for plan consultation on {feature}"`, owner = consultant. Description: lightweight understanding-confirm of the consultation scope. - **Task B** — primary consultation. `subject = "{specialist}: plan consultation for {feature}"`, owner = consultant, `blockedBy = []`. -Both are created BEFORE the `Task(...)` spawn call. The consultant claims A, submits teachback metadata, idles on `awaiting_lead_completion`. You review and accept via the two-call atomic pair (`TaskUpdate(A, status="completed")` + paired wake-signal SendMessage — see [Teachback Review](../protocols/pact-completion-authority.md#teachback-review)). On accept, the consultant wakes to claim B and produce the consultation HANDOFF. +Both are created BEFORE the `Agent(...)` spawn call. The consultant claims A, submits teachback metadata, idles on `awaiting_lead_completion`. You review and accept via the two-call atomic pair (`TaskUpdate(A, status="completed")` + paired wake-signal SendMessage — see [Teachback Review](../protocols/pact-completion-authority.md#teachback-review)). On accept, the consultant wakes to claim B and produce the consultation HANDOFF. ``` A_id = TaskCreate( @@ -211,7 +211,7 @@ TaskUpdate(B_id, owner="{specialist-name}", addBlockedBy=[A_id]) TaskUpdate(A_id, addBlocks=[B_id]) ``` -The teachback gate is lightweight ("understanding-confirm" with no implementation gate consequence) — plan-mode dispatches are read-mostly. The `Task()` `prompt` does NOT change shape. +The teachback gate is lightweight ("understanding-confirm" with no implementation gate consequence) — plan-mode dispatches are read-mostly. The `Agent()` `prompt` does NOT change shape. --- @@ -223,7 +223,7 @@ The teachback gate is lightweight ("understanding-confirm" with no implementatio 3. Spawn the consultant with the canonical dispatch form: ``` -Task( +Agent( name="{specialist-name}", team_name="{team_name}", subagent_type="pact-{specialist-type}", diff --git a/pact-plugin/commands/rePACT.md b/pact-plugin/commands/rePACT.md index d729776b..a04ff190 100644 --- a/pact-plugin/commands/rePACT.md +++ b/pact-plugin/commands/rePACT.md @@ -220,9 +220,9 @@ Each specialist dispatch creates **two tasks**, not one: - **Task A** — TEACHBACK gate. `subject = "{scope-prefixed-name}: TEACHBACK for {sub-task}"`, owner = specialist. - **Task B** — primary work. `subject = "{scope-prefixed-name}: implement {sub-task}"`, owner = specialist, `blockedBy = []`. -Both are created BEFORE the `Task(...)` spawn call. The specialist claims A, submits teachback metadata, idles on `awaiting_lead_completion`. You review and accept via the two-call atomic pair (`TaskUpdate(A, status="completed")` + paired wake-signal SendMessage — see [Teachback Review](../protocols/pact-completion-authority.md#teachback-review)). On accept, the specialist wakes to claim B. +Both are created BEFORE the `Agent(...)` spawn call. The specialist claims A, submits teachback metadata, idles on `awaiting_lead_completion`. You review and accept via the two-call atomic pair (`TaskUpdate(A, status="completed")` + paired wake-signal SendMessage — see [Teachback Review](../protocols/pact-completion-authority.md#teachback-review)). On accept, the specialist wakes to claim B. -Nested PACT cycles' inner-cycle dispatches follow the same A+B shape recursively. The `Task()` `prompt` does NOT change shape. +Nested PACT cycles' inner-cycle dispatches follow the same A+B shape recursively. The `Agent()` `prompt` does NOT change shape. ``` A_id = TaskCreate( @@ -248,7 +248,7 @@ For each specialist needed — apply the shape above: 3. Spawn the specialist with the canonical dispatch form. The `prompt` MUST lead with the `YOUR PACT ROLE: teammate ({scope-prefixed-name})` marker on its own line (team protocol + teachback content arrive via spawn-time skills frontmatter): ``` -Task( +Agent( name="{scope-prefixed-name}", team_name="{team_name}", subagent_type="pact-{specialist-type}", diff --git a/pact-plugin/hooks/bootstrap_gate.py b/pact-plugin/hooks/bootstrap_gate.py index f10f16ed..529c88b3 100755 --- a/pact-plugin/hooks/bootstrap_gate.py +++ b/pact-plugin/hooks/bootstrap_gate.py @@ -2,29 +2,39 @@ """ Location: pact-plugin/hooks/bootstrap_gate.py Summary: PreToolUse hook that blocks code-editing and agent-dispatch tools - (Edit, Write, Task, NotebookEdit) until the bootstrap-complete + (Edit, Write, Agent, NotebookEdit) until the bootstrap-complete marker exists. Used by: hooks.json PreToolUse hook (no matcher — fires for all hookable tools) Layer 3 of the four-layer bootstrap gate enforcement (#401). On each tool call, checks the session-scoped bootstrap-complete marker: - - Marker exists → suppressOutput (sub-ms fast path) + - Marker exists AND its content is a valid stamp → suppressOutput (sub-ms fast path) - Non-PACT session → suppressOutput (no-op) - Teammate → suppressOutput (no-op) - - Code-editing/agent-dispatch tool (Edit, Write, Task, NotebookEdit) → deny + - Code-editing/agent-dispatch tool (Edit, Write, Agent, NotebookEdit) → deny - Operational/exploration tool (Read, Glob, Grep, Bash, WebFetch, WebSearch, AskUserQuestion, ExitPlanMode, any MCP tool) → allow Tool classification rationale: - Blocked tools are structured code modification (Edit, Write) and agent - dispatch (Task, NotebookEdit) actions that shouldn't run before - governance is loaded. The agent-dispatch tool name is `Task` — the - canonical platform name for sub-agent spawning, confirmed by the - matcher='Task' entries in hooks.json (PreToolUse team_guard + - PostToolUse auditor_reminder). + dispatch (Agent, NotebookEdit) actions that shouldn't run before + governance is loaded. The agent-dispatch tool name is `Agent` — the + canonical Claude Code platform name (verified against + code.claude.com/docs/en/agent-teams.md and sub-agents.md as of + 2026-05-06; #662). hooks.json matcher='Agent' entries (PreToolUse + team_guard + PostToolUse auditor_reminder) fire on Agent invocations. + Earlier `Task` literal in this file (commit 4c286c1f, 2026-05-05) + was based on a misread of production matchers — those matchers were + silently NOT firing on spawn events, mistaken for "production + evidence". Resolved in #662. - Bash is ALLOWED because the bootstrap marker-write mechanism itself is a Bash command in bootstrap.md — blocking Bash would create a circular - dependency where the gate can never self-disable. + dependency where the gate can never self-disable. To prevent the + Bash-marker-bypass (a single `touch bootstrap-complete` against an + empty file would otherwise satisfy any presence check), is_marker_set + verifies marker CONTENT via a SHA256 fingerprint over (session_id, + plugin_root, plugin_version, schema_version), not just file presence + — `touch bootstrap-complete` no longer satisfies the gate. - Exploration tools are read-only and needed for state recovery after compaction. - MCP tools are always allowed — they're external integrations that may @@ -32,94 +42,178 @@ - Non-hookable tools (Skill, ToolSearch, TaskList/TaskGet/TaskUpdate, SendMessage) never reach this hook because they don't fire PreToolUse events. Note: TaskList/TaskGet/TaskUpdate are PACT plugin task-system - tools, distinct from the agent-dispatch `Task` tool that IS blocked. + tools, distinct from the agent-dispatch `Agent` tool that IS blocked. -SACROSANCT: every raisable path is wrapped in try/except that defaults to -allow (exit 0 with suppressOutput). A gate bug must never block a tool call. +SACROSANCT (post-#662): module-load failures and runtime gate-logic +exceptions are fail-CLOSED (deny) per #658 defect class. Only malformed +stdin remains fail-OPEN (input-side failure → harness's domain). Input: JSON from stdin with tool_name, tool_input, session_id, etc. Output: JSON with hookSpecificOutput.permissionDecision (deny case) or {"suppressOutput": true} (allow / passthrough) """ +# ─── stdlib first (used by _emit_load_failure_deny BEFORE wrapped imports) ─── import json import os -import stat import sys -from pathlib import Path +from typing import NoReturn + + +def _emit_load_failure_deny(stage: str, error: BaseException) -> NoReturn: + """Emit fail-closed deny for module-load or runtime gate-logic failure. + + Mirrors PR #660 ``merge_guard_pre._emit_load_failure_deny``. Uses ONLY + stdlib (json, sys) so it remains functional even when every wrapped + import below fails. Audit anchor: hookEventName must be present in any + deny output. + """ + print(json.dumps({ + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": ( + f"PACT bootstrap_gate {stage} failure — blocking for safety. " + f"{type(error).__name__}: {error}. Check hook installation " + "and shared module availability." + ), + } + })) + print( + f"Hook load error (bootstrap_gate / {stage}): {error}", + file=sys.stderr, + ) + sys.exit(2) + + +# ─── fail-closed wrapper around cross-package imports + risky module work ───── +try: + import hashlib + import hmac + import stat + from pathlib import Path + + import shared.pact_context as pact_context + from shared import BOOTSTRAP_MARKER_NAME +except BaseException as _module_load_error: # noqa: BLE001 — fail-closed catch-all + _emit_load_failure_deny("module imports", _module_load_error) -import shared.pact_context as pact_context -from shared import BOOTSTRAP_MARKER_NAME _SUPPRESS_OUTPUT = json.dumps({"suppressOutput": True}) # Code-editing and agent-dispatch tools blocked until bootstrap completes. # Bash is intentionally NOT blocked — the marker-write mechanism in # bootstrap.md is a Bash command, so blocking Bash would prevent the gate -# from ever self-disabling (circular dependency). The agent-dispatch tool -# is `Task` (the canonical platform tool name); cross-evidence in -# hooks.json: PreToolUse team_guard + PostToolUse auditor_reminder both -# use matcher='Task' and fire correctly in production. +# from ever self-disabling (circular dependency). To prevent the +# Bash-marker-bypass exploit (a single `touch bootstrap-complete`), +# is_marker_set verifies marker CONTENT via a SHA256 fingerprint over +# (session_id, plugin_root, plugin_version, schema_version), not just +# file presence. The agent-dispatch tool +# is `Agent` — the canonical Claude Code platform name (#662 corrects +# 4c286c1f's incorrect rename direction). hooks.json matcher='Agent' +# entries fire on Agent invocations. _BLOCKED_TOOLS = frozenset({ "Edit", "Write", - "Task", + "Agent", "NotebookEdit", }) +# Marker schema version. Bump if marker JSON shape changes; verifier +# rejects unknown versions. Producer (commands/bootstrap.md) must emit a +# matching `v` field. +MARKER_SCHEMA_VERSION = 1 + +# Marker file size cap (bytes). The marker JSON is a small fixed schema +# ({v, sid, sig}); a content larger than this is rejected to defend against +# pathological reads. +_MARKER_MAX_BYTES = 256 + _DENY_REASON = ( "PACT bootstrap required. Invoke Skill(\"PACT:bootstrap\") first. " - "Code-editing tools (Edit, Write) and agent dispatch (Task) are blocked " + "Code-editing tools (Edit, Write) and agent dispatch (Agent) are blocked " "until bootstrap completes. Bash, Read, Glob, Grep are available." ) -def is_marker_set(session_dir: Path | None) -> bool: - """Public predicate: does a real bootstrap-complete marker exist? +def _expected_marker_signature(session_id: str, plugin_root: str, + plugin_version: str, marker_version: int) -> str: + """Compute the expected SHA256 marker signature. + + Inputs are joined with `|` separators in a fixed order so the producer + in commands/bootstrap.md and this verifier compute the same digest: + + sha256(f"{session_id}|{plugin_root}|{plugin_version}|{marker_version}") + + The `marker_version` is part of the digest so a format-version bump + invalidates pre-bump markers automatically. + """ + payload = f"{session_id}|{plugin_root}|{plugin_version}|{marker_version}" + return hashlib.sha256(payload.encode("utf-8")).hexdigest() + + +def is_marker_set(session_dir: "Path | None") -> bool: + """Public predicate: does a properly-stamped bootstrap-complete marker exist? Returns True iff `/` exists as a REGULAR FILE (not a symlink, not a directory) AND no ancestor of the - session_dir is a symlink. False on any of: + session_dir is a symlink AND its content is a valid stamp: + - file size ≤ ``_MARKER_MAX_BYTES`` + - parses as JSON object with EXACTLY keys {"v", "sid", "sig"} + - ``v`` is integer == ``MARKER_SCHEMA_VERSION`` + - ``sid`` equals ``session_dir.name`` (binds marker to its session) + - ``sig`` matches ``_expected_marker_signature`` via + ``hmac.compare_digest`` (constant-time compare) + + Returns False on any of: - session_dir is None or falsy - - marker path is a symlink (S2 defense: planted symlink at the - marker would otherwise satisfy `Path.exists()` since exists() - follows symlinks) - - marker path is a directory or other non-regular file - - marker path does not exist - - any ancestor of session_dir is a symlink (S4 defense: a planted - symlink at e.g. ~/.claude redirecting to attacker-controlled - directory would otherwise allow attacker to plant a regular-file - marker satisfying the leaf-only check) - - any OSError on stat (treated as marker-absent so the gate stays - armed) - - The plan §High-Risk-TDD-Specs Q4 names this as a 7-method TDD target; - extracting it as a public callable closes the plan-vs-implementation - gap (architect-review Findings #1 + #14). Callers (current: - `_check_tool_allowed`; future: any session-end audit, sibling hooks) - use this single entry point for the safe-marker-check contract. - - Security rationale: + - marker path is a symlink (S2 defense) + - marker path is a directory or other non-regular file (S2 corollary) + - any ancestor of session_dir is a symlink (S4 defense) + - any OSError on stat (treated as marker-absent) + - marker content fails any of size cap / JSON parse / key set / version / + sid match / signature match + - missing plugin context (cannot compute expected signature) + + Security rationale (symlink defenses unchanged from pre-#662; + content fingerprint added): - S2 (security-engineer-review): `marker_path.exists()` follows - symlinks → attacker with same-user write access plants a symlink - at /bootstrap-complete pointing to any existing - file (e.g., /etc/hostname) → gate falsely satisfied → tool - block bypassed. Replaced with `os.lstat()` + `stat.S_ISREG()` - which checks the leaf without following symlinks. + symlinks → attacker plants a symlink at the marker path → gate + falsely satisfied → tool block bypassed. Defense: `os.lstat()` + + `stat.S_ISREG()` checks the leaf without following symlinks. - S4: leaf-only is_symlink() does not detect ancestor symlinks - (e.g., ~/.claude itself being a symlink to attacker-controlled - /tmp/evil/.claude). `Path.resolve(strict=False)` walks every - ancestor; comparing to the unresolved path detects any + (e.g., ~/.claude itself being a symlink). `Path.resolve(strict=False)` + walks every ancestor; comparing to the unresolved path detects any ancestor-link rewrite. + - Bash-marker-bypass defense (#662): `Bash("touch /bootstrap-complete")` + previously defeated the gate because file PRESENCE was the only + check. The verifier now verifies marker CONTENT bound to + (session_id, plugin_root, plugin_version, marker_version) — a + marker-content provenance check that closes the trivial + Bash-touch bypass. + + The signature is NOT cryptographic provenance. All four + signature inputs are readable from the same-user filesystem + (session_id and plugin_root from pact-session-context.json, + plugin_version from plugin.json, marker_version from this + module's MARKER_SCHEMA_VERSION constant), so a same-user attacker + with Python execution and read access to those files can + recompute the digest. The signature is a fingerprint that + raises attacker effort from a one-line `touch` to a multi-line + script-AND-read sequence and creates a detection surface (the + digest is deterministic, so a forgery that races plugin-version + bumps is observable). It is not a MAC; treat any future + hardening that would require unforgeability as a separate + threat model. """ if not session_dir: return False session_dir = Path(session_dir) - # S4: ancestor-symlink defense. Path.resolve() follows ALL symlinks - # in the path; if the resolved path differs from the input path - # (modulo absolute-form), some ancestor was a symlink. strict=False - # so we don't raise if the marker file itself doesn't exist yet. + # S4: ancestor-symlink defense. Path.resolve() follows ALL symlinks in + # the path; if the resolved path differs from the absolute input path, + # some ancestor was a symlink. strict=False so we don't raise if the + # marker file itself doesn't exist yet. try: resolved = session_dir.resolve(strict=False) except OSError: @@ -130,15 +224,49 @@ def is_marker_set(session_dir: Path | None) -> bool: marker_path = session_dir / BOOTSTRAP_MARKER_NAME # S2: lstat (does NOT follow symlinks) + S_ISREG (regular file only). - # The marker is a sentinel file whose CONTENT is not consumed; the - # contract is "regular file at this exact path". A symlink at the - # marker path is rejected even if it points to a regular file - # elsewhere (the link-target is attacker-chosen). try: st = os.lstat(str(marker_path)) except OSError: return False - return stat.S_ISREG(st.st_mode) + if not stat.S_ISREG(st.st_mode): + return False + + # Verify marker CONTENT (#662 — closes the Bash-touch bypass). + try: + if st.st_size <= 0 or st.st_size > _MARKER_MAX_BYTES: + return False + content = marker_path.read_text(encoding="utf-8").strip() + parsed = json.loads(content) + if not isinstance(parsed, dict): + return False + if set(parsed.keys()) != {"v", "sid", "sig"}: + return False + if not isinstance(parsed["v"], int) or parsed["v"] != MARKER_SCHEMA_VERSION: + return False + if not isinstance(parsed["sid"], str) or parsed["sid"] != session_dir.name: + return False + if not isinstance(parsed["sig"], str): + return False + plugin_root = pact_context.get_plugin_root() + if not plugin_root: + return False + plugin_json_path = Path(plugin_root) / ".claude-plugin" / "plugin.json" + try: + plugin_version = json.loads( + plugin_json_path.read_text(encoding="utf-8") + ).get("version", "") + except (OSError, ValueError): + return False + if not plugin_version: + return False + expected = _expected_marker_signature( + parsed["sid"], plugin_root, plugin_version, parsed["v"] + ) + if not hmac.compare_digest(parsed["sig"], expected): + return False + return True + except (OSError, ValueError, KeyError, TypeError): + return False def _check_tool_allowed(input_data: dict) -> str | None: @@ -149,8 +277,9 @@ def _check_tool_allowed(input_data: dict) -> str | None: """ pact_context.init(input_data) - # Fast path: marker exists (as a regular non-symlink file) → allow - # everything. See `is_marker_set` for S2/S4 defense rationale. + # Fast path: marker exists (as a properly-stamped regular file with + # valid content fingerprint) → allow everything. See `is_marker_set` + # for symlink and bypass defense rationale. session_dir = pact_context.get_session_dir() if not session_dir: return None @@ -184,15 +313,17 @@ def main(): try: input_data = json.load(sys.stdin) except (json.JSONDecodeError, ValueError): + # Malformed stdin → fail-OPEN (input-side failure is harness's domain). + # Cannot evaluate without input; cannot DENY meaningfully. print(_SUPPRESS_OUTPUT) sys.exit(0) try: deny_reason = _check_tool_allowed(input_data) - except Exception: - # Any exception in gate logic → fail-open - print(_SUPPRESS_OUTPUT) - sys.exit(0) + except Exception as e: + # Runtime fail-CLOSED — gate-logic exception must DENY (#658 + # sibling defect class). Pre-#662 this path was fail-OPEN. + _emit_load_failure_deny("runtime", e) if deny_reason: # hookEventName is required by the harness; missing it silently fails open diff --git a/pact-plugin/hooks/bootstrap_prompt_gate.py b/pact-plugin/hooks/bootstrap_prompt_gate.py index 2c3c95bf..e4521484 100755 --- a/pact-plugin/hooks/bootstrap_prompt_gate.py +++ b/pact-plugin/hooks/bootstrap_prompt_gate.py @@ -12,28 +12,71 @@ - Non-PACT session (no context file) → no-op passthrough - Teammate (resolve_agent_name non-empty) → no-op passthrough -SACROSANCT: every raisable path is wrapped in try/except that defaults to -allow (exit 0 with suppressOutput). A gate bug must never block a user prompt. +SACROSANCT (post-#662 module-load fail-closed retrofit): module-load +failures emit an advisory `additionalContext` block at exit 0 — +UserPromptSubmit cannot +DENY the prompt itself, so the strongest signal we can send is to surface +the load-failure to the LLM via additionalContext so the user is informed +and the orchestrator persona can react. Runtime exceptions in gate logic +remain fail-OPEN (suppressOutput) because injecting bootstrap-required +text on a hook-side bug would mislead a healthy session into rebooting. Input: JSON from stdin with hook_event_name, session_id, prompt, etc. Output: JSON with hookSpecificOutput.additionalContext (inject case) or {"suppressOutput": true} (fast path / passthrough) """ +# ─── stdlib first (used by _emit_load_failure_advisory BEFORE wrapped imports) ─ import json import sys -from pathlib import Path +from typing import NoReturn + + +def _emit_load_failure_advisory(stage: str, error: BaseException) -> NoReturn: + """Emit fail-closed advisory for module-load failure. + + UserPromptSubmit cannot DENY the prompt; the strongest available signal + is `additionalContext` injection. Uses ONLY stdlib (json, sys) so it + remains functional even when every wrapped import below fails. Audit + anchor: hookEventName must be present in any structured output. + """ + print(json.dumps({ + "hookSpecificOutput": { + "hookEventName": "UserPromptSubmit", + "additionalContext": ( + f"PACT bootstrap_prompt_gate {stage} failure — the hook " + f"could not verify bootstrap state. {type(error).__name__}: " + f"{error}. Until this is resolved, you should invoke " + 'Skill("PACT:bootstrap") before any code-editing or agent ' + "dispatch action; the companion `bootstrap_gate` PreToolUse " + "will block those tools fail-closed." + ), + } + })) + print( + f"Hook load error (bootstrap_prompt_gate / {stage}): {error}", + file=sys.stderr, + ) + sys.exit(0) + + +# ─── fail-closed wrapper around cross-package imports ─────────────────────── +try: + from pathlib import Path + + import shared.pact_context as pact_context + from bootstrap_gate import is_marker_set + from shared import BOOTSTRAP_MARKER_NAME +except BaseException as _module_load_error: # noqa: BLE001 — fail-closed catch-all + _emit_load_failure_advisory("module imports", _module_load_error) -import shared.pact_context as pact_context -from bootstrap_gate import is_marker_set -from shared import BOOTSTRAP_MARKER_NAME _SUPPRESS_OUTPUT = json.dumps({"suppressOutput": True}) _BOOTSTRAP_INSTRUCTION_TEMPLATE = ( "REQUIRED: Before responding to this message, invoke " 'Skill("PACT:bootstrap"). Code-editing tools (Edit, Write) and agent ' - "dispatch (Task) are mechanically blocked until bootstrap completes. " + "dispatch (Agent) are mechanically blocked until bootstrap completes. " "This loads your operating instructions, governance policy, and " "workflow protocols." "{session_dir_hint}" @@ -61,9 +104,8 @@ def _check_bootstrap_needed(input_data: dict) -> str | None: # Use the same safe-marker-check helper as the sibling # bootstrap_gate.py so both enforcement points share one safe-check - # contract. The helper enforces S2 (planted-symlink-at-marker - # rejection via os.lstat + S_ISREG) + S4 (ancestor-symlink rejection - # via Path.resolve containment). + # contract. The helper enforces leaf-symlink, ancestor-symlink, and + # marker-content fingerprint defenses (post-#662). if is_marker_set(Path(session_dir)): # Bootstrap already done → suppress (zero tokens) return None @@ -90,7 +132,10 @@ def main(): try: instruction = _check_bootstrap_needed(input_data) except Exception: - # Any exception in gate logic → fail-open + # Runtime exception in gate logic → fail-OPEN: injecting + # bootstrap-required text on a hook-side bug would mislead a healthy + # session. Module-load failures are handled separately (advisory) by + # the module-load wrapper above. print(_SUPPRESS_OUTPUT) sys.exit(0) diff --git a/pact-plugin/hooks/dispatch_gate.py b/pact-plugin/hooks/dispatch_gate.py new file mode 100644 index 00000000..c183addc --- /dev/null +++ b/pact-plugin/hooks/dispatch_gate.py @@ -0,0 +1,508 @@ +#!/usr/bin/env python3 +""" +Location: pact-plugin/hooks/dispatch_gate.py +Summary: PreToolUse hook (matcher='Agent') validating PACT specialist + spawns: required name + team_name, name regex/length/reserved + tokens, registered specialist type, session-team match, member + uniqueness, task assignment, and prompt heuristics. +Used by: hooks.json PreToolUse matcher='Agent' (sibling of team_guard.py). + +Closes #662 silent-failure surface: spawning pact-* specialists without +name/team_name, with malformed names, against unregistered subagent_types, +into the wrong team, before TaskCreate, with long inline missions. + +Safety: fail-closed on module-load failure AND on runtime gate-logic +exception (mirrors PR #660 ``_emit_load_failure_deny`` and the +bootstrap_gate analogue). hookEventName always emitted (#658 invariant). +DENY → exit 2 + permissionDecision; ALLOW → suppressOutput + exit 0; +WARN → additionalContext + exit 0 (advisory; runbook validates injection +empirically per architect §7(a) / tests/runbooks/662-dispatch-gate.md). + +Cheapest-rule-first ordering with short-circuit on first non-ALLOW: + ① SOLO_EXEMPT carve-out ⑥ session-team match (decision h) + ② non-pact-* carve-out ⑦ member-name uniqueness in team + ③ name + team_name presence ⑧ task-assigned check + ④ name length/NFKC/regex/reserved ⑨ prompt heuristic (WARN) + ⑤ plugin agents/ + specialist registry + +Every gate decision (ALLOW/DENY/WARN) is journaled. Prompt text is +redacted at the journal-write boundary (sk-/xoxb-/ghp_/AKIA/JWT +patterns) so credentials accidentally pasted into a prompt never persist +to disk; the in-memory ``permissionDecisionReason`` keeps the verbatim +prompt-fragment for the user-facing error. + +Configuration: + ``PACT_DISPATCH_INLINE_MISSION_MODE`` env-var (default ``"warn"``) + controls the inline-mission heuristic disposition (the heuristic that + flags dispatchers inlining mission text into ``prompt=`` instead of + using the canonical "check TaskList" form). Allowed values: + ``"warn"`` advisory ``additionalContext`` (default) + ``"deny"`` blocking deny — flip after the matcher-fidelity + counter-test in ``tests/runbooks/662-dispatch-gate.md`` + confirms ``additionalContext`` is silently dropped under + PreToolUse + ``"shadow"`` journal-only; the trigger is observable in the session + journal but does not WARN or DENY (calibration mode). + Unknown values fall back to ``"warn"``. The other rules are unaffected. + +Input: JSON from stdin (tool_name, tool_input, agent_id, etc.) +Output: stdout JSON per harness contract. +""" + +# ─── stdlib first (used by _emit_load_failure_deny BEFORE wrapped imports) ─ +import json +import sys +import os +from typing import NoReturn + + +_SUPPRESS_OUTPUT = json.dumps({"suppressOutput": True}) + + +def _emit_load_failure_deny(stage: str, error: BaseException) -> NoReturn: + """Stdlib-only fail-closed deny for module-load or runtime gate-logic + failure. Mirrors PR #660 ``merge_guard_pre._emit_load_failure_deny`` + and bootstrap_gate.py analogue. hookEventName MUST be present. + """ + print(json.dumps({ + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": ( + f"PACT dispatch_gate {stage} failure — blocking for safety. " + f"{type(error).__name__}: {error}. Check hook installation " + "and shared module availability." + ), + } + })) + print( + f"Hook load error (dispatch_gate / {stage}): {error}", + file=sys.stderr, + ) + sys.exit(2) + + +# ─── fail-closed wrapper on cross-package imports ────────────────────────── +try: + import re + import unicodedata + from pathlib import Path + + import shared.pact_context as pact_context + from shared.dispatch_helpers import ( + SOLO_EXEMPT, + is_registered_pact_specialist, + has_task_assigned, + ) + from shared.session_journal import append_event, make_event +except BaseException as _module_load_error: # noqa: BLE001 — fail-closed catch-all + _emit_load_failure_deny("module imports", _module_load_error) + + +# ─── constants ───────────────────────────────────────────────────────────── + +# Name validation. Order: length cap → NFKC normalize → regex → reserved. +# NFKC defends against fullwidth/lookalike chars that pass naive regex. +# The regex requires at least one alphanumeric and forbids leading or +# trailing hyphens, so degenerate names like "-", "--", "-foo", "foo-" +# are rejected. Internal hyphens are permitted; the single-character +# form must itself be alphanumeric. +NAME_REGEX = re.compile(r"^[a-z0-9](?:[a-z0-9-]*[a-z0-9])?$") +NAME_MAX_LENGTH = 64 + +# Reserved tokens (per Task #25 description / security HANDOFF). Names +# that would collide with PACT routing literals or schema actor types. +# Recall pinned memory: ``"team-lead"`` is the canonical lead name AND +# routing literal — a teammate named ``team-lead`` would shadow message +# routing. ``lead`` / ``peer`` / ``user`` / ``external`` are +# ``KNOWN_RESOLVERS`` schema values. ``unknown`` / ``solo`` are +# semantic-reserved. +RESERVED_NAMES = frozenset({ + "team-lead", + "lead", + "user", + "external", + "peer", + "unknown", + "solo", + # Self-completion-exempt names. The task_lifecycle_gate + # short-circuits the lead-only-completion advisory when a teammate's + # owner matches one of these names. If a dispatch were allowed to + # spawn under one of these names, the spawned teammate could + # self-complete tasks without triggering the advisory — bypassing + # lead-only completion authority via name choice. Reject the names + # at spawn time to close that confused-deputy chain. Mirrors + # shared.intentional_wait.SELF_COMPLETE_EXEMPT_AGENTS; the + # cross-module subset invariant is asserted by a regression test. + "secretary", + "pact-secretary", +}) + +# Inline-mission heuristic. Long inline mission OR no TaskList reference +# suggests the dispatcher embedded the mission in the prompt instead of +# the task description (defeats the harvest pipeline). +PROMPT_MAX_LENGTH = 800 +TASK_REFERENCE_PHRASES = ( + "TaskList", + "task list", + "tasks assigned", + "check your tasks", +) + +# Inline-mission mode. Read at module-load from +# ``PACT_DISPATCH_INLINE_MISSION_MODE`` env-var. The internal Python +# identifier is named after the behavior the heuristic checks (whether the +# dispatcher inlined mission text into ``prompt=`` rather than using the +# canonical "check TaskList" form). +# Allowed values: +# ``"warn"`` — emit additionalContext (advisory, default; behavior +# unchanged from initial Commit 2 implementation). +# ``"deny"`` — promote to a blocking deny. Flip to this if the +# post-merge matcher-fidelity counter-test confirms +# additionalContext is silently dropped under PreToolUse +# (architect §7(a), runbook 662-dispatch-gate.md +# inline-mission section). +# ``"shadow"`` — emit a journal event but neither WARN nor DENY +# (first-session safety net for calibration; the gate +# observes without intervening). DENY decisions from the +# other rules still fire normally; only the inline-mission +# heuristic is muted. +# Unknown values fall back to ``"warn"`` so a typo never disables the +# gate's other rules. Default ``"warn"`` preserves Commit 2 behavior. +_ALLOWED_INLINE_MISSION_MODES = frozenset({"warn", "deny", "shadow"}) +INLINE_MISSION_MODE = os.environ.get( + "PACT_DISPATCH_INLINE_MISSION_MODE", "warn", +) +if INLINE_MISSION_MODE not in _ALLOWED_INLINE_MISSION_MODES: + INLINE_MISSION_MODE = "warn" + +# Credential redaction patterns. Applied to the journal-written prompt +# only; the in-memory ``permissionDecisionReason`` keeps the verbatim +# prompt for the dispatcher's debugging. +REDACTION_PATTERNS = ( + # Anthropic API keys, including the sk-ant-api03-... family. Matched + # before the generic sk- prefix so the longer, more specific shape + # is captured cleanly. + re.compile(r"sk-ant-[A-Za-z0-9_-]{20,}"), + # OpenAI-style sk- keys. + re.compile(r"sk-[A-Za-z0-9]{20,}"), + re.compile(r"xoxb-[A-Za-z0-9-]{20,}"), + # GitHub tokens: personal-access (ghp_), OAuth (gho_), user-server + # (ghu_), server-to-server (ghs_), refresh (ghr_). + re.compile(r"gh[oprsu]_[A-Za-z0-9]{20,}"), + # AWS access key id. + re.compile(r"AKIA[A-Z0-9]{16}"), + # Google API keys (39-char total: AIza prefix + 35 chars). + re.compile(r"AIza[A-Za-z0-9_-]{35}"), + # JWT shape: three base64url segments joined with dots. + re.compile(r"\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b"), + # PEM private-key blocks (any flavor: RSA, EC, OPENSSH, plain + # PRIVATE KEY, ENCRYPTED PRIVATE KEY). DOTALL so the body across + # newlines is consumed by the redactor; non-greedy to stop at the + # first END line. + re.compile( + r"-----BEGIN [A-Z ]*PRIVATE KEY-----.*?-----END [A-Z ]*PRIVATE KEY-----", + re.DOTALL, + ), +) + + +# ─── helpers ─────────────────────────────────────────────────────────────── + +def _redact(prompt: str) -> str: + """Scrub credential patterns BEFORE journal write. + + Applied at the journal-write boundary, not at gate-decision boundary + — the user-facing ``permissionDecisionReason`` keeps the verbatim + prompt fragment so the dispatcher can self-diagnose. Only the + on-disk journal entry is redacted. + """ + if not isinstance(prompt, str): + return "" + redacted = prompt + for pat in REDACTION_PATTERNS: + redacted = pat.sub("[REDACTED]", redacted) + return redacted + + +def _team_member_names(team_name: str) -> set[str]: + """Member-roster reader. Read ``~/.claude/teams/{team_name}/config.json`` + and return the set of currently-live member names. Tolerant: any error + returns ``set()`` (no collision detected). + + Private to dispatch_gate (only the uniqueness rule uses it). The + architect §5 contract intentionally did NOT include this in + dispatch_helpers.py because task_lifecycle_gate has no need for the + member roster. + """ + cfg_path = Path.home() / ".claude" / "teams" / team_name / "config.json" + try: + data = json.loads(cfg_path.read_text(encoding="utf-8")) + except (OSError, ValueError): + return set() + members = data.get("members") if isinstance(data, dict) else None + if not isinstance(members, list): + return set() + names: set[str] = set() + for entry in members: + if isinstance(entry, dict): + n = entry.get("name") + if isinstance(n, str) and n: + names.add(n) + return names + + +# ─── pure rule-eval composition (testable without stdin/stdout) ──────────── + +def evaluate_dispatch(tool_input: dict) -> tuple[str, str | None, str | None]: + """Single composition function. Returns ``(decision, reason, rule)``. + + decision ∈ {``"ALLOW"``, ``"DENY"``, ``"WARN"``}. + reason: human-readable explanation (None for ALLOW). + rule: behavioral rule identifier (e.g. ``"name_required"``, + ``"long_inline_mission"``); None for ALLOW or carve-out. Values + describe what the rule checks. + + Cheapest-rule-first ordering with short-circuit on first non-ALLOW. + Pure function — no stdin/stdout, no FS writes, no exceptions raised + to caller. ALL exceptions escape to ``main()`` which routes them + through ``_emit_load_failure_deny`` (runtime fail-closed). + """ + if not isinstance(tool_input, dict): + tool_input = {} + + subagent_type = tool_input.get("subagent_type", "") or "" + name = tool_input.get("name", "") or "" + team_name = tool_input.get("team_name", "") or "" + prompt = tool_input.get("prompt", "") or "" + + # ① Carve-outs — sub-microsecond. SOLO_EXEMPT covers research agents + # (general-purpose / Explore / Plan) that legitimately spawn without + # name/team_name per pinned feedback_direct_agent_calls.md. + if subagent_type in SOLO_EXEMPT: + return ("ALLOW", None, None) + # ② Non-pact-* spawns are not this gate's business — fall through. + if not isinstance(subagent_type, str) or not subagent_type.startswith("pact-"): + return ("ALLOW", None, None) + + # ③ Required string presence on name + team_name. + if not isinstance(name, str) or not name: + return ("DENY", + "PACT dispatch_gate: name= parameter is required for " + "pact-* specialist spawns. See orchestrator persona §11.", + "name_required") + if not isinstance(team_name, str) or not team_name: + return ("DENY", + "PACT dispatch_gate: team_name= parameter is required for " + "pact-* specialist spawns.", + "team_name_required") + + # Normalize team_name to its canonical form (lowercase, stripped) once + # and reuse it for the session-equality check, the team-config read, + # and the task-store read. Without this, the session-equality check + # would compare against a lowercased copy while the filesystem reads + # used the raw value, producing inconsistent behavior on + # case-sensitive filesystems. + team_name = team_name.strip().lower() + + # ④ Name validation. Length cap FIRST (cheap), then NFKC normalization + # (defends against fullwidth/lookalike unicode that would otherwise + # pass the regex), then regex on the NORMALIZED form, then + # reserved-token check on the normalized form. + if len(name) > NAME_MAX_LENGTH: + return ("DENY", + f"PACT dispatch_gate: name length {len(name)} exceeds " + f"limit {NAME_MAX_LENGTH}.", + "name_too_long") + normalized_name = unicodedata.normalize("NFKC", name) + if not NAME_REGEX.match(normalized_name): + return ("DENY", + f"PACT dispatch_gate: name {name!r} must match " + r"^[a-z0-9-]+$ (lowercase alphanumerics + hyphens, " + "checked after NFKC normalization).", + "name_invalid_regex") + if normalized_name in RESERVED_NAMES: + return ("DENY", + f"PACT dispatch_gate: name {name!r} is in the " + "reserved-token set (would collide with a PACT routing " + "literal or schema resolver type). Choose a unique " + "role-descriptive name.", + "name_reserved_token") + + # ⑤ Plugin agents/ presence (cheap stat). Caught BEFORE the registry + # check so a missing plugin install gets the more actionable + # "plugin broken" message rather than "specialist not registered". + plugin_root = pact_context.get_plugin_root() + if not plugin_root or not (Path(plugin_root) / "agents").is_dir(): + return ("DENY", + "PACT dispatch_gate: plugin agents/ directory is " + "unavailable. Plugin install may be broken; check " + "pact-session-context.json plugin_root field.", + "plugin_agents_missing") + # subagent_type registered in the agent registry. Empty registry + # (which would also trigger the plugin_agents_missing rule above) is + # fail-closed by is_registered_pact_specialist. + if not is_registered_pact_specialist(subagent_type): + return ("DENY", + f"PACT dispatch_gate: subagent_type {subagent_type!r} " + "is not a registered PACT specialist (no matching " + "agents/pact-*.md).", + "specialist_not_registered") + + # ⑥ Session-team match with empty-source fail-closed (decision h). + # An adversary passing team_name='' would equal an empty session_team + # if we didn't reject empty session_team upfront — the team_name= + # presence rule above already caught explicit empty team_name on the + # spawn-input side. + session_team = pact_context.get_team_name() + if not session_team: + return ("DENY", + "PACT dispatch_gate: session team_name is unavailable " + "(pact-session-context.json missing or unreadable). " + "Re-run /PACT:bootstrap to restore session context.", + "team_name_unavailable") + if team_name != session_team: + return ("DENY", + f"PACT dispatch_gate: team_name {team_name!r} does not " + f"match current session team {session_team!r}. Use the " + "team name listed in CLAUDE.md §Current Session.", + "team_name_mismatch") + + # ⑦ Name uniqueness against live team members. + members = _team_member_names(team_name) + if name in members: + return ("DENY", + f"PACT dispatch_gate: name {name!r} is already a live " + f"member of team {team_name!r}. Use a unique name " + "(append a numeric suffix or role-descriptor variant).", + "name_not_unique") + + # ⑧ Task assignment — TaskCreate must precede Agent spawn so the + # teammate has work on arrival. + if not has_task_assigned(team_name, name): + return ("DENY", + f"PACT dispatch_gate: no Task assigned to owner={name!r} " + f"in team {team_name!r}. Create Task A (teachback) + " + "Task B (work) before spawn so the teammate has work on " + "arrival.", + "no_task_assigned") + + # ⑨ Inline-mission heuristic. Mode controlled by + # PACT_DISPATCH_INLINE_MISSION_MODE env-var (warn|deny|shadow; default + # warn). Shadow is a calibration mode: the rule fires the journal + # event but returns ALLOW so first-session operators can observe + # trigger frequency without WARN-noise. + if (len(prompt) > PROMPT_MAX_LENGTH + or not any(phrase in prompt for phrase in TASK_REFERENCE_PHRASES)): + msg = (f"PACT dispatch_gate: prompt is long ({len(prompt)} " + f"chars, threshold {PROMPT_MAX_LENGTH}) or lacks a " + "TaskList reference. Mission belongs in the Task " + "description, not the spawn prompt. WARN means STOP and " + "re-dispatch correctly: put the mission in " + "TaskCreate(description=...) and let the teammate read " + "it via TaskList/TaskGet. See orchestrator persona §11.") + if INLINE_MISSION_MODE == "deny": + return ("DENY", msg, "long_inline_mission") + if INLINE_MISSION_MODE == "shadow": + # Journal sees the rule fired; caller treats as ALLOW (no advisory). + return ("ALLOW", msg, "long_inline_mission") + return ("WARN", msg, "long_inline_mission") + + return ("ALLOW", None, None) + + +# ─── main ────────────────────────────────────────────────────────────────── + +def _journal_decision(decision: str, reason: str | None, rule: str | None, + tool_input: dict) -> None: + """Emit one journal event per gate decision. Best-effort sink — + errors are swallowed so the gate's primary decision always stands. + + The ``rule`` field carries a behavioral identifier (e.g. + ``"name_required"``, ``"long_inline_mission"``). + + Note: ``"dispatch_decision"`` is not registered in + ``_REQUIRED_FIELDS_BY_TYPE`` in shared/session_journal.py, so the + schema validator passes via the unknown-type opt-in pass-through + (validator L317-L320). If a future change registers this type with + required fields, update both this call site AND the validator + declaration in tandem. Credential redaction applied to the prompt + fragment BEFORE the journal write so credentials never persist. + """ + try: + prompt = tool_input.get("prompt", "") if isinstance(tool_input, dict) else "" + event = make_event( + "dispatch_decision", + decision=decision, + rule=rule, + subagent_type=tool_input.get("subagent_type") if isinstance(tool_input, dict) else None, + name=tool_input.get("name") if isinstance(tool_input, dict) else None, + team_name=tool_input.get("team_name") if isinstance(tool_input, dict) else None, + reason=reason, + prompt_redacted=_redact(prompt)[:1024], + ) + append_event(event) + except Exception: + # Journal is best-effort; gate decision stands regardless. + pass + + +def main() -> None: + try: + input_data = json.load(sys.stdin) + except (json.JSONDecodeError, ValueError): + # Malformed stdin → fail-OPEN (input-side failure is the harness's + # domain; cannot DENY meaningfully without parsed input). Mirrors + # bootstrap_gate.py and the other PreToolUse gates. + print(_SUPPRESS_OUTPUT) + sys.exit(0) + + if not isinstance(input_data, dict): + print(_SUPPRESS_OUTPUT) + sys.exit(0) + + tool_name = input_data.get("tool_name", "") + if tool_name != "Agent": + # Hook is registered under matcher='Agent' but defensive belt: if + # something else routes here, no-op rather than misclassify. + print(_SUPPRESS_OUTPUT) + sys.exit(0) + + pact_context.init(input_data) + tool_input = input_data.get("tool_input", {}) or {} + + try: + decision, reason, rule = evaluate_dispatch(tool_input) + except Exception as e: + # Runtime fail-closed: a runtime exception in the rule logic is + # the same defect class as #658 — must DENY, must include + # hookEventName. + _emit_load_failure_deny("runtime", e) + + _journal_decision(decision, reason, rule, tool_input) + + if decision == "ALLOW": + print(_SUPPRESS_OUTPUT) + sys.exit(0) + if decision == "DENY": + print(json.dumps({ + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": reason, + } + })) + sys.exit(2) + # WARN: emit additionalContext, exit 0 (advisory; per architect §7(a) + # the empirical injection-vs-silent-drop is validated post-merge). + print(json.dumps({ + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "additionalContext": reason, + } + })) + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/pact-plugin/hooks/hooks.json b/pact-plugin/hooks/hooks.json index 33cf5e50..953c8a9b 100644 --- a/pact-plugin/hooks/hooks.json +++ b/pact-plugin/hooks/hooks.json @@ -63,11 +63,15 @@ ] }, { - "matcher": "Task", + "matcher": "Agent", "hooks": [ { "type": "command", "command": "python3 \"${CLAUDE_PLUGIN_ROOT}/hooks/team_guard.py\"" + }, + { + "type": "command", + "command": "python3 \"${CLAUDE_PLUGIN_ROOT}/hooks/dispatch_gate.py\"" } ] }, @@ -184,7 +188,7 @@ ] }, { - "matcher": "Task", + "matcher": "Agent", "hooks": [ { "type": "command", @@ -198,6 +202,10 @@ { "type": "command", "command": "python3 \"${CLAUDE_PLUGIN_ROOT}/hooks/wake_lifecycle_emitter.py\"" + }, + { + "type": "command", + "command": "python3 \"${CLAUDE_PLUGIN_ROOT}/hooks/task_lifecycle_gate.py\"" } ] } diff --git a/pact-plugin/hooks/refresh/patterns.py b/pact-plugin/hooks/refresh/patterns.py index 2e0446b3..4265d2dd 100644 --- a/pact-plugin/hooks/refresh/patterns.py +++ b/pact-plugin/hooks/refresh/patterns.py @@ -8,8 +8,15 @@ Configuration constants are imported from constants.py for maintainability. Supports both dispatch models for backward compatibility: -- Background Task agent: Task(subagent_type="pact-*", run_in_background=true) -- Agent Teams teammate: Task(name="pact-*", team_name="pact-{session_hash}", subagent_type="pact-*") +- Background Agent agent: Agent(subagent_type="pact-*", run_in_background=true) +- Agent Teams teammate: Agent(name="pact-*", team_name="pact-{session_hash}", subagent_type="pact-*") + +Dual-token transcript carve-out (#662): historical session transcripts +recorded the spawn tool as "Task"; current platform writes "Agent". The +transcript-parsing surface (this module + transcript_parser.py) is the +ONE place where both literals are accepted, so old transcripts remain +parseable. Dispatch code (bootstrap_gate, hooks.json matchers, persona, +commands, skills, protocols) is clean Agent only — no dual-naming. """ import re @@ -40,7 +47,7 @@ "STEP_MARKERS", "TERMINATION_SIGNALS", "PACT_AGENT_PATTERN", - "TASK_TOOL_PATTERN", + "SPAWN_TOOL_PATTERN", "SUBAGENT_TYPE_PATTERN", "CONTEXT_EXTRACTORS", "PENDING_ACTION_PATTERNS", @@ -164,15 +171,20 @@ class WorkflowPattern: ], } -# Agent type patterns (for detecting Task tool calls to PACT agents) +# Agent type patterns (for detecting spawn-tool calls to PACT agents) PACT_AGENT_PATTERN = re.compile(r"\bpact-(preparer|architect|backend-coder|frontend-coder|database-engineer|devops-engineer|n8n|test-engineer|security-engineer|qa-engineer|auditor|secretary)(?![\w-])") # Tool call patterns - support both dispatch models: -# - Background Task agent: Task(subagent_type="pact-*", run_in_background=true) -# - Agent Teams teammate: Task(name="pact-*", team_name="pact-{session_hash}", subagent_type="pact-*") +# - Background Agent agent: Agent(subagent_type="pact-*", run_in_background=true) +# - Agent Teams teammate: Agent(name="pact-*", team_name="pact-{session_hash}", subagent_type="pact-*") # where team_name is session-unique (e.g., "pact-0001639f") # Both include subagent_type, so SUBAGENT_TYPE_PATTERN matches either model. -TASK_TOOL_PATTERN = re.compile(r'"name":\s*"Task"', re.IGNORECASE) +# +# Dual-token (#662): the spawn-tool literal in transcripts is either "Task" +# (historical, pre-#662) or "Agent" (current, post-#662). This pattern is +# the dual-token carve-out — transcript-parsing accepts both so old session +# fixtures remain parseable; dispatch code uses the clean "Agent" form only. +SPAWN_TOOL_PATTERN = re.compile(r'"name":\s*"(?:Task|Agent)"', re.IGNORECASE) SUBAGENT_TYPE_PATTERN = re.compile(r'"subagent_type":\s*"([^"]+)"') # Context extraction patterns (for building rich checkpoint context) @@ -202,7 +214,7 @@ class WorkflowPattern: CONFIDENCE_WEIGHTS = { "clear_trigger": 0.4, # Found explicit /PACT:* command "step_marker": 0.2, # Found step marker in content - "agent_invocation": 0.2, # Found Task call to PACT agent + "agent_invocation": 0.2, # Found Agent (or historical Task) call to PACT agent "pending_action": 0.1, # Found pending action indicator "context_richness": 0.1, # Found context elements (PR#, task summary) } diff --git a/pact-plugin/hooks/refresh/transcript_parser.py b/pact-plugin/hooks/refresh/transcript_parser.py index 7c560eb2..c8b89794 100644 --- a/pact-plugin/hooks/refresh/transcript_parser.py +++ b/pact-plugin/hooks/refresh/transcript_parser.py @@ -75,14 +75,18 @@ def get_tool_call(self, name: str) -> ToolCall | None: def has_task_to_pact_agent(self) -> bool: """ - Check if this turn has a Task call to a PACT agent. + Check if this turn has a spawn-tool call to a PACT agent. Supports both dispatch models: - - Background Task agent: subagent_type contains "pact-" + - Background Agent agent: subagent_type contains "pact-" - Agent Teams teammate: name field contains "pact-" (with team_name) + + Dual-token (#662): historical transcripts recorded the spawn tool + as ``"Task"``; current platform writes ``"Agent"``. Both literals + are accepted here so old session fixtures remain parseable. """ for tc in self.tool_calls: - if tc.name == "Task": + if tc.name in ("Task", "Agent"): # Check subagent_type (present in both dispatch models) subagent = tc.input_data.get("subagent_type", "") if "pact-" in subagent: @@ -340,12 +344,15 @@ def find_task_calls_to_agent(turns: list[Turn], agent_pattern: str) -> list[tupl agent_pattern: Pattern to match in subagent_type or name (e.g., "pact-") Returns: - List of (Turn, ToolCall) tuples for matching Task calls + List of (Turn, ToolCall) tuples for matching spawn-tool calls. + + Dual-token (#662): both ``"Task"`` (historical) and ``"Agent"`` (current) + spawn-tool literals are accepted so old session fixtures remain parseable. """ results = [] for turn in turns: for tc in turn.tool_calls: - if tc.name == "Task": + if tc.name in ("Task", "Agent"): # Check subagent_type (present in both dispatch models) subagent = tc.input_data.get("subagent_type", "") if agent_pattern in subagent: diff --git a/pact-plugin/hooks/shared/dispatch_helpers.py b/pact-plugin/hooks/shared/dispatch_helpers.py new file mode 100644 index 00000000..c3bceacc --- /dev/null +++ b/pact-plugin/hooks/shared/dispatch_helpers.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +""" +Location: pact-plugin/hooks/shared/dispatch_helpers.py +Summary: Shared helpers for #662 dispatch_gate.py and task_lifecycle_gate.py. + +Exposes: + - is_registered_pact_specialist(subagent_type) — registry check (one + of the dispatch_gate rules; rejects unregistered pact-* spawns) + - has_task_assigned(team_name, name) — task-assigned check (one of + the dispatch_gate rules; rejects spawn before TaskCreate) + - trustworthy_actor_name(input_data) — actor resolution for the + task_lifecycle_gate self-completion rule + - SOLO_EXEMPT — research/exploration agents that bypass dispatch_gate + - MARKER_SCHEMA_VERSION — bootstrap marker schema version + +Module-load discipline (architect §5.6): + Stdlib imports (json/sys/os) AND _emit_load_failure_deny defined BEFORE + the wrapped try/except BaseException block. The wrapped block catches + cross-package import failures (functools/re/pathlib/shared.pact_context) + and emits a structured DENY with hookEventName=PreToolUse so any + importer (dispatch_gate, task_lifecycle_gate) inherits the fail-closed + contract automatically: if THIS module fails to load, the importer's + own wrapped import block fires its own _emit_load_failure_deny via the + re-raised BaseException. +""" + +import json +import sys +import os +from typing import NoReturn + + +def _emit_load_failure_deny(stage: str, error: BaseException) -> NoReturn: + """Stdlib-only fail-closed deny for module-load failure. + + Mirrors PR #660 ``merge_guard_pre._emit_load_failure_deny`` and the + bootstrap_gate analogue at hooks/bootstrap_gate.py. Uses ONLY stdlib + (json, sys) so it remains functional even when every wrapped import + below fails. Audit anchor: hookEventName must be present in any deny + output (#658 invariant). + """ + print(json.dumps({ + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": ( + f"PACT dispatch_helpers {stage} failure — blocking for safety. " + f"{type(error).__name__}: {error}. Check hook installation " + "and shared module availability." + ), + } + })) + print( + f"Hook load error (dispatch_helpers / {stage}): {error}", + file=sys.stderr, + ) + sys.exit(2) + + +# ─── fail-closed wrapper around cross-package imports ───────────────────── +try: + import functools + from pathlib import Path + import shared.pact_context as pact_context + from shared.task_utils import iter_team_task_jsons +except BaseException as _module_load_error: # noqa: BLE001 — fail-closed catch-all + _emit_load_failure_deny("module imports", _module_load_error) + + +# ─── constants ───────────────────────────────────────────────────────────── + +# Research/exploration agents that legitimately spawn WITHOUT name/team_name +# (per pinned-memory feedback_direct_agent_calls.md). These bypass the +# dispatch_gate entirely. The specialist registry stays simple — these are caught at +# step ① of evaluate_dispatch in dispatch_gate.py. +SOLO_EXEMPT = frozenset({"general-purpose", "Explore", "Plan"}) + +# Bootstrap marker schema version. Mirror constant from bootstrap_gate.py; bump +# here AND in bootstrap_gate.MARKER_SCHEMA_VERSION if marker JSON shape ever +# changes. Producer (commands/bootstrap.md) reads this same value. +MARKER_SCHEMA_VERSION = 1 + + +# ─── specialist registry ─────────────────────────────────────────────────── + +@functools.lru_cache(maxsize=1) +def _specialist_registry() -> frozenset[str]: + """Glob agents/pact-*.md once per hook subprocess. + + Hook subprocesses are short-lived (per-tool-call); the cache is rebuilt + on every dispatch evaluation. Empty plugin_root or missing agents/ + directory → empty registry → every pact-* dispatch is DENIED + (registry fail-closed). pact-orchestrator.md IS in the glob set; + the "orchestrator is the persona, not a dispatchable specialist" + semantic is enforced at a different layer (system-prompt --agent + flag at session start), not at the registry. + """ + plugin_root = pact_context.get_plugin_root() + if not plugin_root: + return frozenset() + agents_dir = Path(plugin_root) / "agents" + try: + return frozenset(p.stem for p in agents_dir.glob("pact-*.md")) + except OSError: + return frozenset() + + +def is_registered_pact_specialist(subagent_type: str) -> bool: + """True iff subagent_type matches a file at ``agents/pact-*.md``.""" + if not isinstance(subagent_type, str) or not subagent_type: + return False + return subagent_type in _specialist_registry() + + +# ─── task-assigned check ─────────────────────────────────────────────────── + +def has_task_assigned(team_name: str, name: str) -> bool: + """True iff at least one task in ``team_name``'s task store has + ``owner==name`` AND ``status in {"pending", "in_progress"}``. + + Delegates path construction and per-file reading to + ``shared.task_utils.iter_team_task_jsons``, the single source of truth + for per-team task iteration. That helper enforces the canonical + ``~/.claude/tasks/{team_name}/*.json`` layout and applies path-traversal + + symlink-escape defenses; centralizing the path here prevents the + layout duplication that previously caused this gate to read the wrong + directory. TaskList is a harness tool unavailable in subprocess + context, so direct FS read via the helper is the only viable channel. + """ + if not isinstance(team_name, str) or not team_name: + return False + if not isinstance(name, str) or not name: + return False + for data in iter_team_task_jsons(team_name): + if data.get("owner") != name: + continue + if data.get("status") in ("pending", "in_progress"): + return True + return False + + +# ─── actor resolution (for the self-completion rule) ────────────────────── + +def trustworthy_actor_name(input_data: dict) -> str | None: + """Extract actor name from harness-trustworthy ``agent_id`` ONLY. + + Bypasses ``resolve_agent_name``'s Step 1 (agent_name) and Step 4 + (agent_type) fallbacks because they are not strong enough trust + signals for the self-completion check (architect §5.3 / PREPARE + §10). + + Trust contract: ``agent_id`` is harness-set and lives at the top + level of the hook stdin JSON, not inside ``tool_input`` — a teammate + cannot collide with it via crafted tool arguments. Format is + ``"name@team_name"``. + + Returns the ``name`` portion, or None if ``agent_id`` is missing or + malformed (no ``@``). Caller (task_lifecycle_gate self-completion + rule) treats None as "actor unresolvable — DO NOT exempt from the + self-completion advisory". + + Pure function: no FS, no I/O, no exceptions. + """ + if not isinstance(input_data, dict): + return None + agent_id = input_data.get("agent_id") + if not isinstance(agent_id, str) or "@" not in agent_id: + return None + name, _, team = agent_id.partition("@") + if not name or not team: + return None + return name diff --git a/pact-plugin/hooks/task_lifecycle_gate.py b/pact-plugin/hooks/task_lifecycle_gate.py new file mode 100644 index 00000000..f08771ba --- /dev/null +++ b/pact-plugin/hooks/task_lifecycle_gate.py @@ -0,0 +1,469 @@ +#!/usr/bin/env python3 +""" +Location: pact-plugin/hooks/task_lifecycle_gate.py +Summary: PostToolUse hook (matcher='TaskCreate|TaskUpdate') enforcing PACT + lifecycle invariants. Cannot DENY (post-action); emits structural + advisory via additionalContext, plus a metadata writeback for + self-completion violations. +Used by: hooks.json PostToolUse matcher='TaskCreate|TaskUpdate' (#662) + +Self-completion writeback recursion mitigation: the metadata writeback marks +metadata.gate_writeback=true. The gate's first check skips on this marker +so the gate's own write does not re-trigger the self-completion advisory +on itself. + +Safety: fail-closed-as-advisory on module-load failure (PR #660 pattern, +adapted for PostToolUse — cannot DENY, emits advisory + exit 0). +hookEventName always emitted (#658 defense). + +Rule coverage: + - teachback_addblocks_missing — Teachback Task created without + addBlocks=[] + - work_addblockedby_missing — pact-* work Task created without + addBlockedBy=[] + - completion_no_paired_send — Teachback Task completed without paired + wake-SendMessage to owner within the configured window + - handoff_missing — pact-* work Task completed without + metadata.handoff payload + - self_completion — Teammate self-completed a Task without carve-out + → advisory + completion_disputed writeback + - handoff_schema_invalid — metadata.handoff present but malformed + (disjoint with handoff_missing) + - Every gate decision emits a session_journal lifecycle_decision event +""" + +# ─── stdlib first (used by _emit_load_failure_advisory BEFORE wrapped imports) ─ +import json +import os +import sys +from typing import NoReturn + + +_SUPPRESS_OUTPUT = json.dumps({"suppressOutput": True}) + + +def _emit_load_failure_advisory(stage: str, error: BaseException) -> NoReturn: + """Stdlib-only fail-advisory (PostToolUse cannot DENY). + + Mirrors bootstrap_gate._emit_load_failure_deny but for PostToolUse — + advisory output + exit 0 since deny is not a valid PostToolUse verdict. + Uses ONLY stdlib (json, sys) so it remains functional even when every + wrapped import below fails. + """ + output = { + "hookSpecificOutput": { + "hookEventName": "PostToolUse", + "additionalContext": ( + f"PACT task_lifecycle_gate {stage} failure — lifecycle " + f"rule enforcement skipped this turn. " + f"{type(error).__name__}: {error}. Investigate hook " + "installation and shared module availability." + ), + } + } + print(json.dumps(output)) + print( + f"Hook load error (task_lifecycle_gate / {stage}): {error}", + file=sys.stderr, + ) + sys.exit(0) + + +# ─── fail-closed wrapper around cross-package imports ───────────────────────── +try: + import re + import time + from datetime import datetime, timezone + from pathlib import Path + + import shared.pact_context as pact_context + from shared.dispatch_helpers import trustworthy_actor_name + from shared.intentional_wait import ( + SELF_COMPLETE_EXEMPT_AGENTS, + is_self_complete_exempt, + ) + from shared.session_journal import append_event, make_event + from shared.task_utils import read_task_json +except BaseException as _module_load_error: # noqa: BLE001 — fail-closed catch-all + _emit_load_failure_advisory("module imports", _module_load_error) + + +# ─── constants ──────────────────────────────────────────────────────────────── + +# Paired-SendMessage time window (seconds) for the teachback-completion +# rule. Per architect §7 / plan: 120s. +PAIRED_SENDMESSAGE_WINDOW_S = 120 + +# Owner-name carve-out for the self-completion rule is imported from +# shared.intentional_wait (the single source of truth). The dispatch_gate +# RESERVED_NAMES set holds the same names so a teammate can never spawn +# under one of these — see test_self_complete_exempt_agents_are_all_reserved. + +# Required handoff schema fields (advisory if present-but-malformed). +_HANDOFF_REQUIRED_FIELDS = ( + "produced", + "decisions", + "reasoning_chain", + "uncertainty", + "integration", + "open_questions", +) + + +# ─── paired-SendMessage check (for teachback completion) ───────────────────── + + +def _has_paired_sendmessage(owner: str, window_s: int) -> bool: + """Return True iff the owner's inbox file shows ANY message from + team-lead with timestamp within the last `window_s` seconds. + + Inbox path: ~/.claude/teams/{team_name}/inboxes/{owner}.json + Format: JSON array of {from, text, timestamp, ...}. + + Path-traversal defense: owner is sanitized (alphanumeric, '-', '_' only) + to prevent escape from the inboxes directory. team_name comes from + pact_context (harness-set). + + Fail-OPEN: any error reading/parsing returns False (advisory will fire). + Conservative since the goal is to surface a missing wake-message, not + suppress on read errors. + """ + if not owner or not isinstance(owner, str): + return False + if not re.fullmatch(r"[A-Za-z0-9_\-]+", owner): + return False + team_name = pact_context.get_team_name() if hasattr(pact_context, "get_team_name") else "" + if not team_name: + # Best-effort: read from pact_context cache directly + try: + team_name = pact_context.get_pact_context().get("team_name", "") + except Exception: + team_name = "" + if not team_name or not re.fullmatch(r"[A-Za-z0-9_\-]+", team_name): + return False + inbox_path = ( + Path.home() / ".claude" / "teams" / team_name / "inboxes" / f"{owner}.json" + ) + try: + if not inbox_path.is_file(): + return False + content = inbox_path.read_text(encoding="utf-8") + messages = json.loads(content) + except (OSError, ValueError, json.JSONDecodeError): + return False + if not isinstance(messages, list): + return False + + now_utc = datetime.now(timezone.utc) + cutoff_ts = now_utc.timestamp() - window_s + for msg in messages: + if not isinstance(msg, dict): + continue + sender = msg.get("from") + ts_str = msg.get("timestamp") + if not isinstance(sender, str) or sender != "team-lead": + continue + if not isinstance(ts_str, str): + continue + try: + # Accept both Z-suffix and offset forms; datetime.fromisoformat + # handles offsets directly; replace Z with +00:00 for safety. + normalized = ts_str.replace("Z", "+00:00") + ts_dt = datetime.fromisoformat(normalized) + if ts_dt.tzinfo is None: + ts_dt = ts_dt.replace(tzinfo=timezone.utc) + if ts_dt.timestamp() >= cutoff_ts: + return True + except (ValueError, TypeError): + continue + return False + + +# ─── metadata.completion_disputed writeback (direct FS) ────────────────────── + + +def _writeback_dispute(task_id: str) -> bool: + """Set metadata.completion_disputed=true and metadata.gate_writeback=true + on the task JSON, directly via filesystem write (no harness round-trip). + + Per user decision: direct FS write (no CLI shim). Reads via + shared.task_utils.read_task_json (path-traversal safe), mutates metadata, + writes back atomically (.tmp + os.replace). + + The gate_writeback marker is a recursion guard: if anything (the harness + or a future tool) replays the metadata change as a TaskUpdate, this + gate's step ① recursion check will skip it. + + Fail-OPEN by design: any IOError swallowed and returns False — advisory + is still emitted by the caller (architect §4 'failure mode'). Logged + via lifecycle_decision journal event. + + Returns True iff the writeback succeeded. + """ + if not task_id or not isinstance(task_id, str): + return False + sanitized_id = re.sub(r"[/\\]|\.\.", "", task_id) + if not sanitized_id: + return False + try: + team_name = pact_context.get_pact_context().get("team_name", "") + except Exception: + team_name = "" + if not team_name or not re.fullmatch(r"[A-Za-z0-9_\-]+", team_name): + return False + + task = read_task_json(sanitized_id, team_name) + if not task: + return False + + metadata = task.get("metadata") + if not isinstance(metadata, dict): + metadata = {} + task["metadata"] = metadata + metadata["completion_disputed"] = True + metadata["gate_writeback"] = True + + tasks_root = Path.home() / ".claude" / "tasks" / team_name + target = tasks_root / f"{sanitized_id}.json" + tmp = tasks_root / f".{sanitized_id}.json.tmp" + try: + tasks_root.mkdir(parents=True, exist_ok=True, mode=0o700) + tmp.write_text(json.dumps(task), encoding="utf-8") + os.replace(str(tmp), str(target)) + return True + except OSError: + try: + if tmp.exists(): + tmp.unlink() + except OSError: + pass + return False + + +# ─── core evaluation ───────────────────────────────────────────────────────── + + +def _validate_handoff_schema(handoff: object) -> str | None: + """Return None if handoff is well-formed, or a short reason string + describing the schema problem (suitable for advisory text). + """ + if not isinstance(handoff, dict): + return f"metadata.handoff must be object, got {type(handoff).__name__}" + missing = [f for f in _HANDOFF_REQUIRED_FIELDS if f not in handoff] + if missing: + return f"metadata.handoff missing required fields: {', '.join(missing)}" + return None + + +def evaluate_lifecycle(input_data: dict) -> list[tuple[str, str]]: + """Return list of (rule, message) advisory tuples. Empty list → ALLOW + silently. + + The ``rule`` element is a behavioral identifier (e.g. + ``"teachback_addblocks_missing"``, ``"self_completion"``) used in + journal events; the ``message`` element is the human-readable + advisory text emitted via additionalContext. + """ + advisories: list[tuple[str, str]] = [] + tool_name = input_data.get("tool_name", "") + tool_input = input_data.get("tool_input") or {} + tool_response = input_data.get("tool_response") or input_data.get("tool_output") or {} + if not isinstance(tool_input, dict): + tool_input = {} + if not isinstance(tool_response, dict): + tool_response = {} + + # ① Recursion guard (self-completion writeback self-trigger): skip + # silently if THIS update is the gate's own writeback. Checked FIRST + # before any other rule. + incoming_metadata = tool_input.get("metadata") or {} + if isinstance(incoming_metadata, dict) and incoming_metadata.get("gate_writeback") is True: + return [] + + # ② TaskCreate rules — teachback addBlocks + work-task addBlockedBy + if tool_name == "TaskCreate": + subject = (tool_input.get("subject") or "") + subject_upper = subject.upper() if isinstance(subject, str) else "" + owner = tool_input.get("owner") or "" + if not isinstance(owner, str): + owner = "" + + if "TEACHBACK" in subject_upper and not tool_input.get("addBlocks"): + advisories.append(( + "teachback_addblocks_missing", + "PACT task_lifecycle_gate: Teachback Task created without " + "addBlocks=[]. The teachback gate must block " + "the work task (per pact-completion-authority).", + )) + + if ( + "TEACHBACK" not in subject_upper + and owner.startswith("pact-") + and not tool_input.get("addBlockedBy") + ): + advisories.append(( + "work_addblockedby_missing", + f"PACT task_lifecycle_gate: pact-* Task created " + f"(owner={owner!r}) without addBlockedBy=[]. " + "Work tasks must block on teachback acceptance.", + )) + + # ③ TaskUpdate-to-completed rules — paired-send, handoff presence, + # handoff schema, self-completion + if tool_name == "TaskUpdate" and tool_input.get("status") == "completed": + task_id = tool_input.get("taskId", "") or "" + # Final post-state — prefer tool_response.task; fallback to bare dict. + task = tool_response.get("task") if isinstance(tool_response, dict) else None + if not isinstance(task, dict): + # Fall back to disk if harness output didn't include the task. + try: + team_name = pact_context.get_pact_context().get("team_name", "") + except Exception: + team_name = "" + task = read_task_json(task_id, team_name) if team_name else {} + + subject = task.get("subject") or "" + subject_upper = subject.upper() if isinstance(subject, str) else "" + owner = task.get("owner") or "" + if not isinstance(owner, str): + owner = "" + raw_metadata = task.get("metadata") + metadata = raw_metadata if isinstance(raw_metadata, dict) else {} + + # handoff_missing vs handoff_schema_invalid are disjoint per lead + # clarification: + # handoff missing/empty → handoff_missing advisory, skip schema + # check (no payload to validate). + # handoff present but schema malformed → handoff_schema_invalid + # advisory. + is_work_task = "TEACHBACK" not in subject_upper and owner.startswith("pact-") + if is_work_task: + handoff = metadata.get("handoff") + if not handoff: + advisories.append(( + "handoff_missing", + f"PACT task_lifecycle_gate: Task {task_id} " + f"(owner={owner!r}) marked completed without " + "metadata.handoff. HANDOFF synthesis was missed.", + )) + else: + schema_problem = _validate_handoff_schema(handoff) + if schema_problem: + advisories.append(( + "handoff_schema_invalid", + f"PACT task_lifecycle_gate: Task {task_id} " + f"metadata.handoff schema is invalid — {schema_problem}.", + )) + + # Teachback completion requires a paired wake-SendMessage to owner + if "TEACHBACK" in subject_upper and owner: + if not _has_paired_sendmessage(owner, PAIRED_SENDMESSAGE_WINDOW_S): + advisories.append(( + "completion_no_paired_send", + f"PACT task_lifecycle_gate: Teachback Task {task_id} " + f"completed without a paired wake-SendMessage to " + f"{owner!r} in the last {PAIRED_SENDMESSAGE_WINDOW_S}s. " + "blockedBy is pull-only; teammate idles indefinitely.", + )) + + # Teammate self-completion check + actor = trustworthy_actor_name(input_data) + if ( + actor is not None + and owner + and owner == actor + and owner not in SELF_COMPLETE_EXEMPT_AGENTS + and not is_self_complete_exempt(task) + ): + advisories.append(( + "self_completion", + f"PACT task_lifecycle_gate: teammate {actor!r} " + f"self-completed Task {task_id} without carve-out. " + "Lead-only completion authority — see " + "pact-completion-authority.md. Task marked " + "metadata.completion_disputed=true; lead must re-complete " + "intentionally to clear.", + )) + _writeback_dispute(task_id) + + return advisories + + +# ─── journal emit ──────────────────────────────────────────────────────────── + + +def _journal_lifecycle_decision( + input_data: dict, advisories: list[tuple[str, str]] +) -> None: + """Emit a free-form 'lifecycle_decision' journal event. Best-effort — + fail-open if journal append fails (matches existing append_event policy). + + Each advisory is a (rule, message) tuple; the journal records both the + behavioral rule identifiers and the human-readable messages so + downstream tooling can group by rule without parsing prose. + """ + try: + rules = [rule for rule, _ in advisories] + messages = [msg for _, msg in advisories] + event = make_event( + "lifecycle_decision", + tool_name=input_data.get("tool_name", ""), + advisories_count=len(advisories), + rules=rules, + advisories=messages, + verdict="advisory" if advisories else "allow", + ) + append_event(event) + except Exception: + pass + + +# ─── main ──────────────────────────────────────────────────────────────────── + + +def main() -> None: + try: + input_data = json.load(sys.stdin) + except (json.JSONDecodeError, ValueError): + # Malformed stdin → no-op (input-side failure is harness's domain). + print(_SUPPRESS_OUTPUT) + sys.exit(0) + + if not isinstance(input_data, dict): + print(_SUPPRESS_OUTPUT) + sys.exit(0) + + # Short-circuit on tools we don't care about (defensive — matcher should + # already restrict to TaskCreate|TaskUpdate, but PostToolUse hooks can + # be invoked from a co-located matcher entry alongside other tools). + tool_name = input_data.get("tool_name", "") + if tool_name not in ("TaskCreate", "TaskUpdate"): + print(_SUPPRESS_OUTPUT) + sys.exit(0) + + pact_context.init(input_data) + + try: + advisories = evaluate_lifecycle(input_data) + except Exception as e: # noqa: BLE001 — runtime catch-all → advisory + _emit_load_failure_advisory("runtime", e) + return # unreachable; helper exits + + _journal_lifecycle_decision(input_data, advisories) + + if not advisories: + print(_SUPPRESS_OUTPUT) + sys.exit(0) + + output = { + "hookSpecificOutput": { + "hookEventName": "PostToolUse", + "additionalContext": "\n\n".join(msg for _, msg in advisories), + } + } + print(json.dumps(output)) + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/pact-plugin/skills/pact-agent-teams/SKILL.md b/pact-plugin/skills/pact-agent-teams/SKILL.md index 2d38b1af..20c5b237 100644 --- a/pact-plugin/skills/pact-agent-teams/SKILL.md +++ b/pact-plugin/skills/pact-agent-teams/SKILL.md @@ -66,7 +66,7 @@ If `TaskGet` returns no metadata or the referenced task doesn't exist, proceed w The teachback protocol lives in the separate `pact-teachback` skill. It is preloaded into your context via the `skills:` frontmatter on your agent -file at `Task()` subagent spawn. See [pact-teachback/SKILL.md](../pact-teachback/SKILL.md) +file at `Agent()` subagent spawn. See [pact-teachback/SKILL.md](../pact-teachback/SKILL.md) for format, rules, and ordering requirements. Teachback is a **gate**: send it BEFORE any implementation work. Store diff --git a/pact-plugin/tests/helpers.py b/pact-plugin/tests/helpers.py index fb6de2fb..89f36fd7 100644 --- a/pact-plugin/tests/helpers.py +++ b/pact-plugin/tests/helpers.py @@ -181,7 +181,7 @@ def make_tool_use_block( Create a tool_use content block for assistant messages. Args: - name: Tool name (e.g., "Task", "Read", "Write") + name: Tool name (e.g., "Agent", "Read", "Write") input_data: Tool input parameters tool_use_id: Unique ID for the tool call @@ -320,6 +320,43 @@ def make_team_task_call( ) +def make_team_agent_call( + name: str, + team_name: str, + subagent_type: str, + prompt: str = "You are joining the team. Check TaskList for tasks assigned to you.", + tool_use_id: str = "teamagent-123", +) -> dict[str, Any]: + """ + Create an Agent tool call block with team_name for Agent Teams dispatch (#662). + + The current Claude Code platform spawn-tool name is ``Agent``. This factory + produces test fixtures matching the canonical post-#662 dispatch shape. + The historical sibling ``make_team_task_call`` is preserved so the + dual-token transcript parser can be exercised against pre-#662 transcripts. + + Args: + name: Teammate name (e.g., "preparer", "backend-coder") + team_name: Team to join (e.g., "pact-a1b2c3d4") + subagent_type: Agent type (e.g., "pact-backend-coder") + prompt: Thin prompt directing agent to check TaskList + tool_use_id: Unique ID for the tool call + + Returns: + Dict representing an Agent tool_use content block with team_name + """ + return make_tool_use_block( + name="Agent", + input_data={ + "name": name, + "team_name": team_name, + "subagent_type": subagent_type, + "prompt": prompt, + }, + tool_use_id=tool_use_id, + ) + + # ============================================================================= # Transcript Factories # ============================================================================= diff --git a/pact-plugin/tests/runbooks/662-dispatch-gate.md b/pact-plugin/tests/runbooks/662-dispatch-gate.md new file mode 100644 index 00000000..e3a3c58a --- /dev/null +++ b/pact-plugin/tests/runbooks/662-dispatch-gate.md @@ -0,0 +1,303 @@ +# Dispatch-Gate Runbook (#662) + +Manual fresh-session counter-test methodology for the dispatch-protocol +hardening landed in #662 (Commits 1-4). Hooks loaded in the session that +authors them do not fire — these checks MUST run in a fresh session +post-merge of the #662 PR. + +The runbook validates four enforcement surfaces: + +- **Hook-registration fidelity (matcher mutation)** — `matcher='Agent'` actually fires + on `Agent()` invocations. +- **Bash-marker-bypass closure** — `bootstrap_gate.is_marker_set` rejects an + empty/forged `bootstrap-complete` file. +- **Inline-mission advisory injection** — does PreToolUse `additionalContext` reach + the dispatching agent's next turn? (Empirical; informs `PACT_DISPATCH_INLINE_MISSION_MODE` + flip from `warn` → `deny`.) +- **Sabotaged-import fail-closed** — runtime gate-logic exception fail-closes + with `hookEventName="PreToolUse"`. + +After each execution, append a row to +[`RUNBOOK_RUN_DATES.md`](RUNBOOK_RUN_DATES.md) (`## 662-dispatch-gate.md` +section) with the date, operator, plugin version, sections passed, +inline-mission mode setting in effect, and any per-section observations. + +Implementation references: + +- `pact-plugin/hooks/dispatch_gate.py` — PreToolUse `matcher='Agent'` +- `pact-plugin/hooks/task_lifecycle_gate.py` — PostToolUse `matcher='TaskCreate|TaskUpdate'` +- `pact-plugin/hooks/bootstrap_gate.py::is_marker_set` — SHA256 marker-content verifier +- `pact-plugin/hooks/hooks.json` — hook registration matchers +- `pact-plugin/hooks/shared/dispatch_helpers.py` — registry + task-assigned helpers + `MARKER_SCHEMA_VERSION` +- Architect: `docs/architecture/662-dispatch-protocol.md` §7(a) (inline-mission mode rationale), §10 (marker-content verifier) + +--- + +## Prerequisites + +1. #662 PR is squash-merged to `main` and the new plugin version (≥ 4.1.3) + is installed at `~/.claude/plugins/cache/pact-plugin/PACT//`. +2. Start a **fresh** session in a project that has the plugin installed. + Do not reuse the session that authored the merge — its hook + registrations are stale. +3. Confirm hooks are loaded: + ``` + python -c "import json; d=json.load(open('$HOME/.claude/plugins/cache/pact-plugin/PACT/$(ls ~/.claude/plugins/cache/pact-plugin/PACT/ | tail -1)/pact-plugin/hooks/hooks.json')); \ + print([m['matcher'] for m in d['hooks']['PreToolUse']])" + ``` + Expected: list contains both `"Agent"` and `"TaskCreate|TaskUpdate"` + (the latter under PostToolUse, but the parse confirms structural + integrity). +4. Confirm session journal is writable: + ``` + ls -la ~/.claude/pact-sessions///session-journal.jsonl + ``` + +--- + +## Section 1 — Hook-registration fidelity (`matcher='Agent'`) + +**Goal**: confirm `dispatch_gate.py` actually fires when the harness +processes an `Agent()` tool call. + +**Steps**: + +1. In a fresh session with `/PACT:bootstrap` complete (so a team is + resident), provoke a name_required-trip dispatch — `Agent(subagent_type="pact-backend-coder")` + with no `name=` and no `team_name=`. +2. Expect: tool call denied with `permissionDecisionReason` mentioning + `"PACT dispatch_gate: name= parameter is required"` and the cheatsheet hint + `Agent(subagent_type='pact-*', name='', team_name='', ...)`. +3. Inspect the session journal: + ``` + tail -20 ~/.claude/pact-sessions///session-journal.jsonl \ + | python -c 'import sys,json; \ + [print(json.dumps(json.loads(l), indent=2)) for l in sys.stdin if "dispatch_decision" in l]' + ``` +4. Expect: at least one event with `type="dispatch_decision"`, + `decision="DENY"`, `rule="name_required"`. + +**Pass criteria**: + +- [ ] Spawn rejected with name_required message in `permissionDecisionReason`. +- [ ] Journal records `dispatch_decision` event matching the deny. +- [ ] `hookSpecificOutput.hookEventName == "PreToolUse"` in the deny + payload (visible in transcript JSONL if available). + +**Failure signals**: spawn proceeds (gate did not fire), no journal +event written, or `hookEventName` missing from deny payload. + +### 1.1 Counter-test — mutate matcher to `'WrongName'` + +Confirms the matcher value is load-bearing (not fired-on-everything by +some other registration). + +1. In `~/.claude/plugins/cache/pact-plugin/PACT//pact-plugin/hooks/hooks.json`, + edit the PreToolUse `matcher: "Agent"` block to `"WrongName"`. +2. Start ANOTHER fresh session (cache lives across sessions; the edit + takes effect at the next process start). +3. Repeat the name_required-trip dispatch from §1. +4. Expect: spawn proceeds (or fails for unrelated reasons — the gate + does not fire because the matcher does not bind). +5. Inspect journal — no `dispatch_decision` entry for the failed dispatch. + +**Pass criteria**: gate does NOT fire under the mutated matcher. + +**Revert procedure**: restore `matcher: "Agent"` in `hooks.json` BEFORE +proceeding to Section 2 (otherwise §2-§4 produce no signal). Verify with +`grep -c '"matcher": "Agent"' .../hooks/hooks.json` ≥ 3. + +--- + +## Section 2 — Bash-marker-bypass closure (bootstrap_gate marker fingerprint) + +**Goal**: confirm an empty / forged `bootstrap-complete` marker is +rejected by `is_marker_set`'s SHA256 content-fingerprint check. + +**Steps**: + +1. In a fresh session, BEFORE invoking `/PACT:bootstrap`, inspect the + session-dir path that the bootstrap_gate would consult: + ``` + python -c "import shared.pact_context as p; p.init({}); print(p.get_session_dir())" + ``` + (Run from `pact-plugin/hooks/` with `PYTHONPATH` set; or just note + the path emitted in the bootstrap_gate's deny message.) +2. Forge an empty marker via Bash (the Bash-touch attack surface): + ``` + touch ~/.claude/pact-sessions///bootstrap-complete + ``` +3. Run any command that would route through bootstrap_gate (the gate + fires on `_BLOCKED_TOOLS`; an `Agent()` call is sufficient). +4. Expect: gate STILL denies because the marker file content is empty — + The verifier (`is_marker_set`) requires a JSON body with + `v == MARKER_SCHEMA_VERSION`, valid SHA256 sentinel bound to + `(session_id, plugin_root, version)`. + +**Pass criteria**: + +- [ ] Forged empty marker is rejected. +- [ ] Deny message references bootstrap not being complete (or marker + verification failure). +- [ ] Marker remains on disk; session is not silently elevated. + +**Failure signals**: forged marker is accepted (regression — content-fingerprint check broken +or `is_marker_set` reduced to file-presence check); session proceeds as +if bootstrap completed. + +**Revert procedure**: +``` +rm ~/.claude/pact-sessions///bootstrap-complete +``` +Then run `/PACT:bootstrap` to install a properly-stamped marker for the +remainder of the runbook. + +### 2.1 Variant — malformed JSON + +Repeat with the marker file containing the literal string `not-json` (or +a JSON object with `v=999`). Expect rejection — the verifier enforces both schema +shape and `v == MARKER_SCHEMA_VERSION`. + +--- + +## Section 3 — Inline-mission advisory injection (empirical) + +**Goal**: observe whether PreToolUse `additionalContext` actually +reaches the dispatcher's next turn. This is the calibration input for +flipping `PACT_DISPATCH_INLINE_MISSION_MODE` from `warn` (default) to `deny`. + +**Steps**: + +1. With `PACT_DISPATCH_INLINE_MISSION_MODE` unset (default `warn`), provoke an inline-mission + trip — dispatch a properly-named teammate but with a long inline + prompt (≥ 800 chars) AND no `TaskList` / `task list` / + `tasks assigned` / `check your tasks` phrase in the prompt. +2. Expect: tool call SUCCEEDS (inline-mission returns `WARN`, not `DENY`). The gate + emits `additionalContext` per the harness contract. +3. Observe the dispatcher's next turn — does the dispatcher quote / + reference / acknowledge the inline-mission advisory text? +4. Inspect the journal for the `dispatch_decision` event with + `decision="WARN"`, `rule="long_inline_mission"`. + +**Pass criteria** (advisory works): + +- [ ] Spawn succeeds. +- [ ] Journal records `WARN` with `rule="long_inline_mission"`. +- [ ] Dispatcher's next turn references the advisory text (verbatim or + paraphrased) — proves `additionalContext` was injected. + +**Failure signals** (advisory silently dropped): + +- Spawn succeeds, journal records the WARN, but dispatcher proceeds as + if no advisory landed. This is the calibration trigger to flip + `export PACT_DISPATCH_INLINE_MISSION_MODE=deny` in the operator's shell rc. + +### 3.1 Variant — `PACT_DISPATCH_INLINE_MISSION_MODE=shadow` + +``` +export PACT_DISPATCH_INLINE_MISSION_MODE=shadow +``` + +Restart session. Repeat the inline-mission-trip dispatch. + +**Pass criteria**: + +- [ ] Spawn succeeds with no `additionalContext`. +- [ ] Journal still records `dispatch_decision` with `rule="long_inline_mission"`, + decision `"ALLOW"` (the shadow path returns ALLOW so caller + treats it as a normal allow; the journal entry is the calibration + data). + +### 3.2 Variant — `PACT_DISPATCH_INLINE_MISSION_MODE=deny` + +``` +export PACT_DISPATCH_INLINE_MISSION_MODE=deny +``` + +Restart session. Repeat the inline-mission-trip dispatch. + +**Pass criteria**: + +- [ ] Spawn DENIED. +- [ ] `permissionDecisionReason` references the inline-mission message. +- [ ] Journal records `decision="DENY"`, `rule="long_inline_mission"`. + +**Revert**: `unset PACT_DISPATCH_INLINE_MISSION_MODE` for subsequent sections. + +--- + +## Section 4 — Sabotaged-import fail-closed counter-test + +**Goal**: confirm runtime gate-logic exception fail-closes with the +correct `hookEventName`. + +**Steps**: + +1. Run the existing CI test outside-of-session as the structural + counter-test: + ``` + cd pact-plugin && python -m pytest tests/test_dispatch_gate_smoke.py::test_f21_fail_closed_module_load -v + ``` +2. Expect: test passes. The fixture sabotages + `shared/dispatch_helpers.py` via `shutil.copytree` + overwrite under + `PYTHONSAFEPATH=1`, asserts exit code 2 + `hookEventName="PreToolUse"` + + `permissionDecision="deny"`. +3. For an in-session smoke (optional, advisory only): a sabotaged + import in a fresh session would require pre-corrupting the installed + plugin cache, which contaminates the entire install — do NOT attempt + this against the user's live cache. The CI test is the canonical + signal; this runbook section exists to document the surface. + +**Pass criteria**: + +- [ ] CI test passes. +- [ ] No new sabotage attempts performed against the live install. + +**Failure signals**: CI test failure indicates module-load or runtime fail-closed regression — file +a follow-up issue and block the runbook run. + +--- + +## Section 5 — Acceptance summary + +A successful run hits Sections 1, 1.1, 2, 2.1, 3 (mode-default), 3.1, +3.2, 4 — eight discrete checks. Append the result row to +`RUNBOOK_RUN_DATES.md` per the section header below. + +If §3 fails (advisory silently dropped), the mitigation is a config +change, not a code regression: set `PACT_DISPATCH_INLINE_MISSION_MODE=deny` in the +project / user shell environment until the platform behavior changes. +File a tracking issue against the platform repo (not the plugin). + +If §1, §1.1, §2, §2.1, or §4 fails: regression. Revert the offending +commit and file a P1 issue. The dispatch-gate is part of the SACROSANCT +governance surface; do NOT ship a release with these checks failing. + +--- + +## Section 6 — Revert procedure (rollback the entire #662 surface) + +If post-merge dogfooding surfaces a regression that cannot be +remediated in-place: + +1. Tag the affected version on GitHub as `yanked` in release notes. +2. Bump plugin to a `4.2.1` patch that reverts: + - `pact-plugin/hooks/dispatch_gate.py` (delete file) + - `pact-plugin/hooks/task_lifecycle_gate.py` (delete file) + - `pact-plugin/hooks/shared/dispatch_helpers.py` (delete file) + - `pact-plugin/hooks/hooks.json` (revert PreToolUse `matcher='Agent'` + hooks list to `[team_guard.py]` only; revert PostToolUse + `matcher='TaskCreate|TaskUpdate'` to `[wake_lifecycle_emitter.py]` + only) + - `pact-plugin/hooks/bootstrap_gate.py` (revert marker-fingerprint + module-load fail-closed hardening + from Commit 1) +3. Re-run this runbook against the patched version to confirm the + surface is back to pre-#662 behavior. The expected result is that + §1 / §2 / §4 all FAIL (the gates are gone) — this is the signal that + the rollback is complete. +4. Document the regression in a follow-up issue with the journal + evidence captured during the failing run. + +The 4-file version dance applies to the rollback patch as well +(per pinned memory: every plugin-version bump gets a tag + GitHub +release). diff --git a/pact-plugin/tests/runbooks/RUNBOOK_RUN_DATES.md b/pact-plugin/tests/runbooks/RUNBOOK_RUN_DATES.md index fcf6527a..3f18bb75 100644 --- a/pact-plugin/tests/runbooks/RUNBOOK_RUN_DATES.md +++ b/pact-plugin/tests/runbooks/RUNBOOK_RUN_DATES.md @@ -8,6 +8,14 @@ informs the fallback-ladder trigger criteria evaluation across sessions (the lazy-load fidelity dogfooding signal cannot be CI-tested; runbook data is the substitute). +## 662-dispatch-gate.md + +| Run date (UTC) | Operator | Plugin version | Sections passed | inline-mission mode | Notes / counter-test outcomes | +| -------------- | -------- | -------------- | --------------- | ------- | ------------------------------- | +| _pending — execute post-merge in fresh session_ | | 4.1.3 | /8 | warn (default) | matcher-fidelity (§1+§1.1) PASS/FAIL · Bash-touch bypass (§2+§2.1) PASS/FAIL · inline-mission advisory injection (§3) WARN-visible / WARN-dropped → flipped to deny · inline-mission shadow (§3.1) PASS/FAIL · inline-mission deny (§3.2) PASS/FAIL · sabotaged-import fail-closed (§4) PASS/FAIL. | + +Sections-passed denominator is 8 per runbook §5 (§1, §1.1, §2, §2.1, §3, §3.1, §3.2, §4). The inline-mission mode column records whether the empirical observation kept the production default at `warn` or motivated a flip to `deny`. If a section fails, file a follow-up issue and link it in the Notes column. + ## v4.0.0-launch-and-isolation.md | Run date (UTC) | Operator | Plugin version | Sections passed | Notes / fallback-ladder signals | diff --git a/pact-plugin/tests/runbooks/v4.0.0-launch-and-isolation.md b/pact-plugin/tests/runbooks/v4.0.0-launch-and-isolation.md index 9df2ba95..ae4b9304 100644 --- a/pact-plugin/tests/runbooks/v4.0.0-launch-and-isolation.md +++ b/pact-plugin/tests/runbooks/v4.0.0-launch-and-isolation.md @@ -96,7 +96,7 @@ sentinels (check that AT LEAST ONE is present in the orchestrator body): **Steps**: 1. Launch session via Section 1. -2. Dispatch a teammate via `Task(subagent_type="pact-backend-coder", ...)`. +2. Dispatch a teammate via `Agent(subagent_type="pact-backend-coder", ...)`. 3. Capture the spawned teammate's system prompt (via debug logging or the platform's introspection tool). 4. `grep` the captured system prompt for each sentinel string. @@ -227,7 +227,7 @@ directive — that channel was deleted in v4.0.0). **Steps**: 1. Launch via Section 1. -2. Use the orchestrator to dispatch a teammate via `Task(...)` with the +2. Use the orchestrator to dispatch a teammate via `Agent(...)` with the canonical Two-Task Dispatch Shape (Task A teachback + Task B work, blockedBy=[A]). 3. Observe the spawned teammate's first response. It should: diff --git a/pact-plugin/tests/test_agents_structure.py b/pact-plugin/tests/test_agents_structure.py index 2af5d021..a17e9774 100644 --- a/pact-plugin/tests/test_agents_structure.py +++ b/pact-plugin/tests/test_agents_structure.py @@ -43,7 +43,7 @@ def agent_files(): return list(AGENTS_DIR.glob("*.md")) -# Subset of agent files that are TEAMMATES — i.e., spawned via Task() with +# Subset of agent files that are TEAMMATES — i.e., spawned via Agent() with # `skills:` frontmatter preload. Excludes pact-orchestrator.md, which is # delivered via `claude --agent` and has minimal frontmatter (no skills:). TEAMMATE_AGENT_NAMES = EXPECTED_AGENTS - {"pact-orchestrator"} @@ -129,7 +129,7 @@ class TestNoSkillInvocationOnFirstAction: skill) or `Skill("PACT:bootstrap")` (orchestrator-only ritual command). The team protocol, teachback rules, and algedonic content arrive via the - spawn-time skills: frontmatter (preload at Task() spawn). A fossil + spawn-time skills: frontmatter (preload at Agent() spawn). A fossil `Skill("PACT:teammate-bootstrap")` directive in any agent body points at a permanently removed command. A `Skill("PACT:bootstrap")` directive in a teammate body points at the orchestrator-only session-start ritual; only diff --git a/pact-plugin/tests/test_auditor_reminder.py b/pact-plugin/tests/test_auditor_reminder.py index da6fcb0e..112cf5a6 100644 --- a/pact-plugin/tests/test_auditor_reminder.py +++ b/pact-plugin/tests/test_auditor_reminder.py @@ -197,7 +197,7 @@ def test_coder_without_auditor_outputs_json(self, teams_dir, capsys): """Outputs systemMessage JSON when reminder needed.""" _write_team_config(teams_dir, "test-team", []) stdin_data = json.dumps({ - "tool_name": "Task", + "tool_name": "Agent", "tool_input": { "subagent_type": "pact-backend-coder", "team_name": "test-team", @@ -217,7 +217,7 @@ def test_coder_without_auditor_outputs_json(self, teams_dir, capsys): def test_non_coder_silent(self, capsys): """No output for non-coder agent types.""" stdin_data = json.dumps({ - "tool_name": "Task", + "tool_name": "Agent", "tool_input": { "subagent_type": "pact-test-engineer", "team_name": "test-team", @@ -242,7 +242,7 @@ def test_invalid_json_stdin(self): def test_exception_fail_open(self, capsys): """Exits 0 and emits error JSON on unexpected exception.""" stdin_data = json.dumps({ - "tool_name": "Task", + "tool_name": "Agent", "tool_input": { "subagent_type": "pact-backend-coder", "team_name": "test-team", diff --git a/pact-plugin/tests/test_bootstrap_gate.py b/pact-plugin/tests/test_bootstrap_gate.py index 3089b963..8c974d64 100644 --- a/pact-plugin/tests/test_bootstrap_gate.py +++ b/pact-plugin/tests/test_bootstrap_gate.py @@ -5,19 +5,14 @@ Tests cover: _check_tool_allowed() unit tests: -1. Marker exists → None for any tool (fast path) +1. Marker exists (properly stamped with valid content fingerprint) → None for any tool (fast path) 2. No marker + blocked tool (Edit) → deny reason string 3. No marker + blocked tool (Write) → deny reason string -4. No marker + blocked tool (Task) → deny reason string +4. No marker + blocked tool (Agent) → deny reason string 5. No marker + blocked tool (NotebookEdit) → deny reason string 6. No marker + allowed tool (Read) → None -7. No marker + allowed tool (Glob) → None -8. No marker + allowed tool (Grep) → None -9. No marker + allowed tool (Bash) → None (critical: bootstrap needs Bash) -10. No marker + allowed tool (WebFetch) → None -11. No marker + allowed tool (WebSearch) → None -12. No marker + allowed tool (AskUserQuestion) → None -13. No marker + allowed tool (ExitPlanMode) → None +7-13. No marker + allowed tools (Glob, Grep, Bash, WebFetch, WebSearch, + AskUserQuestion, ExitPlanMode) → None 14. No marker + MCP tool → None (mcp__ prefix match) 15. Non-PACT session (no session dir) → None 16. Teammate → None (passthrough) @@ -31,25 +26,45 @@ 22. Non-PACT → exit 0, suppressOutput 23. Teammate → exit 0, suppressOutput -Fail-open (P0): +Fail-OPEN preserved for input-side failures (P0): 24. Malformed stdin → exit 0, suppressOutput 25. Empty stdin → exit 0, suppressOutput -26. Exception in _check_tool_allowed → exit 0, suppressOutput + +Fail-CLOSED for gate-logic exceptions (P0): +26. Exception in _check_tool_allowed → exit 2, deny JSON with hookEventName Error/suppress mutual exclusivity (P0): -27. Error paths never emit systemMessage -28. Deny path emits permissionDecision, not suppressOutput +27. Input-side fail-open paths emit suppressOutput, never systemMessage +28. Deny path (block + runtime fail-closed) emits permissionDecision, not suppressOutput 29. Allow paths emit suppressOutput, not hookSpecificOutput -Blocked tool set completeness (P2): +Blocked tool set completeness (P2 — post-#662): 30. Exactly 4 blocked tools in the set -31. Bash is NOT in blocked set (circular dependency guard) +31. Members are exactly {Edit, Write, Agent, NotebookEdit} +32. Bash is NOT in blocked set (circular dependency guard) +33. Read is NOT in blocked set (exploration tool) Deny reason content (P2): -32. Deny reason mentions Skill("PACT:bootstrap") -33. Deny reason mentions available tools (Bash, Read, Glob, Grep) +34. Deny reason mentions Skill("PACT:bootstrap") +35. Deny reason mentions available tools (Bash, Read, Glob, Grep) + +is_marker_set() — public helper: +36. None / empty session_dir → False +37. Marker absent → False +38. Marker symlink (S2) → False +39. Marker is a directory (S2 corollary) → False +40. Ancestor symlink (S4) → False +41. Properly-stamped marker → True +42. Empty file (legacy `touch` form) → False +43. Marker with wrong sid → False +44. Marker with wrong version → False +45. Malformed JSON marker → False +46. Marker with wrong signature → False +47. Marker with oversized content → False +48. Marker with missing plugin context → False """ +import hashlib import io import json import sys @@ -97,11 +112,44 @@ def _run_main(input_data, capsys): return exc_info.value.code, json.loads(captured.out.strip()) -def _setup_pact_session(monkeypatch, tmp_path, with_marker=False): +def _write_f24_marker(session_dir: Path, plugin_root: Path, + plugin_version: str = "9.9.9", + marker_version: int = 1, + sid: str | None = None, + sig: str | None = None) -> Path: + """Write a properly-stamped marker. Override fields to forge invalid + variants for negative tests. + """ + from bootstrap_gate import MARKER_SCHEMA_VERSION + + real_sid = sid if sid is not None else session_dir.name + real_sig_input = ( + f"{real_sid}|{str(plugin_root).rstrip('/')}|{plugin_version}|{marker_version}" + ) + real_sig = sig if sig is not None else hashlib.sha256( + real_sig_input.encode("utf-8") + ).hexdigest() + payload = {"v": marker_version, "sid": real_sid, "sig": real_sig} + marker = session_dir / BOOTSTRAP_MARKER_NAME + marker.write_text(json.dumps(payload), encoding="utf-8") + # Sanity: caller using default args should produce a valid stamp + # for the current MARKER_SCHEMA_VERSION constant. + if marker_version == 1: + assert MARKER_SCHEMA_VERSION == 1 + return marker + + +def _setup_pact_session(monkeypatch, tmp_path, with_marker=False, + plugin_version="9.9.9"): """Set up a PACT session context with session dir under tmp_path. Monkeypatches Path.home to tmp_path so get_session_dir() returns a path under tmp_path. Returns the session_dir path. + + When ``with_marker=True`` writes a properly-stamped marker (post-#662); + callers that want to test legacy or invalid markers should pass + ``with_marker=False`` and use ``_write_f24_marker`` directly with override + fields. """ import shared.pact_context as ctx_module @@ -110,12 +158,18 @@ def _setup_pact_session(monkeypatch, tmp_path, with_marker=False): session_dir = tmp_path / ".claude" / "pact-sessions" / _SLUG / _SESSION_ID session_dir.mkdir(parents=True, exist_ok=True) + plugin_root = tmp_path / "plugin" + (plugin_root / ".claude-plugin").mkdir(parents=True, exist_ok=True) + (plugin_root / ".claude-plugin" / "plugin.json").write_text( + json.dumps({"version": plugin_version}), encoding="utf-8" + ) + context_file = session_dir / "pact-session-context.json" context_file.write_text(json.dumps({ "team_name": "", "session_id": _SESSION_ID, "project_dir": _PROJECT_DIR, - "plugin_root": "", + "plugin_root": str(plugin_root), "started_at": "2026-01-01T00:00:00Z", }), encoding="utf-8") @@ -123,7 +177,7 @@ def _setup_pact_session(monkeypatch, tmp_path, with_marker=False): monkeypatch.setattr(ctx_module, "_cache", None) if with_marker: - (session_dir / BOOTSTRAP_MARKER_NAME).touch() + _write_f24_marker(session_dir, plugin_root, plugin_version=plugin_version) return session_dir @@ -138,7 +192,7 @@ class TestCheckToolAllowed: # --- Marker exists: fast path --- - @pytest.mark.parametrize("tool_name", ["Edit", "Write", "Task", "NotebookEdit", "Read", "Bash"]) + @pytest.mark.parametrize("tool_name", ["Edit", "Write", "Agent", "NotebookEdit", "Read", "Bash"]) def test_marker_exists_allows_any_tool(self, monkeypatch, tmp_path, tool_name): """Marker exists → None for any tool (including normally-blocked ones).""" from bootstrap_gate import _check_tool_allowed @@ -150,7 +204,7 @@ def test_marker_exists_allows_any_tool(self, monkeypatch, tmp_path, tool_name): # --- No marker: blocked tools --- - @pytest.mark.parametrize("tool_name", ["Edit", "Write", "Task", "NotebookEdit"]) + @pytest.mark.parametrize("tool_name", ["Edit", "Write", "Agent", "NotebookEdit"]) def test_blocked_tools_return_deny_reason(self, monkeypatch, tmp_path, tool_name): """No marker + blocked tool → deny reason string.""" from bootstrap_gate import _check_tool_allowed @@ -237,12 +291,13 @@ def test_teammate_via_agent_id(self, monkeypatch, tmp_path): session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=False) # Override context to have a team_name + plugin_root = tmp_path / "plugin" context_file = session_dir / "pact-session-context.json" context_file.write_text(json.dumps({ "team_name": "pact-test1234", "session_id": _SESSION_ID, "project_dir": _PROJECT_DIR, - "plugin_root": "", + "plugin_root": str(plugin_root), "started_at": "2026-01-01T00:00:00Z", }), encoding="utf-8") ctx_module._cache = None @@ -320,7 +375,7 @@ def test_allowed_tool_exits_0(self, monkeypatch, tmp_path, capsys): assert output == _SUPPRESS_EXPECTED def test_marker_exists_exits_0(self, monkeypatch, tmp_path, capsys): - """Marker exists → exit 0 (fast path).""" + """Marker exists (properly stamped) → exit 0 (fast path).""" _setup_pact_session(monkeypatch, tmp_path, with_marker=True) exit_code, output = _run_main(_make_input("Edit"), capsys) @@ -350,12 +405,14 @@ def test_teammate_exits_0(self, monkeypatch, tmp_path, capsys): # ============================================================================= -# Fail-open — P0 priority +# Fail-open (input-side) — P0 priority # ============================================================================= -class TestFailOpen: - """P0: Every exception path must fail-open (exit 0, suppressOutput).""" +class TestInputSideFailOpen: + """P0: Malformed/empty stdin remains fail-OPEN — input-side failures + are the harness's domain (cannot evaluate without input). + """ def test_malformed_stdin_json(self, capsys): """Invalid JSON on stdin → fail-open.""" @@ -381,8 +438,20 @@ def test_empty_stdin(self, capsys): captured = capsys.readouterr() assert json.loads(captured.out.strip()) == _SUPPRESS_EXPECTED - def test_exception_in_check_tool_allowed(self, capsys): - """RuntimeError in _check_tool_allowed → fail-open.""" + +# ============================================================================= +# Fail-closed (gate-logic exception) — P0 priority +# ============================================================================= + + +class TestFailClosedGateLogic: + """Runtime fail-closed (#662, post-#658 defect class): runtime exception in + ``_check_tool_allowed`` must DENY (not fail-OPEN). Pre-#662 this path + was fail-OPEN — that was the same defect class as #658. + """ + + def test_exception_in_check_tool_allowed_emits_deny(self, capsys): + """RuntimeError in _check_tool_allowed → exit 2 with structured deny.""" from bootstrap_gate import main with patch( @@ -393,34 +462,13 @@ def test_exception_in_check_tool_allowed(self, capsys): with pytest.raises(SystemExit) as exc_info: main() - assert exc_info.value.code == 0 - captured = capsys.readouterr() - assert json.loads(captured.out.strip()) == _SUPPRESS_EXPECTED - - def test_oserror_in_marker_check_treats_marker_absent(self, monkeypatch, tmp_path, capsys): - """OSError when checking marker → marker treated as absent → blocked - tool denied (gate stays armed). Behavior change from pre-S2 fix: - previously `Path.exists()` raises propagated to the outer except - and fell open with suppressOutput. Now `is_marker_set` catches - OSError internally and returns False — the conservative choice - per S2 trust-boundary rationale ('don't claim the marker is set - when we can't verify it'). The OUTER fail-open contract (any - raisable path → suppressOutput) still holds for genuine - programmer errors above the marker-check layer.""" - from bootstrap_gate import main - - _setup_pact_session(monkeypatch, tmp_path, with_marker=False) - - with patch("os.lstat", side_effect=OSError("disk error")): - with patch("sys.stdin", io.StringIO(json.dumps(_make_input("Edit")))): - with pytest.raises(SystemExit) as exc_info: - main() - - # Edit is a blocked tool + marker absent → exit 2 (deny). assert exc_info.value.code == 2 captured = capsys.readouterr() - output = json.loads(captured.out.strip()) - assert output["hookSpecificOutput"]["permissionDecision"] == "deny" + parsed = json.loads(captured.out.strip()) + hso = parsed["hookSpecificOutput"] + assert hso["hookEventName"] == "PreToolUse" + assert hso["permissionDecision"] == "deny" + assert "RuntimeError" in hso["permissionDecisionReason"] # ============================================================================= @@ -429,8 +477,9 @@ def test_oserror_in_marker_check_treats_marker_absent(self, monkeypatch, tmp_pat class TestErrorSuppressMutualExclusivity: - """P0: These hooks use suppressOutput for fail-open, never systemMessage. - Deny path uses hookSpecificOutput, never suppressOutput.""" + """P0: input-side fail-open uses suppressOutput; deny path (block + runtime fail-closed + fail-closed) uses hookSpecificOutput. systemMessage is never emitted. + """ def test_malformed_stdin_no_system_message(self, capsys): """Malformed stdin → suppressOutput, not systemMessage.""" @@ -445,8 +494,8 @@ def test_malformed_stdin_no_system_message(self, capsys): assert "suppressOutput" in parsed assert "systemMessage" not in parsed - def test_exception_no_system_message(self, capsys): - """Exception → suppressOutput, not systemMessage.""" + def test_gate_logic_exception_no_system_message(self, capsys): + """Runtime fail-closed → hookSpecificOutput, not systemMessage.""" from bootstrap_gate import main with patch( @@ -459,7 +508,7 @@ def test_exception_no_system_message(self, capsys): captured = capsys.readouterr() parsed = json.loads(captured.out.strip()) - assert "suppressOutput" in parsed + assert "hookSpecificOutput" in parsed assert "systemMessage" not in parsed def test_deny_path_no_suppress_output(self, monkeypatch, tmp_path, capsys): @@ -480,12 +529,12 @@ def test_allow_path_no_hook_specific_output(self, monkeypatch, tmp_path, capsys) # ============================================================================= -# Blocked tool set completeness — P2 priority +# Blocked tool set completeness — P2 priority (post-#662) # ============================================================================= class TestBlockedToolSet: - """P2: Verify the blocked tool set is correct and complete.""" + """P2: Verify the blocked tool set is correct and complete (post-#662).""" def test_blocked_set_exact_cardinality(self): """Exactly 4 tools in the blocked set.""" @@ -494,16 +543,26 @@ def test_blocked_set_exact_cardinality(self): assert len(_BLOCKED_TOOLS) == 4 def test_blocked_set_exact_members(self): - """Blocked set contains exactly Edit, Write, Task, NotebookEdit. + """Blocked set contains exactly Edit, Write, Agent, NotebookEdit (#662). - The agent-dispatch tool name is `Task` (the canonical platform - tool). Cross-evidence: hooks.json PreToolUse team_guard + - PostToolUse auditor_reminder both use matcher='Task' and fire - correctly in production. + The agent-dispatch tool name is `Agent` — the canonical Claude Code + platform name (verified against code.claude.com docs as of 2026-05-06). + Earlier `Task` literal (commit 4c286c1f) was the wrong rename + direction; #662 reverts it. """ from bootstrap_gate import _BLOCKED_TOOLS - assert _BLOCKED_TOOLS == frozenset({"Edit", "Write", "Task", "NotebookEdit"}) + assert _BLOCKED_TOOLS == frozenset({"Edit", "Write", "Agent", "NotebookEdit"}) + + def test_blocked_set_does_not_contain_task(self): + """Regression-prevention: Task is NOT in the blocked set (#662). + + Pre-#662, commit 4c286c1f wrongly renamed Agent→Task here. + This test fails-closed if anyone reverses the rename direction. + """ + from bootstrap_gate import _BLOCKED_TOOLS + + assert "Task" not in _BLOCKED_TOOLS def test_bash_not_blocked(self): """Bash must NOT be in blocked set (circular dependency).""" @@ -559,18 +618,19 @@ class TestMarkerLifecycle: """P3: Gate transitions based on marker presence.""" def test_gate_transitions_deny_to_allow(self, monkeypatch, tmp_path, capsys): - """Before marker: deny Edit. After marker: allow Edit.""" + """Before marker: deny Edit. After marker stamp: allow Edit.""" import shared.pact_context as ctx_module session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=False) + plugin_root = tmp_path / "plugin" # Before marker — Edit denied exit_code_before, output_before = _run_main(_make_input("Edit"), capsys) assert exit_code_before == 2 assert "permissionDecision" in output_before.get("hookSpecificOutput", {}) - # Create marker - (session_dir / BOOTSTRAP_MARKER_NAME).touch() + # Write a properly-stamped marker + _write_f24_marker(session_dir, plugin_root, plugin_version="9.9.9") # Reset cache for second call ctx_module._cache = None @@ -586,7 +646,7 @@ def test_repeated_deny_is_consistent(self, monkeypatch, tmp_path, capsys): _setup_pact_session(monkeypatch, tmp_path, with_marker=False) - for tool in ["Edit", "Write", "Task"]: + for tool in ["Edit", "Write", "Agent"]: ctx_module._cache = None exit_code, output = _run_main(_make_input(tool), capsys) @@ -607,23 +667,25 @@ def test_shared_constant_value(self): assert BOOTSTRAP_MARKER_NAME == "bootstrap-complete" def test_bootstrap_md_references_same_marker(self): - """bootstrap.md touch command must reference the shared marker name.""" + """bootstrap.md marker producer must reference the shared marker name.""" bootstrap_md = ( Path(__file__).parent.parent / "commands" / "bootstrap.md" ) content = bootstrap_md.read_text(encoding="utf-8") - assert f"touch \"/{BOOTSTRAP_MARKER_NAME}\"" in content + # The marker producer (#662) writes the marker via python3, but the + # marker file path still embeds BOOTSTRAP_MARKER_NAME literally. + assert BOOTSTRAP_MARKER_NAME in content # ============================================================================= -# is_marker_set — public helper (Arch-M1 + S2 + S4 defense) +# is_marker_set — public helper (leaf-symlink + ancestor-symlink + content-fingerprint defenses) # ============================================================================= class TestIsMarkerSet: - """Public predicate `is_marker_set(session_dir)` — does a real marker - exist? Defends S2 (symlink-planted bypass) + S4 (ancestor symlink). - Plan §High-Risk-TDD-Specs Q4 names this as a 7-method TDD target. + """Public predicate `is_marker_set(session_dir)` — does a properly-stamped + properly-stamped marker exist? Defends symlink-planted bypass + (leaf and ancestor) and Bash-touch bypass (via SHA256 content fingerprint). """ def test_returns_false_when_session_dir_none(self): @@ -641,73 +703,156 @@ def test_returns_false_when_marker_absent(self, tmp_path): assert is_marker_set(tmp_path) is False - def test_returns_true_when_marker_present_as_regular_file(self, tmp_path): + def test_returns_true_when_marker_properly_stamped( + self, monkeypatch, tmp_path + ): + """Only properly-stamped markers satisfy the gate.""" from bootstrap_gate import is_marker_set - (tmp_path / BOOTSTRAP_MARKER_NAME).touch() - assert is_marker_set(tmp_path) is True + session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=True) + assert is_marker_set(session_dir) is True - def test_returns_false_when_marker_is_symlink(self, tmp_path): - """S2 attack chain: planted symlink at the marker path pointing - at ANY existing file falsely satisfies `Path.exists()` (which - follows symlinks). The defense uses `os.lstat() + S_ISREG` - which checks the leaf without following the link. + def test_returns_false_when_marker_is_empty_file_legacy_touch( + self, monkeypatch, tmp_path + ): + """Bash-touch bypass closure (#662): legacy `touch bootstrap-complete` + (empty file) MUST NOT satisfy the gate. + """ + from bootstrap_gate import is_marker_set - Reproducer for the bypass-without-defense: - ln -s /etc/hostname /bootstrap-complete - → Path.exists() returns True → gate would allow → BYPASS + session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=False) + (session_dir / BOOTSTRAP_MARKER_NAME).touch() + assert is_marker_set(session_dir) is False - With defense: - os.lstat() returns the symlink's own stat → S_ISLNK, not - S_ISREG → returns False → gate stays armed. - """ + def test_returns_false_when_marker_is_symlink(self, monkeypatch, tmp_path): + """S2 attack chain — symlink at the marker path is rejected.""" from bootstrap_gate import is_marker_set - # Plant a real file outside the session dir. + session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=False) target = tmp_path / "decoy_target" target.touch() - # Plant the marker as a symlink to the decoy. - marker = tmp_path / BOOTSTRAP_MARKER_NAME + marker = session_dir / BOOTSTRAP_MARKER_NAME marker.symlink_to(target) assert marker.exists() is True # Path.exists follows symlinks - assert is_marker_set(tmp_path) is False # but is_marker_set rejects + assert is_marker_set(session_dir) is False # but is_marker_set rejects - def test_returns_false_when_marker_is_directory(self, tmp_path): - """S2 corollary: a directory at the marker path is also rejected - (S_ISREG False).""" + def test_returns_false_when_marker_is_directory( + self, monkeypatch, tmp_path + ): + """S2 corollary: a directory at the marker path is rejected.""" from bootstrap_gate import is_marker_set - (tmp_path / BOOTSTRAP_MARKER_NAME).mkdir() - assert is_marker_set(tmp_path) is False + session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=False) + (session_dir / BOOTSTRAP_MARKER_NAME).mkdir() + assert is_marker_set(session_dir) is False def test_returns_false_when_ancestor_is_symlink(self, tmp_path): - """S4 attack chain: planted symlink at any ancestor of the - session_dir (e.g., ~/.claude itself being a symlink to attacker- - controlled /tmp/evil/.claude) lets the attacker plant a regular - file marker satisfying the leaf-only check. - - Reproducer: - ln -s /tmp/evil ~/.claude - mkdir -p /tmp/evil/pact-sessions/{slug}/{session_id} - touch /tmp/evil/pact-sessions/{slug}/{session_id}/bootstrap-complete - → leaf is_symlink() returns False → leaf-only gate would allow - - Defense: Path.resolve(strict=False) follows ALL ancestor symlinks - in the path; if the resolved path differs from the absolute input - path, an ancestor was a symlink → reject. - """ + """S4 attack chain: symlinked ancestor is rejected.""" from bootstrap_gate import is_marker_set - # Real session_dir target. real_dir = tmp_path / "real_session_dir" real_dir.mkdir() (real_dir / BOOTSTRAP_MARKER_NAME).touch() - # Symlink the parent directory to the real one. link_dir = tmp_path / "linked_session_dir" link_dir.symlink_to(real_dir) - # The marker IS a real file (via the symlink path) but the path - # has a symlink ancestor → defense rejects. assert (link_dir / BOOTSTRAP_MARKER_NAME).exists() is True + # Both paths fail content-fingerprint validation (empty content + # is not a valid stamp), but + # the ancestor-symlink check fires FIRST and ensures the bypass + # would be rejected even if the content fingerprint were satisfied. assert is_marker_set(link_dir) is False - # Sanity: the real path (no ancestor symlink) IS accepted. - assert is_marker_set(real_dir) is True + assert is_marker_set(real_dir) is False # content fingerprint fails on empty file + + def test_rejects_wrong_sid(self, monkeypatch, tmp_path): + """Marker with mismatched sid (not session_dir.name) rejected.""" + from bootstrap_gate import is_marker_set + + session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=False) + plugin_root = tmp_path / "plugin" + _write_f24_marker( + session_dir, plugin_root, plugin_version="9.9.9", sid="wrong-session" + ) + assert is_marker_set(session_dir) is False + + def test_rejects_wrong_version(self, monkeypatch, tmp_path): + """Marker with v != MARKER_SCHEMA_VERSION rejected.""" + from bootstrap_gate import is_marker_set + + session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=False) + plugin_root = tmp_path / "plugin" + _write_f24_marker( + session_dir, plugin_root, plugin_version="9.9.9", marker_version=99 + ) + assert is_marker_set(session_dir) is False + + def test_rejects_malformed_json(self, monkeypatch, tmp_path): + """Non-JSON marker content rejected.""" + from bootstrap_gate import is_marker_set + + session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=False) + (session_dir / BOOTSTRAP_MARKER_NAME).write_text( + "not json at all", encoding="utf-8" + ) + assert is_marker_set(session_dir) is False + + def test_rejects_extra_keys(self, monkeypatch, tmp_path): + """Marker with keys beyond {v, sid, sig} rejected.""" + from bootstrap_gate import is_marker_set + + session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=False) + plugin_root = tmp_path / "plugin" + marker = session_dir / BOOTSTRAP_MARKER_NAME + marker.write_text( + json.dumps({ + "v": 1, "sid": session_dir.name, "sig": "deadbeef", + "extra": "snuck in", + }), + encoding="utf-8", + ) + assert is_marker_set(session_dir) is False + + def test_rejects_wrong_signature(self, monkeypatch, tmp_path): + """Marker with a non-matching SHA256 signature rejected.""" + from bootstrap_gate import is_marker_set + + session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=False) + plugin_root = tmp_path / "plugin" + _write_f24_marker( + session_dir, plugin_root, plugin_version="9.9.9", sig="0" * 64 + ) + assert is_marker_set(session_dir) is False + + def test_rejects_oversized_content(self, monkeypatch, tmp_path): + """Marker file > 256 bytes rejected (pathological-read defense).""" + from bootstrap_gate import is_marker_set + + session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=False) + # Write a JSON object that's syntactically correct but huge. + marker = session_dir / BOOTSTRAP_MARKER_NAME + big_payload = {"v": 1, "sid": session_dir.name, + "sig": "x" * 1024} + marker.write_text(json.dumps(big_payload), encoding="utf-8") + assert is_marker_set(session_dir) is False + + def test_rejects_when_plugin_root_missing(self, monkeypatch, tmp_path): + """Cannot compute expected signature without plugin context.""" + from bootstrap_gate import is_marker_set + import shared.pact_context as ctx_module + + session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=False) + plugin_root = tmp_path / "plugin" + # Stamp the marker with what WOULD be a valid sig, then break the + # plugin context. + _write_f24_marker(session_dir, plugin_root, plugin_version="9.9.9") + # Remove plugin_root from context (simulate older session_init or + # a corrupted context file). + context_file = session_dir / "pact-session-context.json" + context_file.write_text(json.dumps({ + "team_name": "", + "session_id": _SESSION_ID, + "project_dir": _PROJECT_DIR, + "plugin_root": "", + "started_at": "2026-01-01T00:00:00Z", + }), encoding="utf-8") + ctx_module._cache = None + assert is_marker_set(session_dir) is False diff --git a/pact-plugin/tests/test_bootstrap_prompt_gate.py b/pact-plugin/tests/test_bootstrap_prompt_gate.py index 0aa17548..a3c8c630 100644 --- a/pact-plugin/tests/test_bootstrap_prompt_gate.py +++ b/pact-plugin/tests/test_bootstrap_prompt_gate.py @@ -62,39 +62,53 @@ def _run_main(input_data, capsys): return exc_info.value.code, json.loads(captured.out.strip()) -def _setup_pact_session(monkeypatch, tmp_path, with_marker=False): +def _setup_pact_session(monkeypatch, tmp_path, with_marker=False, + plugin_version="9.9.9"): """Set up a PACT session context with session dir under tmp_path. Monkeypatches Path.home to tmp_path so get_session_dir() returns a path under tmp_path. Writes a context file and patches pact_context - module state. + module state. When ``with_marker=True``, writes a properly-stamped + properly-stamped marker (post-#662); empty `touch` markers no longer satisfy the + gate. Returns the session_dir path. """ + import hashlib import shared.pact_context as ctx_module monkeypatch.setattr(Path, "home", lambda: tmp_path) - # Build session dir path matching what get_session_dir() will compute session_dir = tmp_path / ".claude" / "pact-sessions" / _SLUG / _SESSION_ID session_dir.mkdir(parents=True, exist_ok=True) - # Write context file in the session dir + plugin_root = tmp_path / "plugin" + (plugin_root / ".claude-plugin").mkdir(parents=True, exist_ok=True) + (plugin_root / ".claude-plugin" / "plugin.json").write_text( + json.dumps({"version": plugin_version}), encoding="utf-8" + ) + context_file = session_dir / "pact-session-context.json" context_file.write_text(json.dumps({ "team_name": "", "session_id": _SESSION_ID, "project_dir": _PROJECT_DIR, - "plugin_root": "", + "plugin_root": str(plugin_root), "started_at": "2026-01-01T00:00:00Z", }), encoding="utf-8") - # Patch pact_context module to use this context file monkeypatch.setattr(ctx_module, "_context_path", context_file) monkeypatch.setattr(ctx_module, "_cache", None) if with_marker: - (session_dir / BOOTSTRAP_MARKER_NAME).touch() + sid = session_dir.name + sig = hashlib.sha256( + f"{sid}|{str(plugin_root).rstrip('/')}|{plugin_version}|1".encode() + ).hexdigest() + (session_dir / BOOTSTRAP_MARKER_NAME).write_text( + json.dumps({"v": 1, "sid": sid, "sig": sig}), + encoding="utf-8", + ) return session_dir @@ -302,14 +316,12 @@ def test_oserror_in_marker_check_treats_marker_absent( self, monkeypatch, tmp_path, capsys ): """OSError on the marker check is now caught INSIDE - `is_marker_set` (post R2-B1 / commit 5b12f805) and treated as - marker-absent → bootstrap directive injected, gate stays armed. - Pre-R2-B1: `Path.exists()` raise propagated to the outer except - and produced suppressOutput. Post-R2-B1: bootstrap_prompt_gate - delegates to bootstrap_gate.is_marker_set, which has the same - conservative-fail-closed semantics as the sibling gate (the - Cycle-2 S2 fix established this contract for the gate; R2-B1 - propagates the same contract to the prompt-gate). + `is_marker_set` and treated as marker-absent → bootstrap directive + injected, gate stays armed. Previously, `Path.exists()` raises + propagated to the outer except and produced suppressOutput. The + current contract: bootstrap_prompt_gate delegates to + bootstrap_gate.is_marker_set, which has the same + conservative-fail-closed semantics as the sibling gate. The OUTER fail-open contract still holds for genuine programmer errors above the marker-check layer; this test pins the marker- @@ -390,17 +402,26 @@ class TestMarkerLifecycle: """P3: Marker creation → gate self-disable → idempotent suppress.""" def test_gate_transitions_on_marker_creation(self, monkeypatch, tmp_path, capsys): - """Before marker: inject. After marker: suppress.""" + """Before marker: inject. After marker stamp: suppress.""" + import hashlib import shared.pact_context as ctx_module session_dir = _setup_pact_session(monkeypatch, tmp_path, with_marker=False) + plugin_root = tmp_path / "plugin" # Before marker — should inject _, output_before = _run_main(_make_input(), capsys) assert "hookSpecificOutput" in output_before - # Create marker - (session_dir / BOOTSTRAP_MARKER_NAME).touch() + # Stamp a properly-formed marker (post-#662) + sid = session_dir.name + sig = hashlib.sha256( + f"{sid}|{str(plugin_root).rstrip('/')}|9.9.9|1".encode() + ).hexdigest() + (session_dir / BOOTSTRAP_MARKER_NAME).write_text( + json.dumps({"v": 1, "sid": sid, "sig": sig}), + encoding="utf-8", + ) # Reset cache for second call ctx_module._cache = None diff --git a/pact-plugin/tests/test_cybernetic_cross_references.py b/pact-plugin/tests/test_cybernetic_cross_references.py index cc454a53..a429eca0 100644 --- a/pact-plugin/tests/test_cybernetic_cross_references.py +++ b/pact-plugin/tests/test_cybernetic_cross_references.py @@ -253,7 +253,7 @@ class TestStructuralVerificationDisciplineConsistency: # Rule-carrying surfaces only. The agent body, the protocol anchor, and # its SSOT carry the full rule; orchestrate.md carries the dispatch-primer - # pointer (it is the sole Task() dispatch site for the auditor, per + # pointer (it is the sole Agent() dispatch site for the auditor, per # preparer §a). comPACT.md + pact-workflows.md are `see-X-for-details` # cross-refs, not dispatch primers, so they do NOT carry the discipline # phrase — the cascade invariant does not apply to them. @@ -329,7 +329,7 @@ def test_discipline_reference_colocated_with_git_diff( ) # Dispatch-primer consumer files only. orchestrate.md is the sole auditor - # Task() spawn site (preparer §a); its bullet-list dispatch payload is + # Agent() spawn site (preparer §a); its bullet-list dispatch payload is # read at spawn time by the auditor and carries a brief discipline primer # next to the 'Auditor skipped' anchor. comPACT.md + pact-workflows.md # contain `Auditor skipped` in cross-ref documentation blocks that are diff --git a/pact-plugin/tests/test_dispatch_gate.py b/pact-plugin/tests/test_dispatch_gate.py new file mode 100644 index 00000000..294b26ed --- /dev/null +++ b/pact-plugin/tests/test_dispatch_gate.py @@ -0,0 +1,989 @@ +""" +Comprehensive coverage for dispatch_gate.py — #662 PreToolUse hook. + +Sibling to test_dispatch_gate_smoke.py (the 7 minimum-viable cases). +This file expands every rule landed in the gate into a behavioral matrix. + +Rule coverage: + - name_required — name= missing/empty/whitespace → DENY + - team_name_required — team_name= empty → DENY + - name_too_long / name_invalid_regex / name_reserved_token — name + length/NFKC/regex/reserved-token violations → DENY + - specialist_not_registered — subagent_type not in agent registry → DENY + - team_name_mismatch / team_name_unavailable — team mismatch or empty + session source → DENY + - no_task_assigned — no Task with owner=name → DENY + - long_inline_mission — long inline mission OR no TaskList reference, + disposition controlled by PACT_DISPATCH_INLINE_MISSION_MODE + (warn|deny|shadow) → WARN | DENY | ALLOW(shadow) + - name_not_unique — name already in team config members → DENY + - plugin_agents_missing — plugin_root agents/ directory missing → DENY + - Runtime gate-logic exception → fail-closed DENY (covered via + subprocess in smoke) + - Journal: every decision (ALLOW + WARN + DENY) emits a + dispatch_decision event with rule + verdict + - Prompt redaction at the journal-write boundary + - Carve-outs — SOLO_EXEMPT / non-pact-* subagent_type → ALLOW + - Anti-sprawl — single evaluate_dispatch composition + +Disciplines applied: + - PR #660 R2: never pop shared.* from sys.modules in this test process. + Subprocess sabotage for runtime fail-closed lives in the smoke file + using PYTHONSAFEPATH. + - #638 cardinality: each rule's deny is asserted by behavioral rule + identifier (e.g. ``"name_required"``, ``"long_inline_mission"``), not + deny string equality, so wording iterations don't cause test churn. + - feedback_no_planning_artifact_test_names: rule names describe + behavior, not provenance. + - Credential literals in redaction tests are split via Python + adjacent-string-literal concatenation so the repo-root pre-commit + secret-scanner does not false-positive on this fixture. +""" + +import io +import json +import sys +from pathlib import Path +from unittest.mock import patch + +import pytest + +sys.path.insert(0, str(Path(__file__).parent.parent / "hooks")) + + +_SUPPRESS_EXPECTED = {"suppressOutput": True} +_TEAM = "pact-test" +_NAME = "tester" + + +# ============================================================================= +# Helpers +# ============================================================================= + + +def _make_input( + subagent_type="pact-architect", + name=_NAME, + team_name=_TEAM, + prompt="Standard mission. Check TaskList for tasks assigned to you.", +): + return { + "hook_event_name": "PreToolUse", + "session_id": "test-session", + "tool_name": "Agent", + "tool_input": { + "subagent_type": subagent_type, + "name": name, + "team_name": team_name, + "prompt": prompt, + }, + } + + +def _run_main(input_data, capsys): + """Invoke dispatch_gate.main() in-process. Returns (exit_code, stdout_json).""" + from dispatch_gate import main + + with patch("sys.stdin", io.StringIO(json.dumps(input_data))): + with pytest.raises(SystemExit) as exc_info: + main() + + captured = capsys.readouterr() + out = captured.out.strip() + return exc_info.value.code, json.loads(out) if out else {} + + +def _setup_session(monkeypatch, tmp_path, plugin_root: Path, team_name=_TEAM): + """Wire pact_context to point at a tmp session, set HOME so + has_task_assigned + _team_member_names read tmp dirs. + """ + import shared.pact_context as ctx_module + + monkeypatch.setattr(Path, "home", lambda: tmp_path) + + ctx_path = tmp_path / "pact-session-context.json" + ctx_path.write_text( + json.dumps( + { + "team_name": team_name, + "session_id": "test-session", + "project_dir": str(tmp_path / "project"), + "plugin_root": str(plugin_root), + "started_at": "2026-01-01T00:00:00Z", + } + ), + encoding="utf-8", + ) + monkeypatch.setattr(ctx_module, "_context_path", ctx_path) + monkeypatch.setattr(ctx_module, "_cache", None) + monkeypatch.setattr(ctx_module, "init", lambda input_data: None) + + import shared.dispatch_helpers as dh + + dh._specialist_registry.cache_clear() + + +def _seed_plugin(plugin_root: Path, agents=("pact-architect",)): + agents_dir = plugin_root / "agents" + agents_dir.mkdir(parents=True, exist_ok=True) + for stem in agents: + (agents_dir / f"{stem}.md").write_text(f"---\nname: {stem}\n---\n") + + +def _seed_team(home: Path, team_name=_TEAM, members=(), tasks=()): + """Write fake team config + canonical tasks store. + + config.json lives under ``HOME/.claude/teams/{team_name}/`` (the + ``_team_member_names`` read path). Task files live under + ``HOME/.claude/tasks/{team_name}/`` (the canonical task store per + ``shared/task_utils.py``, which is what ``has_task_assigned`` reads + after the path-alignment fix). + """ + team_dir = home / ".claude" / "teams" / team_name + team_dir.mkdir(parents=True, exist_ok=True) + (team_dir / "config.json").write_text( + json.dumps( + { + "team_name": team_name, + "members": [{"name": m} for m in members], + } + ), + encoding="utf-8", + ) + tasks_dir = home / ".claude" / "tasks" / team_name + tasks_dir.mkdir(parents=True, exist_ok=True) + for i, (owner, status) in enumerate(tasks): + (tasks_dir / f"task_{i}.json").write_text( + json.dumps({"id": str(i), "owner": owner, "status": status}), + encoding="utf-8", + ) + + +def _full_setup( + monkeypatch, + tmp_path, + *, + agents=("pact-architect",), + members=(), + tasks=((_NAME, "pending"),), + team_name=_TEAM, +): + """One-call setup: plugin agents/, session context, team config + tasks.""" + plugin_root = tmp_path / "plugin" + _seed_plugin(plugin_root, agents=agents) + _setup_session(monkeypatch, tmp_path, plugin_root, team_name=team_name) + _seed_team(tmp_path, team_name=team_name, members=members, tasks=tasks) + return plugin_root + + +def _capture_journal(monkeypatch): + """Replace append_event in both shared.session_journal and dispatch_gate + so every emit goes into a captured list. Returns the list. + """ + captured: list[dict] = [] + + def _capture(event): + captured.append(event) + return True + + import shared.session_journal as sj + monkeypatch.setattr(sj, "append_event", _capture) + import dispatch_gate + monkeypatch.setattr(dispatch_gate, "append_event", _capture) + return captured + + +# ============================================================================= +# name_required — name= absent / empty / whitespace +# ============================================================================= + + +@pytest.mark.parametrize( + "name_input", + [ + "", # empty string + " ", # whitespace-only — also fails the regex rule + "\t", # tab-only + ], + ids=["empty_string", "whitespace_only", "tab_only"], +) +def test_or_regex_deny_when_name_is_empty_or_whitespace( + name_input, tmp_path, monkeypatch, capsys +): + """name_required (empty) or name_invalid_regex (whitespace fails regex) both DENY. Either rule is + acceptable — the load-bearing invariant is that an unusable name is + rejected with hookEventName=PreToolUse and exit 2. + """ + _full_setup(monkeypatch, tmp_path) + code, out = _run_main(_make_input(name=name_input), capsys) + assert code == 2 + hso = out["hookSpecificOutput"] + assert hso["hookEventName"] == "PreToolUse" + assert hso["permissionDecision"] == "deny" + reason = hso["permissionDecisionReason"] + assert ("name_required" in reason) or ("name= parameter is required" in reason) \ + or ("name_invalid_regex" in reason) or ("must match" in reason) + + +def test_deny_when_name_key_missing(tmp_path, monkeypatch, capsys): + """tool_input lacks the name key entirely — gate treats as empty → name_required DENY.""" + _full_setup(monkeypatch, tmp_path) + payload = _make_input() + del payload["tool_input"]["name"] + code, out = _run_main(payload, capsys) + assert code == 2 + assert "name= parameter is required" in out["hookSpecificOutput"]["permissionDecisionReason"] + + +# ============================================================================= +# team_name_required — team_name= empty +# ============================================================================= + + +def test_deny_when_team_name_key_missing(tmp_path, monkeypatch, capsys): + """tool_input lacks team_name → team_name_required DENY (caught BEFORE the session-team check).""" + _full_setup(monkeypatch, tmp_path) + payload = _make_input() + del payload["tool_input"]["team_name"] + code, out = _run_main(payload, capsys) + assert code == 2 + assert "team_name= parameter is required" in out["hookSpecificOutput"]["permissionDecisionReason"] + + +# ============================================================================= +# name validation — regex / length cap / NFKC / reserved tokens +# ============================================================================= + + +def test_deny_when_name_exceeds_64_char_cap(tmp_path, monkeypatch, capsys): + """Length cap fires BEFORE regex (cheap-first ordering).""" + _full_setup(monkeypatch, tmp_path) + long_name = "a" * 65 + code, out = _run_main(_make_input(name=long_name), capsys) + assert code == 2 + reason = out["hookSpecificOutput"]["permissionDecisionReason"] + assert "exceeds limit" in reason + assert "length" in reason.lower() + + +def test_allows_name_at_64_char_boundary(tmp_path, monkeypatch, capsys): + """64 chars is the max permitted (boundary <=). Combined with the no_task_assigned check below + we need a task with owner=long_name OR confirm the name-length check itself + passes. Use a 64-char name + seed task with that owner. + """ + long_name = "a" * 64 + _full_setup( + monkeypatch, + tmp_path, + members=(), + tasks=((long_name, "pending"),), + ) + code, out = _run_main(_make_input(name=long_name), capsys) + # Either ALLOW (name validation passed, all other rules pass) or some unrelated DENY, + # but NOT a name-length DENY — that's the load-bearing assertion. + if code == 2: + reason = out["hookSpecificOutput"]["permissionDecisionReason"] + assert "exceeds limit" not in reason + assert "must match" not in reason + assert "reserved-token" not in reason + else: + assert code == 0 + + +@pytest.mark.parametrize( + "bad_name", + [ + "BadName", # uppercase + "has space", # space + "has_underscore", # underscore + "trailing-", # degenerate: trailing hyphen + "-leading", # degenerate: leading hyphen + "--", # degenerate: only hyphens + "-", # degenerate: single hyphen + "(parens)", # parens + "with\nnewline", # newline + "наме", # Cyrillic — fails regex even after NFKC + "​zero-width", # zero-width-space prefix + ], + ids=[ + "uppercase", + "space", + "underscore", + "trailing_dash", + "leading_dash", + "only_hyphens", + "single_hyphen", + "parens", + "newline", + "cyrillic", + "zero_width", + ], +) +def test_deny_invalid_name_chars(bad_name, tmp_path, monkeypatch, capsys): + """NFKC normalize then regex check — none of these survive. + + The regex requires at least one alphanumeric and forbids leading or + trailing hyphens; degenerate names like "-", "--", "-foo", "foo-" + are rejected by name_invalid_regex. + """ + _full_setup(monkeypatch, tmp_path) + code, out = _run_main(_make_input(name=bad_name), capsys) + assert code == 2 + reason = out["hookSpecificOutput"]["permissionDecisionReason"] + assert "must match" in reason + + +def test_deny_fullwidth_lookalike_after_nfkc(tmp_path, monkeypatch, capsys): + """Fullwidth digits/letters NFKC-normalize to ASCII, but the regex check + runs on the NORMALIZED form. We want to assert that the LOOKALIKE shape + is rejected by SOME rule — either name validation (if NFKC produces non-regex chars) + or another rule. The load-bearing invariant: a name with fullwidth chars + cannot smuggle through. + + Implementation note: dispatch_gate normalizes BEFORE regex. test + (fullwidth) NFKC-normalizes to "test", which IS valid ASCII. Per the + impl, this name is therefore ACCEPTED by name validation. That is acceptable + behavior — fullwidth lookalikes that legitimately normalize to a + safe lowercase ASCII identifier are not security-sensitive. This test + pins the empirical observation rather than an idealized expectation. + """ + _full_setup( + monkeypatch, + tmp_path, + tasks=(("test", "pending"),), + ) + fullwidth = "test" + code, _out = _run_main(_make_input(name=fullwidth), capsys) + # Either accepted (NFKC → "test" matches regex, has task) or denied for + # an unrelated rule. Never name-regex denied since NFKC produces ASCII. + assert code in (0, 2) + + +@pytest.mark.parametrize( + "reserved", + [ + "team-lead", + "lead", + "user", + "external", + "peer", + "unknown", + "solo", + # Self-completion-exempt names. Spawning under either of these + # would let the spawned teammate self-complete tasks without + # triggering the lead-only-completion advisory in + # task_lifecycle_gate (the advisory short-circuits on owner ∈ + # SELF_COMPLETE_EXEMPT_AGENTS). Reserving them at spawn time + # closes that confused-deputy bypass. + "secretary", + "pact-secretary", + ], +) +def test_deny_reserved_token(reserved, tmp_path, monkeypatch, capsys): + """Reserved tokens DENY even though they pass the regex.""" + _full_setup(monkeypatch, tmp_path) + code, out = _run_main(_make_input(name=reserved), capsys) + assert code == 2 + reason = out["hookSpecificOutput"]["permissionDecisionReason"] + assert "reserved-token" in reason + + +def test_self_complete_exempt_agents_are_all_reserved(): + """Cross-module subset invariant. + + Every agent name that the lifecycle gate allows to self-complete + MUST also be in dispatch_gate.RESERVED_NAMES, so a dispatch can + never spawn under one of those names. If a future change adds an + agent to the exempt set without also adding it to RESERVED_NAMES, + this test fails — closing the confused-deputy bypass before it + ships. + + Categorical pattern. This exempt-vs-reserved pairing is a specific + instance of a general rule: any future privilege class keyed on + owner-name (audit-only, signing-authority, telemetry-immune, + quota-exempt, anything else that gates an action by matching a + teammate name) MUST (a) live in a shared module so the privilege + membership is the single source of truth, and (b) carry its own + ⊆ RESERVED_NAMES subset assertion so a dispatch can never spawn + under one of those names. The lifecycle gate's + SELF_COMPLETE_EXEMPT_AGENTS is the first such class; this test is + the template for any future ones. Adding a new privilege class + without the subset assertion re-opens the same defect class + (spawn-under-privileged-name → confused-deputy bypass). + """ + import dispatch_gate + from shared.intentional_wait import SELF_COMPLETE_EXEMPT_AGENTS + + missing = SELF_COMPLETE_EXEMPT_AGENTS - dispatch_gate.RESERVED_NAMES + assert not missing, ( + f"SELF_COMPLETE_EXEMPT_AGENTS contains names not in " + f"dispatch_gate.RESERVED_NAMES: {sorted(missing)}. Spawning " + "under any of these would let the spawned teammate " + "self-complete tasks without triggering the lead-only-completion " + "advisory. Add them to RESERVED_NAMES to close the bypass." + ) + + +# ============================================================================= +# specialist_not_registered — subagent_type not in registry +# ============================================================================= + + +def test_deny_when_subagent_type_not_in_registry(tmp_path, monkeypatch, capsys): + """pact-nonexistent doesn't appear in agents/ glob → specialist_not_registered DENY.""" + _full_setup(monkeypatch, tmp_path, agents=("pact-architect",)) + code, out = _run_main(_make_input(subagent_type="pact-nonexistent"), capsys) + assert code == 2 + reason = out["hookSpecificOutput"]["permissionDecisionReason"] + assert "not a registered PACT specialist" in reason + + +# ============================================================================= +# team_name_mismatch / team_name_unavailable — team_name mismatch or empty session source +# ============================================================================= + + +def test_deny_when_team_name_mismatch(tmp_path, monkeypatch, capsys): + """Spawn passes team_name='wrong-team' but session is 'pact-test' → team_name_mismatch DENY.""" + _full_setup(monkeypatch, tmp_path) + code, out = _run_main(_make_input(team_name="wrong-team"), capsys) + assert code == 2 + reason = out["hookSpecificOutput"]["permissionDecisionReason"] + assert "does not match current session team" in reason + + +def test_deny_when_session_team_unavailable(tmp_path, monkeypatch, capsys): + """Empty-source decision (architect §7(h)): when session context has + empty team_name, fail-closed — adversary passing team_name='' would + otherwise equal the empty session value. + + The team_name_required rule catches the explicit empty team_name on + the spawn-input side BEFORE this rule runs, so we exercise this with a + non-empty spawn team_name + against an empty session team_name. + """ + plugin_root = tmp_path / "plugin" + _seed_plugin(plugin_root) + _setup_session(monkeypatch, tmp_path, plugin_root, team_name="") + _seed_team(tmp_path, members=(), tasks=((_NAME, "pending"),)) + code, out = _run_main(_make_input(team_name=_TEAM), capsys) + assert code == 2 + reason = out["hookSpecificOutput"]["permissionDecisionReason"] + assert "session team_name is unavailable" in reason + + +# ============================================================================= +# no_task_assigned — spawn before TaskCreate +# ============================================================================= + + +def test_deny_when_no_task_for_owner(tmp_path, monkeypatch, capsys): + """No task exists with owner=tester → no_task_assigned DENY.""" + _full_setup(monkeypatch, tmp_path, tasks=()) # zero tasks + code, out = _run_main(_make_input(), capsys) + assert code == 2 + reason = out["hookSpecificOutput"]["permissionDecisionReason"] + assert "no Task assigned" in reason + + +def test_deny_when_task_owner_differs(tmp_path, monkeypatch, capsys): + """Task exists but for a different owner → no_task_assigned DENY (still no task for tester).""" + _full_setup(monkeypatch, tmp_path, tasks=(("other-agent", "pending"),)) + code, out = _run_main(_make_input(), capsys) + assert code == 2 + reason = out["hookSpecificOutput"]["permissionDecisionReason"] + assert "no Task assigned" in reason + + +def test_deny_when_task_completed_only(tmp_path, monkeypatch, capsys): + """Only completed tasks count as 'no active task'. has_task_assigned + requires status in {pending, in_progress}. + """ + _full_setup(monkeypatch, tmp_path, tasks=((_NAME, "completed"),)) + code, out = _run_main(_make_input(), capsys) + assert code == 2 + assert "no Task assigned" in out["hookSpecificOutput"]["permissionDecisionReason"] + + +# ============================================================================= +# long_inline_mission — long inline mission / no TaskList ref / mode tri-state +# ============================================================================= + + +def test_warn_when_prompt_lacks_task_reference(tmp_path, monkeypatch, capsys): + """Default mode is 'warn' → ALLOW with additionalContext advisory. + INLINE_MISSION_MODE was read at module-load BEFORE this test — we don't + override it here; default is 'warn' unless a prior test set the env var. + """ + import dispatch_gate + + monkeypatch.setattr(dispatch_gate, "INLINE_MISSION_MODE", "warn") + _full_setup(monkeypatch, tmp_path) + short_no_taskref = "Do the thing." + code, out = _run_main(_make_input(prompt=short_no_taskref), capsys) + # WARN: exit 0, additionalContext present (no permissionDecision). + assert code == 0 + hso = out["hookSpecificOutput"] + assert hso["hookEventName"] == "PreToolUse" + assert "additionalContext" in hso + assert "prompt is long" in hso["additionalContext"] \ + or "lacks a TaskList reference" in hso["additionalContext"] + + +def test_warn_when_prompt_exceeds_800_chars(tmp_path, monkeypatch, capsys): + """Long prompt + TaskList reference still WARNs (length-or-no-ref).""" + import dispatch_gate + + monkeypatch.setattr(dispatch_gate, "INLINE_MISSION_MODE", "warn") + _full_setup(monkeypatch, tmp_path) + long_prompt = "x" * 801 + " Check TaskList for tasks assigned to you." + code, out = _run_main(_make_input(prompt=long_prompt), capsys) + assert code == 0 + hso = out["hookSpecificOutput"] + assert "additionalContext" in hso + assert "prompt is long" in hso["additionalContext"] \ + or "lacks a TaskList reference" in hso["additionalContext"] + + +def test_deny_in_deny_mode(tmp_path, monkeypatch, capsys): + """Mode='deny' promotes WARN → DENY.""" + import dispatch_gate + + monkeypatch.setattr(dispatch_gate, "INLINE_MISSION_MODE", "deny") + _full_setup(monkeypatch, tmp_path) + code, out = _run_main(_make_input(prompt="No reference here."), capsys) + assert code == 2 + hso = out["hookSpecificOutput"] + assert hso["permissionDecision"] == "deny" + assert "prompt is long" in hso["permissionDecisionReason"] \ + or "lacks a TaskList reference" in hso["permissionDecisionReason"] + + +def test_silent_allow_in_shadow_mode(tmp_path, monkeypatch, capsys): + """Mode='shadow' returns ALLOW silently — no advisory, no deny — but + the journal still records the long_inline_mission trigger for calibration. + """ + import dispatch_gate + + monkeypatch.setattr(dispatch_gate, "INLINE_MISSION_MODE", "shadow") + captured = _capture_journal(monkeypatch) + _full_setup(monkeypatch, tmp_path) + code, out = _run_main(_make_input(prompt="No reference."), capsys) + assert code == 0 + assert out == _SUPPRESS_EXPECTED + # Journal sees the long-inline-mission trigger. + assert any( + e.get("type") == "dispatch_decision" + and e.get("rule") == "long_inline_mission" + for e in captured + ) + + +# ============================================================================= +# name_not_unique — uniqueness vs live team members +# ============================================================================= + + +def test_deny_when_name_already_in_team_members(tmp_path, monkeypatch, capsys): + """Member 'tester' already lives in team.config.json → name_not_unique DENY.""" + _full_setup( + monkeypatch, + tmp_path, + members=(_NAME,), # 'tester' already present + tasks=((_NAME, "pending"),), + ) + code, out = _run_main(_make_input(), capsys) + assert code == 2 + reason = out["hookSpecificOutput"]["permissionDecisionReason"] + assert "is already a live member" in reason + + +def test_allows_unique_name_when_other_members_present( + tmp_path, monkeypatch, capsys +): + """Different live member doesn't trigger the uniqueness rule for an incoming new name.""" + _full_setup( + monkeypatch, + tmp_path, + members=("someone-else",), + tasks=((_NAME, "pending"),), + ) + code, out = _run_main(_make_input(), capsys) + assert code == 0 + assert out == _SUPPRESS_EXPECTED + + +# ============================================================================= +# plugin_agents_missing — plugin_root agents/ directory missing +# ============================================================================= + + +def test_deny_when_plugin_agents_missing(tmp_path, monkeypatch, capsys): + """plugin_root resolves to a path whose agents/ subdir doesn't exist.""" + plugin_root = tmp_path / "broken-plugin" + plugin_root.mkdir() # exists but no agents/ subdir + _setup_session(monkeypatch, tmp_path, plugin_root) + _seed_team(tmp_path, members=(), tasks=((_NAME, "pending"),)) + code, out = _run_main(_make_input(), capsys) + assert code == 2 + reason = out["hookSpecificOutput"]["permissionDecisionReason"] + assert "plugin agents/ directory is unavailable" in reason + + +# ============================================================================= +# Carve-outs — SOLO_EXEMPT + non-pact-* subagent_type +# ============================================================================= + + +@pytest.mark.parametrize( + "carve_out_type", + ["general-purpose", "Explore", "Plan"], +) +def test_solo_exempt_allows_without_name_or_team( + carve_out_type, tmp_path, monkeypatch, capsys +): + """Research-tier subagents legitimately spawn solo. ALLOW even with + name='' and team_name='' (which would otherwise trip name_required/team_name_required). + """ + _full_setup(monkeypatch, tmp_path) + code, out = _run_main( + _make_input(subagent_type=carve_out_type, name="", team_name=""), + capsys, + ) + assert code == 0 + assert out == _SUPPRESS_EXPECTED + + +def test_non_pact_subagent_type_passes_through(tmp_path, monkeypatch, capsys): + """An arbitrary non-pact-* subagent_type isn't this gate's business.""" + _full_setup(monkeypatch, tmp_path) + code, out = _run_main( + _make_input(subagent_type="some-other-tool", name="", team_name=""), + capsys, + ) + assert code == 0 + assert out == _SUPPRESS_EXPECTED + + +# ============================================================================= +# journal emit on every gate decision +# ============================================================================= + + +def test_journal_emit_on_allow(tmp_path, monkeypatch, capsys): + """Happy-path ALLOW still emits a dispatch_decision journal event.""" + captured = _capture_journal(monkeypatch) + _full_setup(monkeypatch, tmp_path) + code, _out = _run_main(_make_input(), capsys) + assert code == 0 + assert captured, "expected at least one journal event for ALLOW" + last = captured[-1] + assert last["type"] == "dispatch_decision" + assert last["decision"] == "ALLOW" + + +def test_journal_emit_on_deny_carries_rule(tmp_path, monkeypatch, capsys): + """DENY journal event carries the rule identifier (name_required here).""" + captured = _capture_journal(monkeypatch) + _full_setup(monkeypatch, tmp_path) + _run_main(_make_input(name=""), capsys) + deny_events = [ + e + for e in captured + if e.get("type") == "dispatch_decision" and e.get("decision") == "DENY" + ] + assert deny_events + assert deny_events[-1]["rule"] == "name_required" + + +def test_journal_emit_on_warn_carries_f7(tmp_path, monkeypatch, capsys): + """WARN (default inline-mission mode) journal event records rule='long_inline_mission'.""" + import dispatch_gate + + monkeypatch.setattr(dispatch_gate, "INLINE_MISSION_MODE", "warn") + captured = _capture_journal(monkeypatch) + _full_setup(monkeypatch, tmp_path) + _run_main(_make_input(prompt="No reference."), capsys) + warn_events = [ + e + for e in captured + if e.get("type") == "dispatch_decision" and e.get("decision") == "WARN" + ] + assert warn_events + assert warn_events[-1]["rule"] == "long_inline_mission" + + +# ============================================================================= +# prompt redaction at journal-write boundary +# ============================================================================= + + +@pytest.mark.parametrize( + "secret_token", + [ + # Adjacent-string-literal concatenation defeats the repo-root + # pre-commit secret-scanner regex while preserving runtime value. + "sk" "-ABCDEFGHIJKLMNOPQRSTUVWXYZ012345", + "sk" "-ant-" "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345", + "sk" "-ant-" "api03-" "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345", + "xoxb" "-ABCDEFGHIJKLMNOPQRSTUVWXYZ012345", + "ghp" "_ABCDEFGHIJKLMNOPQRSTUVWXYZ012345", + "gho" "_ABCDEFGHIJKLMNOPQRSTUVWXYZ012345", + "ghu" "_ABCDEFGHIJKLMNOPQRSTUVWXYZ012345", + "ghs" "_ABCDEFGHIJKLMNOPQRSTUVWXYZ012345", + "ghr" "_ABCDEFGHIJKLMNOPQRSTUVWXYZ012345", + "AKIA" "ABCDEFGHIJKLMNOP", + "AIza" "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", + ], + ids=[ + "openai", + "anthropic_basic", + "anthropic_api03", + "slack", + "github_pat", + "github_oauth", + "github_user", + "github_server", + "github_refresh", + "aws", + "google", + ], +) +def test_redacts_credential_patterns_in_journal( + secret_token, tmp_path, monkeypatch, capsys +): + """Each credential pattern is scrubbed BEFORE journal write. Verbatim + permissionDecisionReason is unaffected (kept for dispatcher debugging). + """ + captured = _capture_journal(monkeypatch) + _full_setup(monkeypatch, tmp_path) + prompt = f"Embedded: {secret_token} ignore. Check TaskList." + _run_main(_make_input(prompt=prompt), capsys) + assert captured + journaled = captured[-1].get("prompt_redacted", "") + assert "[REDACTED]" in journaled + assert secret_token not in journaled + + +def test_redacts_jwt_shape_in_journal(tmp_path, monkeypatch, capsys): + """JWT three-segment base64url shape is also redacted.""" + captured = _capture_journal(monkeypatch) + _full_setup(monkeypatch, tmp_path) + # Split via Python adjacent-string-literal concatenation so the + # repo-root pre-commit JWT-shape scanner (git_commit_check.py) does + # not flag this fixture as a real token. Runtime value matches the + # joined literal; the dispatch_gate JWT regex still matches at runtime. + fake_jwt = ( + "eyJ" "hbGciOiJIUzI1NiJ9" + "." "eyJ" "zdWIiOiIxMjMifQ" + "." "signaturepart_zZz123" + ) + _run_main( + _make_input(prompt=f"Token: {fake_jwt}. Check TaskList."), capsys + ) + assert captured + journaled = captured[-1].get("prompt_redacted", "") + assert "[REDACTED]" in journaled + assert fake_jwt not in journaled + + +def test_redacts_pem_private_key_block_in_journal(tmp_path, monkeypatch, capsys): + """A multi-line PEM-encoded private-key block is fully redacted at + the journal-write boundary, so accidentally pasting a key into a + spawn prompt never persists to disk. + """ + captured = _capture_journal(monkeypatch) + _full_setup(monkeypatch, tmp_path) + # Adjacent-string-literal concatenation so the repo-root pre-commit + # secret-scanner does not flag this fixture as a real key. Runtime + # value matches the joined literal. + fake_pem = ( + "-----BEGIN " "RSA PRIVATE KEY" "-----\n" + "MIIEowIBAAKCAQEA0Z8ZLfaketestkeyforredaction0123456789ABCDEFGH\n" + "moarbase64andthensome+/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKL\n" + "-----END " "RSA PRIVATE KEY" "-----" + ) + _run_main( + _make_input(prompt=f"Pasted key follows:\n{fake_pem}\nCheck TaskList."), + capsys, + ) + assert captured + journaled = captured[-1].get("prompt_redacted", "") + assert "[REDACTED]" in journaled + assert "MIIEowIBAAKCAQEA" not in journaled + assert "RSA PRIVATE KEY" not in journaled + + +# ============================================================================= +# Anti-sprawl invariant (auditor §11 YELLOW) +# ============================================================================= + + +def test_evaluate_dispatch_is_single_composition_function(): + """Auditor YELLOW note: gate file is 444 LOC vs 300 soft budget. The + important invariant isn't line count — it's that the F-row rules + compose in a single decision function rather than fragmenting into + per-row handlers. + + Asserts: dispatch_gate exposes ONE function with `evaluate_` prefix + that returns a 3-tuple (decision, reason, rule). No per-F-row + public functions snuck in. + """ + import dispatch_gate + import inspect + + public_evaluate_fns = [ + name + for name, obj in inspect.getmembers(dispatch_gate, inspect.isfunction) + if name.startswith("evaluate_") and not name.startswith("_") + ] + assert public_evaluate_fns == ["evaluate_dispatch"], ( + f"expected single evaluate_dispatch composition, got {public_evaluate_fns}" + ) + # Per-F-row functions would have shapes like _f1_check, _evaluate_f7, etc. + forbidden_prefixes = ("_evaluate_f", "_f1_", "_f2_", "_f3_", "_f4_") + fn_names = [ + name for name, _ in inspect.getmembers(dispatch_gate, inspect.isfunction) + ] + sprawl = [ + n for n in fn_names if any(n.startswith(p) for p in forbidden_prefixes) + ] + assert not sprawl, f"per-F-row sprawl detected: {sprawl}" + + +# ============================================================================= +# Defensive: malformed stdin / non-Agent tool / non-dict input +# ============================================================================= + + +def test_malformed_stdin_fail_open(tmp_path, monkeypatch, capsys): + """Malformed stdin → suppressOutput, exit 0 (input-side errors are + the harness's domain, not the gate's). + """ + from dispatch_gate import main + + with patch("sys.stdin", io.StringIO("not json")): + with pytest.raises(SystemExit) as exc: + main() + assert exc.value.code == 0 + out = capsys.readouterr().out.strip() + assert json.loads(out) == _SUPPRESS_EXPECTED + + +def test_non_agent_tool_no_op(tmp_path, monkeypatch, capsys): + """Defensive: if the matcher routes a non-Agent tool here, no-op.""" + payload = _make_input() + payload["tool_name"] = "Read" + code, out = _run_main(payload, capsys) + assert code == 0 + assert out == _SUPPRESS_EXPECTED + + +# ============================================================================= +# Path-alignment regression — has_task_assigned reads the canonical +# task store at ~/.claude/tasks/{team_name}/, NOT the legacy +# ~/.claude/teams/{team_name}/tasks/. Counter-test cardinality: +# reverting the path in shared/dispatch_helpers.py back to the legacy +# layout makes these regression tests flip from PASS to FAIL. +# ============================================================================= + + +def test_canonical_path_satisfies_no_task_assigned(tmp_path, monkeypatch): + """has_task_assigned MUST read ~/.claude/tasks/{team_name}/. + + Seed a task ONLY at the canonical path; leave the legacy path empty. + The fixed implementation returns True. An implementation that read + the legacy path would return False — that's the counter-test + cardinality. + """ + from shared.dispatch_helpers import has_task_assigned + + monkeypatch.setattr(Path, "home", lambda: tmp_path) + canonical = tmp_path / ".claude" / "tasks" / _TEAM + canonical.mkdir(parents=True) + (canonical / "1.json").write_text( + json.dumps({"id": "1", "owner": _NAME, "status": "pending"}), + encoding="utf-8", + ) + legacy = tmp_path / ".claude" / "teams" / _TEAM / "tasks" + assert not legacy.exists() + + assert has_task_assigned(_TEAM, _NAME) is True + + +def test_legacy_path_alone_does_not_satisfy_no_task_assigned(tmp_path, monkeypatch): + """A task at ONLY the legacy ~/.claude/teams/{team}/tasks/ path must NOT + satisfy has_task_assigned. This pins the path the implementation reads + so a future regression to the legacy layout flips this assertion. + """ + from shared.dispatch_helpers import has_task_assigned + + monkeypatch.setattr(Path, "home", lambda: tmp_path) + legacy = tmp_path / ".claude" / "teams" / _TEAM / "tasks" + legacy.mkdir(parents=True) + (legacy / "1.json").write_text( + json.dumps({"id": "1", "owner": _NAME, "status": "pending"}), + encoding="utf-8", + ) + canonical = tmp_path / ".claude" / "tasks" / _TEAM + assert not canonical.exists() + + assert has_task_assigned(_TEAM, _NAME) is False + + +def test_canonical_path_aligns_with_task_utils(tmp_path, monkeypatch): + """The path has_task_assigned reads must be the same root that + task_utils.read_task_json uses. If task_utils ever moves, this test + surfaces the divergence at the dispatch-gate layer. + """ + from shared.dispatch_helpers import has_task_assigned + from shared.task_utils import read_task_json + + monkeypatch.setattr(Path, "home", lambda: tmp_path) + canonical = tmp_path / ".claude" / "tasks" / _TEAM + canonical.mkdir(parents=True) + (canonical / "1.json").write_text( + json.dumps({"id": "1", "owner": _NAME, "status": "pending"}), + encoding="utf-8", + ) + + assert has_task_assigned(_TEAM, _NAME) is True + assert read_task_json("1", _TEAM).get("owner") == _NAME + + +# ============================================================================= +# team_name case-sensitivity normalization +# ============================================================================= + + +def test_mixed_case_team_name_normalizes_consistently(tmp_path, monkeypatch, capsys): + """A spawn under a mixed-case team_name behaves identically to its + lowercase form across the session-equality, member-uniqueness, and + task-assigned rules. + + Without normalization at the entry of evaluate_dispatch, the + session-equality check would lowercase its operand while the + filesystem reads (member-roster + task-store) used the raw value. + On a case-sensitive filesystem the spawn would slip past the + equality check yet read the wrong directories, producing + inconsistent verdicts. + + The fixture seeds tasks + members under the canonical lowercase team + directory (which is what session_team returns); the spawn input + passes the same name in mixed case. Expect ALLOW, matching the + pure-lowercase happy path. + """ + plugin_root = _full_setup( + monkeypatch, + tmp_path, + members=("someone-else",), + tasks=((_NAME, "pending"),), + ) + assert plugin_root # used to ensure setup completes + + mixed = _TEAM.upper() # e.g. "PACT-TEST" + code, out = _run_main(_make_input(team_name=mixed), capsys) + assert code == 0, ( + f"Mixed-case team_name should normalize and ALLOW (canonical " + f"lowercase form passes all rules); got code={code} out={out}" + ) diff --git a/pact-plugin/tests/test_dispatch_gate_smoke.py b/pact-plugin/tests/test_dispatch_gate_smoke.py new file mode 100644 index 00000000..fa5f3b11 --- /dev/null +++ b/pact-plugin/tests/test_dispatch_gate_smoke.py @@ -0,0 +1,307 @@ +""" +Smoke tests for dispatch_gate.py — #662 PreToolUse matcher='Agent' hook. + +NOT comprehensive coverage — that lives in the TEST phase / Commit-4 +follow-up. These cases are the minimum-viable surface that proves the +gate's structural contract: + + 1. Happy-path ALLOW (registered specialist, valid name, matching team, + task assigned, short prompt with TaskList reference) + 2. name_required DENY: empty name= + 3. team_name_required DENY: empty team_name= + 4. name_reserved_token DENY: reserved name (`team-lead`) + 5. SOLO_EXEMPT carve-out: subagent_type='general-purpose' → ALLOW + 6. Module-load fail-closed counter-test: subprocess invocation with + PYTHONPATH manipulated so shared.dispatch_helpers raises + ImportError → DENY output includes hookEventName + exit 2 + +The fail-closed counter-test uses subprocess+PYTHONPATH per PR #660 R2 +discipline: NEVER pop shared.* from sys.modules in the test process. +Sabotage runs in a subprocess so the test process's import state stays +clean. +""" + +import io +import json +import os +import subprocess +import sys +from pathlib import Path +from unittest.mock import patch + +import pytest + +sys.path.insert(0, str(Path(__file__).parent.parent / "hooks")) + + +_SUPPRESS_EXPECTED = {"suppressOutput": True} +_TEAM = "pact-test" +_NAME = "tester" + + +# ─── helpers ─────────────────────────────────────────────────────────────── + + +def _make_input(subagent_type="pact-architect", name=_NAME, team_name=_TEAM, + prompt="Standard mission. Check TaskList for tasks assigned to you."): + return { + "hook_event_name": "PreToolUse", + "session_id": "test-session", + "tool_name": "Agent", + "tool_input": { + "subagent_type": subagent_type, + "name": name, + "team_name": team_name, + "prompt": prompt, + }, + } + + +def _run_main(input_data, capsys): + """Invoke dispatch_gate.main() in-process. Returns (exit_code, stdout_json).""" + from dispatch_gate import main + + with patch("sys.stdin", io.StringIO(json.dumps(input_data))): + with pytest.raises(SystemExit) as exc_info: + main() + + captured = capsys.readouterr() + out = captured.out.strip() + return exc_info.value.code, json.loads(out) if out else {} + + +def _setup_session(monkeypatch, tmp_path, plugin_root: Path, team_name=_TEAM): + """Wire pact_context to point at a tmp session, set HOME so + has_task_assigned + _team_member_names read tmp dirs. + """ + import shared.pact_context as ctx_module + + monkeypatch.setattr(Path, "home", lambda: tmp_path) + + ctx_path = tmp_path / "pact-session-context.json" + ctx_path.write_text(json.dumps({ + "team_name": team_name, + "session_id": "test-session", + "project_dir": str(tmp_path / "project"), + "plugin_root": str(plugin_root), + "started_at": "2026-01-01T00:00:00Z", + }), encoding="utf-8") + monkeypatch.setattr(ctx_module, "_context_path", ctx_path) + monkeypatch.setattr(ctx_module, "_cache", None) + + # Override init() so re-init from hook stdin doesn't overwrite our path. + monkeypatch.setattr(ctx_module, "init", lambda input_data: None) + + # Clear the registry cache so each test sees the freshly-built plugin_root. + import shared.dispatch_helpers as dh + dh._specialist_registry.cache_clear() + + +def _seed_plugin(plugin_root: Path, agents=("pact-architect",)): + agents_dir = plugin_root / "agents" + agents_dir.mkdir(parents=True, exist_ok=True) + for stem in agents: + (agents_dir / f"{stem}.md").write_text("---\nname: " + stem + "\n---\n") + + +def _seed_team(home: Path, team_name=_TEAM, members=(), tasks=()): + """Write fake team config + canonical tasks store. + + config.json under ``HOME/.claude/teams/{team_name}/`` (read by + ``_team_member_names``); tasks under ``HOME/.claude/tasks/{team_name}/`` + (the canonical store per ``shared/task_utils.py``, read by + ``has_task_assigned`` after the path-alignment fix). + """ + team_dir = home / ".claude" / "teams" / team_name + team_dir.mkdir(parents=True, exist_ok=True) + (team_dir / "config.json").write_text(json.dumps({ + "team_name": team_name, + "members": [{"name": m} for m in members], + }), encoding="utf-8") + tasks_dir = home / ".claude" / "tasks" / team_name + tasks_dir.mkdir(parents=True, exist_ok=True) + for i, (owner, status) in enumerate(tasks): + (tasks_dir / f"task_{i}.json").write_text(json.dumps({ + "id": str(i), + "owner": owner, + "status": status, + }), encoding="utf-8") + + +# ─── tests ───────────────────────────────────────────────────────────────── + + +def test_allow_happy_path(tmp_path, monkeypatch, capsys): + """Registered specialist, valid name, matching team, task assigned, + short prompt with TaskList reference → ALLOW (suppressOutput, exit 0). + """ + plugin_root = tmp_path / "plugin" + _seed_plugin(plugin_root, agents=("pact-architect",)) + _setup_session(monkeypatch, tmp_path, plugin_root) + _seed_team(tmp_path, members=(), tasks=((_NAME, "pending"),)) + + code, out = _run_main(_make_input(), capsys) + assert code == 0 + assert out == _SUPPRESS_EXPECTED + + +def test_deny_empty_name(tmp_path, monkeypatch, capsys): + plugin_root = tmp_path / "plugin" + _seed_plugin(plugin_root) + _setup_session(monkeypatch, tmp_path, plugin_root) + + code, out = _run_main(_make_input(name=""), capsys) + assert code == 2 + hso = out["hookSpecificOutput"] + assert hso["hookEventName"] == "PreToolUse" + assert hso["permissionDecision"] == "deny" + assert "name= parameter is required" in hso["permissionDecisionReason"] + + +def test_deny_empty_team_name(tmp_path, monkeypatch, capsys): + plugin_root = tmp_path / "plugin" + _seed_plugin(plugin_root) + _setup_session(monkeypatch, tmp_path, plugin_root) + + code, out = _run_main(_make_input(team_name=""), capsys) + assert code == 2 + hso = out["hookSpecificOutput"] + assert hso["hookEventName"] == "PreToolUse" + assert hso["permissionDecision"] == "deny" + assert "team_name= parameter is required" in hso["permissionDecisionReason"] + + +def test_deny_reserved_name(tmp_path, monkeypatch, capsys): + """Reserved name 'team-lead' would shadow the routing literal — DENY.""" + plugin_root = tmp_path / "plugin" + _seed_plugin(plugin_root) + _setup_session(monkeypatch, tmp_path, plugin_root) + + code, out = _run_main(_make_input(name="team-lead"), capsys) + assert code == 2 + hso = out["hookSpecificOutput"] + assert hso["hookEventName"] == "PreToolUse" + assert hso["permissionDecision"] == "deny" + assert "reserved-token" in hso["permissionDecisionReason"] + + +def test_solo_exempt_carve_out(tmp_path, monkeypatch, capsys): + """subagent_type='general-purpose' bypasses the gate even without + name/team_name — research agents legitimately spawn solo. + """ + plugin_root = tmp_path / "plugin" + _seed_plugin(plugin_root) + _setup_session(monkeypatch, tmp_path, plugin_root) + + code, out = _run_main( + _make_input(subagent_type="general-purpose", name="", team_name=""), + capsys, + ) + assert code == 0 + assert out == _SUPPRESS_EXPECTED + + +def test_fail_closed_module_load(tmp_path): + """Module-load fail-closed counter-test: sabotage shared.dispatch_helpers via PYTHONPATH + so its import raises, then invoke dispatch_gate.py as a subprocess. + Expect: exit 2, stdout JSON with hookEventName + permissionDecision='deny'. + + Subprocess isolation per PR #660 R2 discipline — NEVER pop shared.* + from sys.modules in the test process. The sabotaged shared/ tree + lives entirely in tmp_path: copy the real shared/ package, replace + dispatch_helpers.py with a raise, and point PYTHONPATH at the copy. + """ + import shutil + + repo_hooks = Path(__file__).parent.parent / "hooks" + real_shared = repo_hooks / "shared" + sabotage_root = tmp_path / "sabotage" + sabotage_shared = sabotage_root / "shared" + # Copy the real shared/ tree so every OTHER submodule (pact_context, + # session_journal, ...) imports normally. + shutil.copytree(real_shared, sabotage_shared) + # Overwrite ONLY dispatch_helpers.py with a forced-raise stub so the + # `from shared.dispatch_helpers import ...` line in dispatch_gate.py + # fires the wrapped except BaseException → _emit_load_failure_deny. + (sabotage_shared / "dispatch_helpers.py").write_text( + "raise ImportError('sabotage: forced module-load failure')\n" + ) + + env = os.environ.copy() + # Sabotage dir first → its `shared` package wins over the real one. + env["PYTHONPATH"] = os.pathsep.join([ + str(sabotage_root), + str(repo_hooks), + ]) + # Python (per PEP 432 / 3.11+) auto-prepends the script's parent dir + # to sys.path[0], which would let `shared` resolve to the REAL + # hooks/shared/ before our sabotage. PYTHONSAFEPATH=1 disables that + # auto-insert so PYTHONPATH ordering is authoritative. + env["PYTHONSAFEPATH"] = "1" + + proc = subprocess.run( + [sys.executable, str(repo_hooks / "dispatch_gate.py")], + input=json.dumps({ + "hook_event_name": "PreToolUse", + "session_id": "test", + "tool_name": "Agent", + "tool_input": {}, + }), + capture_output=True, + text=True, + env=env, + timeout=10, + ) + assert proc.returncode == 2, ( + f"expected exit 2, got {proc.returncode}. stdout={proc.stdout!r} " + f"stderr={proc.stderr!r}" + ) + out = json.loads(proc.stdout.strip()) + hso = out["hookSpecificOutput"] + assert hso["hookEventName"] == "PreToolUse" + assert hso["permissionDecision"] == "deny" + assert "load failure" in hso["permissionDecisionReason"].lower() \ + or "module imports" in hso["permissionDecisionReason"].lower() + + +def test_redaction_in_journal(tmp_path, monkeypatch, capsys): + """Credential redaction verification: prompt containing a credential pattern is + redacted in the journal-written form. The user-facing + permissionDecisionReason is unaffected (verbatim prompt fragment is + kept for dispatcher debugging). + + We trigger team_name_required (empty team_name) so the gate DENIES, then read the + captured journal event to confirm prompt_redacted contains + [REDACTED] and NOT the original sk-... token. + """ + plugin_root = tmp_path / "plugin" + _seed_plugin(plugin_root) + _setup_session(monkeypatch, tmp_path, plugin_root) + + captured_events = [] + + def _capture_append(event): + captured_events.append(event) + return True + + import shared.session_journal as sj + monkeypatch.setattr(sj, "append_event", _capture_append) + import dispatch_gate + monkeypatch.setattr(dispatch_gate, "append_event", _capture_append) + + # Split via Python adjacent-string-literal concatenation so the + # repo-root pre-commit secret-scanner regex + # ``["']sk-[a-zA-Z0-9]{20,}["']`` does NOT match this test fixture. + # Runtime value is identical to the joined literal. + secret = "sk" "-ABCDEFGHIJKLMNOPQRSTUVWXYZ012345" + prompt = f"Embedded credential: {secret} please ignore. Check TaskList." + code, _out = _run_main( + _make_input(team_name="", prompt=prompt), + capsys, + ) + assert code == 2 + assert captured_events, "expected a journal event to be captured" + event = captured_events[-1] + assert event["type"] == "dispatch_decision" + assert "[REDACTED]" in event["prompt_redacted"] + assert secret not in event["prompt_redacted"] diff --git a/pact-plugin/tests/test_error_output.py b/pact-plugin/tests/test_error_output.py index 3fcad7b9..ce7e76f9 100644 --- a/pact-plugin/tests/test_error_output.py +++ b/pact-plugin/tests/test_error_output.py @@ -996,7 +996,7 @@ def test_no_reminder_suppress(self, capsys): from auditor_reminder import main input_data = json.dumps({ - "tool_name": "Task", + "tool_name": "Agent", "tool_input": {"subagent_type": "pact-test-engineer"}, }) with patch("sys.stdin", io.StringIO(input_data)): diff --git a/pact-plugin/tests/test_hooks_json.py b/pact-plugin/tests/test_hooks_json.py index 12ef3c02..473337b2 100644 --- a/pact-plugin/tests/test_hooks_json.py +++ b/pact-plugin/tests/test_hooks_json.py @@ -39,7 +39,7 @@ # Hooks that MUST be synchronous (blocking) — they affect tool decisions MUST_BE_SYNC = { - "team_guard.py", # Blocks Task dispatch if no team + "team_guard.py", # Blocks Agent dispatch if no team (#662) "worktree_guard.py", # Blocks edits outside worktree "validate_handoff.py", # Validates agent output "agent_handoff_emitter.py", # Writes agent_handoff journal event on TaskCompleted @@ -394,3 +394,125 @@ def test_session_start_has_exactly_one_hook(self, hooks_config): assert "session_init.py" in session_start[0]["hooks"][0]["command"], ( "SessionStart's sole hook must be session_init.py." ) + + +class TestSpawnToolMatchersPost662: + """#662: matcher='Agent' on team_guard + auditor_reminder; matcher + 'TaskCreate|TaskUpdate' on wake_lifecycle_emitter PRESERVED. The earlier + matcher='Task' was wrong — the canonical Claude Code platform tool + name for sub-agent spawning is `Agent`. Cat-2 task-management tools + (TaskCreate/TaskUpdate/TaskList/...) are unrelated and MUST stay. + """ + + def _all_matcher_pairs(self, hooks_config): + pairs = [] + for event_type, entries in hooks_config["hooks"].items(): + for entry in entries: + if "matcher" not in entry: + continue + commands = [ + h.get("command", "") + for h in entry.get("hooks", []) + ] + pairs.append((event_type, entry["matcher"], commands)) + return pairs + + def test_team_guard_matcher_is_agent(self, hooks_config): + """PreToolUse team_guard matcher MUST be 'Agent' (#662 Cat-1).""" + for event_type, matcher, commands in self._all_matcher_pairs( + hooks_config + ): + if any("team_guard.py" in c for c in commands): + assert matcher == "Agent", ( + f"team_guard.py matcher must be 'Agent' (#662); got " + f"{matcher!r} on {event_type}" + ) + return + pytest.fail("team_guard.py entry not found in hooks.json") + + def test_auditor_reminder_matcher_is_agent(self, hooks_config): + """PostToolUse auditor_reminder matcher MUST be 'Agent' (#662 Cat-1).""" + for event_type, matcher, commands in self._all_matcher_pairs( + hooks_config + ): + if any("auditor_reminder.py" in c for c in commands): + assert matcher == "Agent", ( + f"auditor_reminder.py matcher must be 'Agent' (#662); " + f"got {matcher!r} on {event_type}" + ) + return + pytest.fail("auditor_reminder.py entry not found in hooks.json") + + def test_wake_lifecycle_emitter_matcher_unchanged(self, hooks_config): + """Cat-2 preservation regression-prevention (#662): the + wake_lifecycle_emitter matcher 'TaskCreate|TaskUpdate' MUST NOT be + renamed. These are PACT plugin task-system tools, distinct from + the spawn-tool `Agent`. Renaming them would break lifecycle hooks. + """ + for event_type, matcher, commands in self._all_matcher_pairs( + hooks_config + ): + if any("wake_lifecycle_emitter.py" in c for c in commands): + assert matcher == "TaskCreate|TaskUpdate", ( + f"wake_lifecycle_emitter.py matcher MUST remain " + f"'TaskCreate|TaskUpdate' (Cat-2 preservation, #662); " + f"got {matcher!r} on {event_type}" + ) + return + pytest.fail( + "wake_lifecycle_emitter.py entry not found in hooks.json" + ) + + def test_no_matcher_is_bare_task_string(self, hooks_config): + """Regression-prevention: NO matcher should be the bare 'Task' + string after #662. The valid uses are matcher='Agent' (spawn tool) + or matcher='TaskCreate|TaskUpdate' (Cat-2 task-management tools). + """ + offenders = [] + for event_type, matcher, _ in self._all_matcher_pairs(hooks_config): + if matcher == "Task": + offenders.append((event_type, matcher)) + assert not offenders, ( + f"No matcher may be the bare 'Task' literal post-#662 — that " + f"was the wrong rename direction. Offenders: {offenders}" + ) + + +class TestCat2PreservationBaseline: + """#662 PREPARE §2 baseline: Cat-2 task-management names + (TaskCreate/TaskUpdate/TaskList/TaskGet/TaskStop/TaskOutput) appear + ≥551 times across pact-plugin/. The Cat-1 rename Task→Agent MUST NOT + have decreased this count. Counts may grow as new gates land. + """ + + _CAT2_NAMES = ( + "TaskCreate", "TaskUpdate", "TaskList", + "TaskGet", "TaskStop", "TaskOutput", + ) + _BASELINE = 551 + + def test_cat2_total_at_or_above_baseline(self): + import re + plugin_dir = Path(__file__).parent.parent + pattern = re.compile( + r"\b(?:TaskCreate|TaskUpdate|TaskList|TaskGet|TaskStop|TaskOutput)\b" + ) + total = 0 + for path in plugin_dir.rglob("*"): + if not path.is_file(): + continue + # Skip binary-ish / vendored paths. + if any(part.startswith(".") for part in path.relative_to(plugin_dir).parts): + continue + try: + text = path.read_text(encoding="utf-8") + except (UnicodeDecodeError, OSError): + continue + total += sum(1 for _ in pattern.finditer(text)) + assert total >= self._BASELINE, ( + f"Cat-2 preservation regression (#662): grep total {total} < " + f"baseline {self._BASELINE}. The Cat-1 rename Task→Agent MUST " + f"NOT decrease the Cat-2 count. Investigate which file lost " + f"a TaskCreate/TaskUpdate/TaskList/TaskGet/TaskStop/TaskOutput " + f"reference." + ) diff --git a/pact-plugin/tests/test_patterns.py b/pact-plugin/tests/test_patterns.py index acea329a..69f2aeaa 100644 --- a/pact-plugin/tests/test_patterns.py +++ b/pact-plugin/tests/test_patterns.py @@ -30,7 +30,7 @@ REVIEW_PROMPT_INSTRUCTION_MAX_LENGTH, TASK_SUMMARY_MAX_LENGTH, PACT_AGENT_PATTERN, - TASK_TOOL_PATTERN, + SPAWN_TOOL_PATTERN, SUBAGENT_TYPE_PATTERN, WorkflowPattern, compile_workflow_patterns, @@ -444,9 +444,9 @@ def test_pact_agent_pattern(self): def test_task_tool_pattern(self): """Test Task tool pattern matching.""" - assert TASK_TOOL_PATTERN.search('"name": "Task"') is not None - assert TASK_TOOL_PATTERN.search('"name":"Task"') is not None - assert TASK_TOOL_PATTERN.search('"name": "task"') is not None # Case insensitive + assert SPAWN_TOOL_PATTERN.search('"name": "Task"') is not None + assert SPAWN_TOOL_PATTERN.search('"name":"Task"') is not None + assert SPAWN_TOOL_PATTERN.search('"name": "task"') is not None # Case insensitive def test_subagent_type_pattern(self): """Test subagent type extraction pattern.""" diff --git a/pact-plugin/tests/test_patterns_fuzz.py b/pact-plugin/tests/test_patterns_fuzz.py index 4071d0cc..9eafcef3 100644 --- a/pact-plugin/tests/test_patterns_fuzz.py +++ b/pact-plugin/tests/test_patterns_fuzz.py @@ -22,7 +22,7 @@ CONTEXT_EXTRACTORS, PENDING_ACTION_PATTERNS, PACT_AGENT_PATTERN, - TASK_TOOL_PATTERN, + SPAWN_TOOL_PATTERN, SUBAGENT_TYPE_PATTERN, is_termination_signal, extract_context_value, @@ -227,7 +227,7 @@ def test_pact_agent_pattern_no_crash(self, content: str): @pytest.mark.parametrize("content", EDGE_CASE_INPUTS) def test_task_tool_pattern_no_crash(self, content: str): """Test Task tool pattern doesn't crash on edge cases.""" - result = TASK_TOOL_PATTERN.search(content) + result = SPAWN_TOOL_PATTERN.search(content) assert result is None or hasattr(result, 'group') @pytest.mark.parametrize("content", EDGE_CASE_INPUTS) @@ -268,7 +268,7 @@ def test_repeated_characters(self): pattern.search(content) PACT_AGENT_PATTERN.search(content) - TASK_TOOL_PATTERN.search(content) + SPAWN_TOOL_PATTERN.search(content) SUBAGENT_TYPE_PATTERN.search(content) elapsed = time.time() - start_time diff --git a/pact-plugin/tests/test_peer_inject.py b/pact-plugin/tests/test_peer_inject.py index 475d54d9..f0fc0516 100644 --- a/pact-plugin/tests/test_peer_inject.py +++ b/pact-plugin/tests/test_peer_inject.py @@ -664,7 +664,7 @@ class TestSanitizeAgentName: block's substring check, that injected line would cause the teammate to self-identify as the orchestrator. The exploit requires upstream orchestrator compromise (the orchestrator must pass hostile input - via Task(name=...)), so practical exploitability is low — but the + via Agent(name=...)), so practical exploitability is low — but the fix is cheap and security-engineer verified the spoofing mechanism with a Python PoC during cycle 1 review. diff --git a/pact-plugin/tests/test_pin_caps_gate_matrix.py b/pact-plugin/tests/test_pin_caps_gate_matrix.py index 465f4bf2..fd264eeb 100644 --- a/pact-plugin/tests/test_pin_caps_gate_matrix.py +++ b/pact-plugin/tests/test_pin_caps_gate_matrix.py @@ -789,7 +789,7 @@ class TestPinCapsGate_Matrix_Passthrough: @pytest.mark.parametrize( "tool_name", - ["Read", "Bash", "Grep", "Glob", "Task", "NotebookEdit", "TodoWrite"], + ["Read", "Bash", "Grep", "Glob", "Agent", "NotebookEdit", "TodoWrite"], ) def test_non_gated_tools_allow(self, gate_env, tool_name): env = gate_env(pin_count=3) diff --git a/pact-plugin/tests/test_pin_staleness_gate.py b/pact-plugin/tests/test_pin_staleness_gate.py index df19735d..837d2e96 100644 --- a/pact-plugin/tests/test_pin_staleness_gate.py +++ b/pact-plugin/tests/test_pin_staleness_gate.py @@ -93,7 +93,7 @@ class TestPinStalenessGate_ToolMatch: """Only Edit and Write are gated — other tools always pass.""" @pytest.mark.parametrize("tool_name", ["Read", "Bash", "Glob", "Grep", - "Task", "NotebookEdit", ""]) + "Agent", "NotebookEdit", ""]) def test_non_gated_tools_pass(self, tool_name, gate_env): gate_env(marker_present=True) result = _call_gate({ diff --git a/pact-plugin/tests/test_skills_structure.py b/pact-plugin/tests/test_skills_structure.py index 32749309..29bfb4fb 100644 --- a/pact-plugin/tests/test_skills_structure.py +++ b/pact-plugin/tests/test_skills_structure.py @@ -213,6 +213,49 @@ def test_lead_gate_appears_before_communication(self): _ALL_SKILL_FILES = sorted(SKILLS_DIR.glob("*/SKILL.md")) +# ============================================================================= +# F20 — pact-*.md agent files must declare pact-agent-teams in skills frontmatter +# ============================================================================= + +# Per architect §6 / §7(b)+(f), every spawned PACT teammate must preload the +# pact-agent-teams skill so the team-protocol body is in the agent's context +# at first turn. pact-orchestrator is delivered via `claude --agent` rather +# than spawned, so its frontmatter shape differs (no `skills:` block) and is +# carved out. +PACT_AGENTS_DIR = SKILLS_DIR.parent / "agents" +F20_CARVE_OUT_FILES = frozenset({"pact-orchestrator"}) +_PACT_AGENT_FILES = sorted(PACT_AGENTS_DIR.glob("pact-*.md")) + + +@pytest.mark.parametrize( + "agent_path", + _PACT_AGENT_FILES, + ids=[p.stem for p in _PACT_AGENT_FILES], +) +def test_f20_pact_agent_declares_pact_agent_teams_skill(agent_path): + """F20 pre-merge audit: each pact-*.md teammate file must list + `pact-agent-teams` under its skills: frontmatter block. Without the + skill preload, spawned teammates never see TaskList / SendMessage / + completion-authority protocol — the original #662 trigger surface. + + Carve-out: pact-orchestrator is delivered via `claude --agent` and has + no skills: block at all. Documented in F20_CARVE_OUT_FILES. + """ + if agent_path.stem in F20_CARVE_OUT_FILES: + pytest.skip( + f"{agent_path.stem} is carved out from F20 — orchestrator " + "persona delivered via --agent flag, not spawn" + ) + text = agent_path.read_text(encoding="utf-8") + fm = parse_frontmatter(text) + assert fm is not None, f"{agent_path.name} has no frontmatter" + skills_value = fm.get("skills", "") + assert "pact-agent-teams" in skills_value, ( + f"{agent_path.name} skills frontmatter missing pact-agent-teams. " + "Spawned teammates would lack the team-protocol body — see #662 F20." + ) + + class TestNoFirstActionFossilInSkillBodies: """Negative-invariant fossilization guard: skill bodies must not contain the v3.x FIRST-ACTION + Skill("PACT:teammate-bootstrap") + peer_inject diff --git a/pact-plugin/tests/test_task_lifecycle_gate.py b/pact-plugin/tests/test_task_lifecycle_gate.py new file mode 100644 index 00000000..a2460575 --- /dev/null +++ b/pact-plugin/tests/test_task_lifecycle_gate.py @@ -0,0 +1,641 @@ +""" +Comprehensive coverage for task_lifecycle_gate.py — #662 PostToolUse hook. + +Sibling to test_task_lifecycle_gate_smoke.py (the 6 minimum-viable cases). +This file expands every rule landed in the gate. + +Rule coverage: + - teachback_addblocks_missing — TaskCreate TEACHBACK without + addBlocks=[] → advisory + - work_addblockedby_missing — TaskCreate pact-* non-TEACHBACK + without addBlockedBy → advisory + - completion_no_paired_send — TaskUpdate(completed) teachback + Task without paired SendMessage → advisory + Boundary tested at 119s (silent) and 121s (advisory). + - handoff_missing — TaskUpdate(completed) pact-* work Task + without metadata.handoff → advisory + - self_completion — Teammate self-completes Task → advisory + + completion_disputed writeback + Carve-outs: secretary self-complete (SELF_COMPLETE_EXEMPT_AGENTS), + signal task (metadata.completion_type=signal — exempt via the + is_self_complete_exempt predicate), recursion-marker skip. + Sketch-A: actor unresolvable → CURRENT skip behavior; encoded with + explicit deviation-documenting test referencing architect §5.3. + - handoff_schema_invalid — TaskUpdate(completed) with malformed + metadata.handoff → advisory (disjoint from handoff_missing — + handoff present but schema-incomplete). + - module-load failure → advisory + hookEventName=PostToolUse + exit 0 + Anti-sprawl — single evaluate_lifecycle composition. + +Disciplines applied: + - PR #660 R2: never pop shared.* from sys.modules in this test process. + - Rule names describe behavior, not provenance — per + `feedback_no_planning_artifact_test_names`. +""" + +import io +import json +import sys +from datetime import datetime, timezone +from pathlib import Path +from unittest.mock import patch + +import pytest + + +sys.path.insert(0, str(Path(__file__).parent.parent / "hooks")) + +import task_lifecycle_gate as tlg # noqa: E402 + + +# ============================================================================= +# Helpers +# ============================================================================= + + +def _stdin(payload: dict) -> io.StringIO: + return io.StringIO(json.dumps(payload)) + + +def _capture_main(payload: dict, capsys) -> tuple[int, dict | None]: + with patch.object(sys, "stdin", _stdin(payload)): + with pytest.raises(SystemExit) as exc: + tlg.main() + raw_code = exc.value.code if exc.value.code is not None else 0 + code = int(raw_code) if isinstance(raw_code, int) else 0 + out = capsys.readouterr().out.strip() + parsed = json.loads(out) if out else None + return code, parsed + + +# ============================================================================= +# teachback_addblocks_missing — TEACHBACK Task without addBlocks=[] +# ============================================================================= + + +def test_silent_when_teachback_carries_addblocks(pact_context): + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskCreate", + "tool_input": { + "subject": "preparer: TEACHBACK for foo", + "owner": "pact-preparer", + "addBlocks": ["42"], + }, + "tool_response": {}, + } + advisories = tlg.evaluate_lifecycle(payload) + assert not any(rule == "teachback_addblocks_missing" for rule, _ in advisories), ( + f"expected silent (teachback_addblocks_missing satisfied), got: {advisories}" + ) + + +# ============================================================================= +# work_addblockedby_missing — pact-* non-TEACHBACK Task without addBlockedBy=[] +# ============================================================================= + + +def test_silent_when_work_task_carries_addblockedby(pact_context): + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskCreate", + "tool_input": { + "subject": "implement foo", + "owner": "pact-backend-coder", + "addBlockedBy": ["41"], + }, + "tool_response": {}, + } + advisories = tlg.evaluate_lifecycle(payload) + assert not any(rule == "work_addblockedby_missing" for rule, _ in advisories) + + +def test_silent_when_owner_is_not_pact_specialist(pact_context): + """Non-pact-* owner doesn't trigger work_addblockedby_missing even without addBlockedBy.""" + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskCreate", + "tool_input": { + "subject": "lead-only task", + "owner": "team-lead", + }, + "tool_response": {}, + } + advisories = tlg.evaluate_lifecycle(payload) + assert not any(rule == "work_addblockedby_missing" for rule, _ in advisories) + + +# ============================================================================= +# completion_no_paired_send — teachback completion without paired SendMessage (120s window) +# ============================================================================= + + +def _setup_team_inbox( + tmp_path: Path, + monkeypatch, + owner: str, + team_name: str, + paired_offset_seconds: float | None, +): + """Seed ~/.claude/teams/{team_name}/inboxes/{owner}.json with one message + from team-lead at `now - paired_offset_seconds`. None → empty inbox. + """ + monkeypatch.setattr(Path, "home", lambda: tmp_path) + inbox_dir = tmp_path / ".claude" / "teams" / team_name / "inboxes" + inbox_dir.mkdir(parents=True) + if paired_offset_seconds is not None: + ts = datetime.now(timezone.utc).timestamp() - paired_offset_seconds + ts_iso = ( + datetime.fromtimestamp(ts, tz=timezone.utc) + .isoformat() + .replace("+00:00", "Z") + ) + messages = [ + { + "from": "team-lead", + "text": "completion ack", + "timestamp": ts_iso, + } + ] + else: + messages = [] + (inbox_dir / f"{owner}.json").write_text( + json.dumps(messages), encoding="utf-8" + ) + + +def test_silent_when_paired_sendmessage_within_window( + tmp_path, monkeypatch, pact_context +): + """Paired SendMessage 30s ago (well within 120s) → no completion_no_paired_send advisory.""" + pact_context(team_name="test-team", session_id="test-session") + _setup_team_inbox( + tmp_path, monkeypatch, owner="preparer", team_name="test-team", + paired_offset_seconds=30, + ) + payload = { + "tool_name": "TaskUpdate", + "tool_input": {"taskId": "1", "status": "completed"}, + "tool_response": { + "task": { + "id": "1", + "subject": "preparer: TEACHBACK for foo", + "owner": "preparer", + "metadata": {}, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert not any(rule == "completion_no_paired_send" for rule, _ in advisories), ( + f"expected silent within 120s window, got: {advisories}" + ) + + +def test_silent_at_119s_boundary(tmp_path, monkeypatch, pact_context): + """119s ago is still within the 120s window → silent.""" + pact_context(team_name="test-team", session_id="test-session") + _setup_team_inbox( + tmp_path, monkeypatch, owner="preparer", team_name="test-team", + paired_offset_seconds=119, + ) + payload = { + "tool_name": "TaskUpdate", + "tool_input": {"taskId": "1", "status": "completed"}, + "tool_response": { + "task": { + "id": "1", + "subject": "preparer: TEACHBACK for foo", + "owner": "preparer", + "metadata": {}, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert not any(rule == "completion_no_paired_send" for rule, _ in advisories) + + +def test_advisory_at_121s_boundary(tmp_path, monkeypatch, pact_context): + """121s ago is outside the 120s window → completion_no_paired_send fires.""" + pact_context(team_name="test-team", session_id="test-session") + _setup_team_inbox( + tmp_path, monkeypatch, owner="preparer", team_name="test-team", + paired_offset_seconds=121, + ) + payload = { + "tool_name": "TaskUpdate", + "tool_input": {"taskId": "1", "status": "completed"}, + "tool_response": { + "task": { + "id": "1", + "subject": "preparer: TEACHBACK for foo", + "owner": "preparer", + "metadata": {}, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert any(rule == "completion_no_paired_send" for rule, _ in advisories), ( + f"expected completion_no_paired_send outside window, got: {advisories}" + ) + + +def test_advisory_when_inbox_empty(tmp_path, monkeypatch, pact_context): + """No paired SendMessage at all → completion_no_paired_send fires.""" + pact_context(team_name="test-team", session_id="test-session") + _setup_team_inbox( + tmp_path, monkeypatch, owner="preparer", team_name="test-team", + paired_offset_seconds=None, + ) + payload = { + "tool_name": "TaskUpdate", + "tool_input": {"taskId": "1", "status": "completed"}, + "tool_response": { + "task": { + "id": "1", + "subject": "preparer: TEACHBACK for foo", + "owner": "preparer", + "metadata": {}, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert any(rule == "completion_no_paired_send" for rule, _ in advisories) + + +# ============================================================================= +# handoff_missing — pact-* work-Task completed with empty metadata.handoff +# ============================================================================= + + +def test_silent_when_handoff_well_formed(pact_context): + """Valid handoff schema → no handoff_missing and no handoff_schema_invalid.""" + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskUpdate", + "tool_input": {"taskId": "42", "status": "completed"}, + "tool_response": { + "task": { + "id": "42", + "subject": "implement foo", + "owner": "pact-backend-coder", + "metadata": { + "handoff": { + "produced": "x", + "decisions": "x", + "reasoning_chain": "x", + "uncertainty": "x", + "integration": "x", + "open_questions": "x", + } + }, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert not any(rule == "handoff_missing" for rule, _ in advisories) + assert not any(rule == "handoff_schema_invalid" for rule, _ in advisories) + + +@pytest.mark.parametrize( + "metadata_shape", + [ + {}, # absent metadata.handoff + {"handoff": {}}, # empty-dict metadata.handoff + {"handoff": None}, # explicit-null metadata.handoff + ], + ids=["absent", "empty_dict", "null"], +) +def test_advisory_when_pact_work_task_completes_without_handoff( + metadata_shape, pact_context, +): + """A pact-* work Task that transitions to status=completed without a + metadata.handoff payload fires the handoff_missing advisory. Covers + three variants of "no handoff": absent key, empty dict, explicit + null. The schema-invalid rule must NOT also fire — handoff_missing + and handoff_schema_invalid are disjoint per the gate contract. + """ + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskUpdate", + "tool_input": {"taskId": "42", "status": "completed"}, + "tool_response": { + "task": { + "id": "42", + "subject": "implement foo", + "owner": "pact-backend-coder", + "metadata": metadata_shape, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert any(rule == "handoff_missing" for rule, _ in advisories), ( + f"expected handoff_missing advisory for {metadata_shape}, " + f"got: {advisories}" + ) + assert not any(rule == "handoff_schema_invalid" for rule, _ in advisories), ( + "handoff_missing and handoff_schema_invalid must be disjoint; " + f"both fired for {metadata_shape}" + ) + + +# ============================================================================= +# handoff_schema_invalid — handoff present but schema malformed (disjoint from handoff_missing) +# ============================================================================= + + +@pytest.mark.parametrize( + "missing_field", + [ + "produced", + "decisions", + "reasoning_chain", + "uncertainty", + "integration", + "open_questions", + ], +) +def test_advisory_for_each_missing_required_field(missing_field, pact_context): + """Handoff present but missing one required field → handoff_schema_invalid. Disjoint + from handoff_missing — handoff_missing fires only on missing/empty handoff payload entirely. + """ + pact_context(team_name="test-team", session_id="test-session") + full_handoff = { + "produced": "x", + "decisions": "x", + "reasoning_chain": "x", + "uncertainty": "x", + "integration": "x", + "open_questions": "x", + } + full_handoff.pop(missing_field) + payload = { + "tool_name": "TaskUpdate", + "tool_input": {"taskId": "42", "status": "completed"}, + "tool_response": { + "task": { + "id": "42", + "subject": "implement foo", + "owner": "pact-backend-coder", + "metadata": {"handoff": full_handoff}, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert any(rule == "handoff_schema_invalid" for rule, _ in advisories), ( + f"expected handoff_schema_invalid advisory for missing {missing_field}, got: {advisories}" + ) + # handoff_missing must NOT also fire — disjoint per impl / lead clarification. + assert not any(rule == "handoff_missing" for rule, _ in advisories) + + +def test_advisory_when_handoff_is_non_dict(pact_context): + """metadata.handoff is a string instead of a dict → handoff_schema_invalid advisory.""" + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskUpdate", + "tool_input": {"taskId": "42", "status": "completed"}, + "tool_response": { + "task": { + "id": "42", + "subject": "implement foo", + "owner": "pact-backend-coder", + "metadata": {"handoff": "just a string"}, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert any(rule == "handoff_schema_invalid" for rule, _ in advisories) + + +# ============================================================================= +# self_completion carve-outs — secretary, signal task, recursion marker, unresolvable actor +# ============================================================================= + + +def test_silent_when_secretary_self_completes(pact_context): + """Secretary owner is in SELF_COMPLETE_EXEMPT_AGENTS → no advisory.""" + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskUpdate", + "agent_id": "secretary@test-team", + "tool_input": {"taskId": "5", "status": "completed"}, + "tool_response": { + "task": { + "id": "5", + "subject": "save institutional memory", + "owner": "secretary", + "metadata": { + "handoff": { + "produced": "x", + "decisions": "x", + "reasoning_chain": "x", + "uncertainty": "x", + "integration": "x", + "open_questions": "x", + } + }, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert not any(rule == "self_completion" for rule, _ in advisories) + + +def test_silent_when_signal_task_self_completes(pact_context): + """Signal task (metadata.completion_type='signal' AND + metadata.type in {'blocker','algedonic'}) is exempted by + is_self_complete_exempt(task) per shared.intentional_wait L201-L204. + """ + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskUpdate", + "agent_id": "backend-coder-3@test-team", + "tool_input": {"taskId": "6", "status": "completed"}, + "tool_response": { + "task": { + "id": "6", + "subject": "signal: ack", + "owner": "backend-coder-3", + "metadata": { + "completion_type": "signal", + "type": "blocker", + "handoff": { + "produced": "x", + "decisions": "x", + "reasoning_chain": "x", + "uncertainty": "x", + "integration": "x", + "open_questions": "x", + }, + }, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert not any(rule == "self_completion" for rule, _ in advisories) + + +def test_skips_when_actor_unresolvable( + pact_context, +): + """Documents an intentional deviation from architect §5.3: that + spec says when ``trustworthy_actor_name`` returns None (no + agent_id, or no ``@`` in agent_id), the gate should still emit a + self-completion advisory. + + The CURRENT implementation (task_lifecycle_gate.py condition + ``actor is not None``) skips the advisory in that case. + + This test encodes the CURRENT skip behavior so a future change + surfaces the deviation deliberately. Resolution tracked in a + follow-up issue (filed at stage-ready). DO NOT 'fix' the gate to + satisfy this test — fix the test only if the architect §5.3 + reconciliation lands. + """ + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskUpdate", + # No agent_id at all → trustworthy_actor_name returns None. + "tool_input": {"taskId": "7", "status": "completed"}, + "tool_response": { + "task": { + "id": "7", + "subject": "implement foo", + "owner": "backend-coder-3", + "metadata": { + "handoff": { + "produced": "x", + "decisions": "x", + "reasoning_chain": "x", + "uncertainty": "x", + "integration": "x", + "open_questions": "x", + } + }, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + # Architect §5.3 would expect a self_completion advisory; current impl skips. Assert SKIP. + assert not any(rule == "self_completion" for rule, _ in advisories), ( + "If self_completion fired here, the gate has been changed to match architect " + "§5.3 (advisory-emit on unresolvable actor). Confirm the change " + "was intentional and update this test + close the follow-up issue." + ) + + +# ============================================================================= +# self_completion — lead-driven completion is silent (actor != owner) +# ============================================================================= + + +def test_silent_when_lead_completes_teammates_task(pact_context): + """team-lead@test-team completing a teammate's task → not self_completion.""" + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskUpdate", + "agent_id": "team-lead@test-team", + "tool_input": {"taskId": "8", "status": "completed"}, + "tool_response": { + "task": { + "id": "8", + "subject": "implement foo", + "owner": "backend-coder-3", + "metadata": { + "handoff": { + "produced": "x", + "decisions": "x", + "reasoning_chain": "x", + "uncertainty": "x", + "integration": "x", + "open_questions": "x", + } + }, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert not any(rule == "self_completion" for rule, _ in advisories) + + +# ============================================================================= +# module-load advisory contract (smoke covers the full helper invoke) +# ============================================================================= + + +def test_runtime_advisory_carries_post_tool_use_event_name(capsys): + """Direct invocation of _emit_load_failure_advisory under simulated + runtime exception → exit 0 (PostToolUse cannot DENY) + hookEventName + 'PostToolUse' in the output. Mirrors smoke S6 with broader assertion. + """ + err = RuntimeError("simulated runtime fail") + with pytest.raises(SystemExit) as exc: + tlg._emit_load_failure_advisory("runtime", err) + assert exc.value.code == 0 + out = json.loads(capsys.readouterr().out.strip()) + hso = out["hookSpecificOutput"] + assert hso["hookEventName"] == "PostToolUse" + assert "additionalContext" in hso + assert "runtime" in hso["additionalContext"] + assert "RuntimeError" in hso["additionalContext"] + + +# ============================================================================= +# Anti-sprawl invariant +# ============================================================================= + + +def test_evaluate_lifecycle_is_single_composition_function(): + """Auditor §11 YELLOW: gate file is 429 LOC. Pin that the F-row rules + compose in a single decision function rather than fragmenting. + """ + import inspect + + public_evaluate_fns = [ + name + for name, obj in inspect.getmembers(tlg, inspect.isfunction) + if name.startswith("evaluate_") and not name.startswith("_") + ] + assert public_evaluate_fns == ["evaluate_lifecycle"], ( + f"expected single evaluate_lifecycle, got {public_evaluate_fns}" + ) + forbidden_prefixes = ( + "_evaluate_f", + "_f8_", + "_f9_", + "_f10_", + "_f11_", + "_f12_", + "_f13_", + ) + fn_names = [ + name for name, _ in inspect.getmembers(tlg, inspect.isfunction) + ] + sprawl = [ + n for n in fn_names if any(n.startswith(p) for p in forbidden_prefixes) + ] + assert not sprawl, f"per-F-row sprawl detected: {sprawl}" + + +# ============================================================================= +# Defensive: malformed stdin / non-target tool / empty advisories path +# ============================================================================= + + +def test_main_no_op_for_unrelated_tool(capsys): + """matcher should already restrict, but defensive belt: tool_name='Read' + → suppressOutput, exit 0. + """ + code, out = _capture_main({"tool_name": "Read"}, capsys) + assert code == 0 + assert out == {"suppressOutput": True} + + +def test_main_no_op_on_malformed_stdin(capsys): + """Malformed JSON → fail-OPEN with suppressOutput.""" + with patch.object(sys, "stdin", io.StringIO("not json")): + with pytest.raises(SystemExit) as exc: + tlg.main() + assert exc.value.code == 0 + out = capsys.readouterr().out.strip() + assert json.loads(out) == {"suppressOutput": True} diff --git a/pact-plugin/tests/test_task_lifecycle_gate_smoke.py b/pact-plugin/tests/test_task_lifecycle_gate_smoke.py new file mode 100644 index 00000000..96a087f7 --- /dev/null +++ b/pact-plugin/tests/test_task_lifecycle_gate_smoke.py @@ -0,0 +1,216 @@ +""" +Smoke tests for task_lifecycle_gate.py — PostToolUse hook enforcing +PACT lifecycle rules (#662 Commit 3). + +NOT comprehensive coverage — that is TEST-phase scope. These cases lock +the load-bearing decisions in place so a future regression surfaces fast: + + S1. teachback_addblocks_missing advisory: TEACHBACK Task created without addBlocks + S2. work_addblockedby_missing advisory: pact-* Task created without addBlockedBy + S3. handoff_missing advisory: pact-* work-Task completed with empty metadata.handoff + S4. self_completion advisory + writeback: teammate self-completes; assert + metadata.completion_disputed=True AND gate_writeback=True written + to disk + S5. self_completion recursion-marker self-skip: tool_input.metadata.gate_writeback + already True → no advisories, no writeback (counter-test) + S6. Module-load fail-closed counter-test: simulate cross-package import failure + via the public _emit_load_failure_advisory helper → advisory emitted + with hookEventName + exit 0 +""" + +import io +import json +import sys +from pathlib import Path +from unittest.mock import patch + +import pytest + + +sys.path.insert(0, str(Path(__file__).parent.parent / "hooks")) + +import task_lifecycle_gate as tlg # noqa: E402 + + +# ─── helpers ─────────────────────────────────────────────────────────────── + + +def _stdin(payload: dict) -> io.StringIO: + return io.StringIO(json.dumps(payload)) + + +def _capture_main(payload: dict, capsys) -> tuple[int, dict | None]: + """Run main() with payload as stdin; return (exit_code, parsed_stdout).""" + with patch.object(sys, "stdin", _stdin(payload)): + with pytest.raises(SystemExit) as exc: + tlg.main() + raw_code = exc.value.code if exc.value.code is not None else 0 + code = int(raw_code) if isinstance(raw_code, int) else 0 + out = capsys.readouterr().out.strip() + parsed = json.loads(out) if out else None + return code, parsed + + +# ─── S1: teachback_addblocks_missing — TEACHBACK Task created without addBlocks ─ + + +def test_teachback_create_without_addblocks(pact_context, capsys): + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskCreate", + "tool_input": { + "subject": "preparer: TEACHBACK for foo", + "owner": "pact-preparer", + # addBlocks deliberately omitted + }, + "tool_response": {}, + } + advisories = tlg.evaluate_lifecycle(payload) + assert any(rule == "teachback_addblocks_missing" for rule, _ in advisories), ( + f"expected teachback_addblocks_missing advisory, got: {advisories}" + ) + + +# ─── S2: work_addblockedby_missing — pact-* Task created without addBlockedBy ─── + + +def test_work_task_create_without_addblockedby(pact_context, capsys): + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskCreate", + "tool_input": { + "subject": "implement foo", + "owner": "pact-backend-coder", + # addBlockedBy deliberately omitted + }, + "tool_response": {}, + } + advisories = tlg.evaluate_lifecycle(payload) + assert any(rule == "work_addblockedby_missing" for rule, _ in advisories), ( + f"expected work_addblockedby_missing advisory, got: {advisories}" + ) + + +# ─── S3: handoff_missing — pact-* work-Task completed without metadata.handoff ── + + +def test_work_task_completed_without_handoff(pact_context, capsys): + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskUpdate", + "tool_input": {"taskId": "42", "status": "completed"}, + "tool_response": { + "task": { + "id": "42", + "subject": "implement foo", + "owner": "pact-backend-coder", + "metadata": {}, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert any(rule == "handoff_missing" for rule, _ in advisories), ( + f"expected handoff_missing advisory, got: {advisories}" + ) + # handoff_schema_invalid must NOT also fire — disjoint per lead clarification. + assert not any(rule == "handoff_schema_invalid" for rule, _ in advisories) + + +# ─── S4: self_completion — teammate self-completes → advisory + FS writeback ──── + + +def test_self_completion_writeback(tmp_path, monkeypatch, pact_context): + pact_context(team_name="test-team", session_id="test-session") + + # Stage a fake task on disk under HOME/.claude/tasks/test-team/. + monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + tasks_dir = tmp_path / ".claude" / "tasks" / "test-team" + tasks_dir.mkdir(parents=True) + task_path = tasks_dir / "99.json" + task_payload = { + "id": "99", + "subject": "implement foo", + "owner": "backend-coder-3", + "metadata": {}, + } + task_path.write_text(json.dumps(task_payload), encoding="utf-8") + + # agent_id=name@team format → trustworthy_actor_name returns "backend-coder-3". + payload = { + "tool_name": "TaskUpdate", + "agent_id": "backend-coder-3@test-team", + "tool_input": {"taskId": "99", "status": "completed"}, + "tool_response": { + "task": { + "id": "99", + "subject": "implement foo", + "owner": "backend-coder-3", + "metadata": { + "handoff": { # well-formed so handoff_missing/handoff_schema_invalid don't also fire + "produced": "x", + "decisions": "x", + "reasoning_chain": "x", + "uncertainty": "x", + "integration": "x", + "open_questions": "x", + } + }, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert any(rule == "self_completion" for rule, _ in advisories), ( + f"expected self_completion advisory, got: {advisories}" + ) + + # Writeback must have landed on disk with both markers. + written = json.loads(task_path.read_text(encoding="utf-8")) + assert written["metadata"]["completion_disputed"] is True + assert written["metadata"]["gate_writeback"] is True + + +# ─── S5: self_completion recursion-marker self-skip ───────────────────────────── + + +def test_recursion_marker_self_skip(pact_context): + pact_context(team_name="test-team", session_id="test-session") + payload = { + "tool_name": "TaskUpdate", + "agent_id": "backend-coder-3@test-team", + "tool_input": { + "taskId": "99", + "status": "completed", + # gate_writeback=True → step ① short-circuit + "metadata": {"gate_writeback": True, "completion_disputed": True}, + }, + "tool_response": { + "task": { + "id": "99", + "subject": "implement foo", + "owner": "backend-coder-3", + "metadata": {"gate_writeback": True, "completion_disputed": True}, + } + }, + } + advisories = tlg.evaluate_lifecycle(payload) + assert advisories == [], ( + f"expected silent skip on recursion marker, got: {advisories}" + ) + + +# ─── S6: Module-load fail-closed-as-advisory simulation ───────────────────────── + + +def test_load_failure_emits_advisory_exit_0(capsys): + err = ImportError("simulated cross-package import failure") + with pytest.raises(SystemExit) as exc: + tlg._emit_load_failure_advisory("module imports", err) + # PostToolUse cannot DENY — exit 0, advisory output, hookEventName present. + assert exc.value.code == 0 + parsed = json.loads(capsys.readouterr().out.strip()) + hso = parsed["hookSpecificOutput"] + assert hso["hookEventName"] == "PostToolUse" + assert "additionalContext" in hso + assert "task_lifecycle_gate" in hso["additionalContext"] + assert "ImportError" in hso["additionalContext"]