From 40d9b1c27569a417085f57d92a6e96f2c552b671 Mon Sep 17 00:00:00 2001
From: Alex <alex@bytescribe.com>
Date: Wed, 10 Jun 2026 08:08:20 -0400
Subject: [PATCH] =?UTF-8?q?feat:=20loop-age=20features=20=E2=80=94=20guard?=
 =?UTF-8?q?s.protect,=20SubagentStop,=20check=20--against,=20progress-awar?=
 =?UTF-8?q?e=20bounces?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Four features that put donegate inside agentic fan-out workflows instead
of only at the session's terminal stop:

- guards.protect + no_protected_edits: pin the files the checks *mean*
  (package.json scripts, lint/test configs). Hashed into the baseline
  like the donefile; changed, deleted, or newly shadowing files are
  findings. Falls back to the git diff when there is no baseline, so it
  works in CI. Closes the '"test": "exit 0"' indirection hole.

- SubagentStop adapter (Claude Code): donegate install claude now wires
  `donegate hook claude --subagent` — a guards-only tamper scan at every
  subagent boundary. No checks run, so fan-outs are gated per node at
  git-diff cost; findings bounce the subagent while it still has the
  context to undo them. Subagent bounces keep their own ledger so a
  noisy fan-out can't burn the terminal gate's budget.

- donegate check --against <ref>: judge mode. Evaluates checks + guards
  against an explicit ref, ignoring the session baseline — grade each
  worktree against its fork point from a workflow script, pin CI to the
  PR base, or re-derive a verdict past a re-blessed baseline. Receipts
  record kind "explicit"; a nonexistent ref is a config error (exit 2),
  never a silent pass.

- Progress-aware bounce budget: gate.max_bounces now counts consecutive
  bounces without new progress. A stop attempt with strictly fewer
  failing checks + tripped guards than the session's best refreshes the
  budget (and says so in the reason). Best-ever is the bar, so
  oscillating failure sets can't farm refreshes and total bounces stay
  bounded — loop-until-done semantics without the hostage situation.

Docs: new docs/agent-loops.md (terminal gate / per-node scan / judge
mode, worktree behavior), spec + hooks + threat-model + README updated.
An empty guards.protect adds no receipt noise for existing repos.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 CHANGELOG.md          |  23 +++++++++
 README.md             |  25 +++++++---
 docs/agent-loops.md   |  90 +++++++++++++++++++++++++++++++++++
 docs/hooks.md         |  33 +++++++++----
 docs/spec.md          |  32 +++++++++++--
 docs/threat-model.md  |  18 ++++---
 src/baseline.ts       |  18 +++++++
 src/check.ts          |  22 +++++++--
 src/cli.ts            |  17 +++++--
 src/donefile.ts       |  11 ++++-
 src/guards.ts         |  61 +++++++++++++++++++++++-
 src/hooks.ts          |  79 +++++++++++++++++++++++++------
 src/install.ts        |  20 ++++++--
 src/types.ts          |  15 +++++-
 test/donefile.test.ts |  10 ++++
 test/guards.test.ts   | 106 +++++++++++++++++++++++++++++++++++++++++-
 test/helpers.ts       |   4 ++
 test/hooks.test.ts    |  79 +++++++++++++++++++++++++++++++
 test/install.test.ts  |  26 +++++++++++
 19 files changed, 635 insertions(+), 54 deletions(-)
 create mode 100644 docs/agent-loops.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 61668e0..e911fb1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,29 @@
 
 ## Unreleased
 
+- **`guards.protect` + `no_protected_edits`** — pin the files your checks
+  *mean* (package.json scripts, lint/test/build configs). They're hashed into
+  the baseline like the donefile itself; any change, deletion, or new
+  shadowing file is a finding. Closes the `"test": "exit 0"` hole. Falls back
+  to the git diff when there's no baseline, so it works in CI too.
+- **`SubagentStop` adapter (Claude Code)** — `donegate install claude` now
+  also wires a guards-only tamper scan at every subagent boundary
+  (`donegate hook claude --subagent`). No checks run, so fan-out workflows are
+  gated per node at git-diff cost; subagent bounces use their own ledger.
+- **`donegate check --against <ref>`** — judge mode: evaluate checks + guards
+  against an explicit git ref, ignoring the session baseline. Makes donegate
+  scriptable as the deterministic judge in fan-out workflows (grade each
+  worktree against its fork point) and re-derives verdicts from git history
+  alone. Receipts record the comparison as `explicit`; a nonexistent ref is a
+  config error, not a silent pass.
+- **Progress-aware bounce budget** — `gate.max_bounces` now counts
+  *consecutive bounces without new progress*: a stop attempt with strictly
+  fewer failing checks + tripped guards than the session's best refreshes the
+  budget (and says so). An agent steadily fixing a long list is never cut off
+  mid-fix; "best ever" as the bar keeps total bounces bounded.
+- `docs/agent-loops.md` — where donegate sits in agentic loops and dynamic
+  workflows: terminal gate, per-subagent guard scan, judge mode, worktree
+  behavior.
 - **The donefile can no longer be deleted or broken out of the way.** The stop
   hook used to treat a missing DONE.md as "not my repo" and an unparseable one
   as a config typo — both fail-open, both one `rm` or one bad edit away from
diff --git a/README.md b/README.md
index d23f256..f74c063 100644
--- a/README.md
+++ b/README.md
@@ -139,9 +139,12 @@ guards:
   no_done_edits: true      # this file edited mid-session         → fail
   no_new_todos: warn
   no_debug_artifacts: warn
+  protect:                 # files that define what the checks MEAN
+    - package.json         # ("test": "exit 0" is not a fix)
+    - eslint.config.js
 
 gate:
-  max_bounces: 3           # re-prompts per session before giving up
+  max_bounces: 3           # no-progress re-prompts before giving up
 ```
 ````
 
@@ -162,6 +165,7 @@ tries to finish, it diffs reality against that baseline:
 | `no_deleted_tests` | deleted test files, per-file test counts dropping | fail |
 | `no_disabled_lint` | `eslint-disable` `biome-ignore` `@ts-ignore` `# noqa` `# type: ignore` `//nolint` `#[allow(...)]` `@SuppressWarnings` `rubocop:disable` — added anywhere | fail |
 | `no_done_edits` | DONE.md modified or deleted mid-session | fail |
+| `no_protected_edits` | files listed in `guards.protect` (package.json, lint/test configs — the files that define what the checks *mean*) changed, deleted, or shadowed | fail |
 | `no_new_todos` | `TODO` / `FIXME` / `HACK` introduced in code | warn |
 | `no_debug_artifacts` | `console.log` `debugger` `breakpoint()` `pdb.set_trace` `binding.pry` `dbg!` left in non-test code | warn |
 
@@ -176,16 +180,20 @@ followed, so moving a test file is never "deleting" it.
 
 Guards are a **ratchet, not a sandbox**: they make the cheap, common shortcuts
 loud and expensive, with receipts. An agent with shell access can still find
-quieter moves — weakening assertions, redefining what `npm test` means in
-package.json, re-blessing the baseline itself. What the gate catches, what it
-deliberately doesn't, and why CI is the copy of the gate an agent can't touch:
-[docs/threat-model.md](docs/threat-model.md).
+quieter moves — weakening assertions, re-blessing the baseline itself. What
+the gate catches, what it deliberately doesn't, and why CI is the copy of the
+gate an agent can't touch: [docs/threat-model.md](docs/threat-model.md).
+
+Running fan-out workflows with subagents and worktrees? donegate gates those
+boundaries too — a guards-only scan at every `SubagentStop`, and
+`check --against <ref>` as the deterministic judge over any diff:
+[docs/agent-loops.md](docs/agent-loops.md).
 
 ## Works with
 
 | | command | mechanism |
 |---|---|---|
-| **Claude Code** | `donegate install claude` | `Stop` hook — blocks the stop, feeds failures back |
+| **Claude Code** | `donegate install claude` | `Stop` hook — blocks the stop, feeds failures back · `SubagentStop` — guards-only scan per subagent |
 | **Codex CLI** | `donegate install codex` | `Stop` hook (`.codex/hooks.json`) |
 | **Cursor** | `donegate install cursor` | `stop` hook → `followup_message` |
 | **GitHub Actions** | `donegate install ci` | gates PRs, posts the receipt as a comment |
@@ -239,6 +247,11 @@ is not the agent's to edit.
 **Won't it delete the failing test?** That trips `no_deleted_tests` — file
 deletions *and* per-file test-count drops.
 
+**Won't it just change what `npm test` means in package.json?** List the
+files your checks depend on in `guards.protect` and that trips
+`no_protected_edits` — they're hashed into the baseline like the donefile
+itself.
+
 **Does this replace CI?** No — it runs *before* the agent declares victory,
 while it still has context to fix things. CI stays as the backstop (and
 `donegate install ci` makes CI speak DONE.md too).
diff --git a/docs/agent-loops.md b/docs/agent-loops.md
new file mode 100644
index 0000000..b6a5383
--- /dev/null
+++ b/docs/agent-loops.md
@@ -0,0 +1,90 @@
+# donegate in agent loops
+
+Coding agents run a loop: gather context → take action → verify → repeat.
+Increasingly that loop fans out — orchestrators spawn subagents, subagents get
+their own worktrees, workflow scripts coordinate the lot. donegate has a
+specific seat at three points of that topology, and this page maps them.
+
+## The three seats
+
+| where | mechanism | what runs | cost |
+|---|---|---|---|
+| **terminal stop** | `Stop` hook | full gate: checks + guards | your test suite |
+| **subagent boundary** | `SubagentStop` hook (`hook claude --subagent`) | guards only | git diffs + regexes — fast |
+| **judge in a fan-out** | `donegate check --against <ref> --json` | checks + guards vs an explicit ref | your call (use `--only` to scope) |
+
+### Terminal stop — the gate on the loop's exit
+
+The classic donegate role: the agent tries to finish, the gate runs the
+repo's definition of done, failure bounces the agent back with the report in
+its context. This is the **deterministic verifier** in the loop's
+verify-work phase — exit codes and diffs, no LLM judging anything, which also
+means it can't share an LLM judge's self-preference for the code that was
+just written.
+
+### Subagent boundary — tamper scan per node
+
+A full test suite per subagent would be brutal; a tamper scan isn't. The
+`SubagentStop` hook (installed automatically by `donegate install claude`)
+runs **guards only**: did this subagent skip or delete tests, silence the
+linter, touch a protected file, edit the donefile? Findings block the
+subagent's completion the same way the stop hook blocks the session — the
+finding lands while the subagent still has the context to undo it, instead of
+surfacing at the terminal stop after its output was already absorbed.
+
+Read-only subagents (searchers, reviewers) change nothing, trip nothing, and
+pay one git diff. Subagent bounces are tracked in their own ledger
+(`<session>:subagent`), so a noisy fan-out can't burn the bounce budget the
+terminal gate relies on.
+
+### Judge mode — `--against` in workflow scripts
+
+Fan-out patterns end with verification: N agents produced N diffs, something
+deterministic should grade them before anything merges. `--against` pins the
+comparison to an explicit ref — the worktree's fork point, the PR base —
+instead of whatever baseline/merge-base resolution would guess. With `--json`
+the receipt is machine-readable; the exit code is the verdict (0 done /
+1 checks failed / 2 config error, e.g. a nonexistent ref / 3 bar was lowered).
+
+```js
+// inside a workflow script: judge each worktree before accepting it
+const verdict = await bash(
+  `cd ${worktree} && npx -y donegate check --against ${forkPoint} --json --quiet`,
+);
+// exit 0 → accept; exit 3 → the diff "passes" because the bar moved — reject loudly
+```
+
+`--against` deliberately **ignores the session baseline** — judge mode judges
+a diff, not a session. That also makes it the answer to a re-blessed
+baseline: `donegate check --against origin/main` re-derives the verdict from
+git history alone.
+
+## Worktree behavior
+
+Linked worktrees get their own `.donegate/` (it's per-root and gitignored).
+Inside a fresh worktree there is usually **no session baseline**, so guards
+fall back to git comparisons — added-line scans against HEAD or merge-base
+still work; baseline-only detections (count drops in untouched files,
+protected-file hashes) degrade gracefully. For full-strength guards in a
+worktree, record a baseline when it's created (`donegate baseline`) or judge
+it from outside with `--against <fork point>`.
+
+## Loop-until-done, bounded
+
+A fixed bounce cap fights the loop: an agent steadily fixing a long failure
+list gets cut off mid-fix. donegate's budget counts **consecutive bounces
+without new progress** — when a stop attempt's failure count (failing checks +
+tripped guards) drops below the session's best, the budget refreshes and the
+agent is told so. "Best ever" is the bar, not "better than last time," so
+oscillating between two failure sets can't farm refreshes; total bounces stay
+bounded and a wedged session still exits with a red receipt.
+
+## What this does not change
+
+The loop's failure modes that donegate addresses are the *mechanical* ones:
+declaring done early (agentic laziness that trips a check), lowering the bar
+to get green (guards), drifting past the definition of done (DONE.md is
+re-read from disk every stop — compaction can't summarize it away). The
+*semantic* failure modes — weakened assertions, vacuous tests, an agent
+grading its own homework — still need a clean-context reviewer or a human;
+see [threat-model.md](threat-model.md) for the honest boundary.
diff --git a/docs/hooks.md b/docs/hooks.md
index 05bd7da..bc711f2 100644
--- a/docs/hooks.md
+++ b/docs/hooks.md
@@ -1,12 +1,18 @@
 # Agent integrations
 
-`donegate install <target>` wires the gate into an agent's lifecycle. Two hooks
-get installed per agent:
+`donegate install <target>` wires the gate into an agent's lifecycle:
 
 - **session start** → `donegate baseline --if-missing --quiet` — snapshots
-  test files and DONE.md so the tamper guards have something to diff against.
+  test files, protected files, and DONE.md so the tamper guards have something
+  to diff against.
 - **stop** → `donegate hook <agent>` — runs the full gate when the agent tries
   to finish, and blocks the stop (with the failure report) if the verdict is red.
+- **subagent stop** (Claude Code only) → `donegate hook claude --subagent` —
+  a **guards-only** tamper scan at every subagent boundary. No checks run, so
+  it's cheap enough to pay per subagent; a subagent that skipped tests or
+  touched a protected file is bounced while it still has the context to undo
+  it. Subagent bounces use their own ledger so a noisy fan-out can't burn the
+  terminal gate's budget.
 
 Project-level installs are the default and are **shareable** — commit the config
 and every teammate's agent is gated too. Add `--global` to install at the user
@@ -29,6 +35,9 @@ budget — keep their sum under the stop timeout.
     "Stop": [
       { "hooks": [{ "type": "command", "command": "npx -y donegate hook claude" }] }
     ],
+    "SubagentStop": [
+      { "hooks": [{ "type": "command", "command": "npx -y donegate hook claude --subagent" }] }
+    ],
     "SessionStart": [
       { "hooks": [{ "type": "command", "command": "npx -y donegate baseline --if-missing --quiet" }] }
     ]
@@ -37,7 +46,9 @@ budget — keep their sum under the stop timeout.
 ```
 
 On a red verdict the hook prints `{"decision": "block", "reason": "<report>"}`
-— Claude Code keeps the session going and feeds the report to the model.
+— Claude Code keeps the session going and feeds the report to the model. The
+`SubagentStop` entry speaks the same contract but runs guards only (see
+[agent-loops.md](agent-loops.md)).
 
 ## Codex CLI
 
@@ -85,10 +96,16 @@ because tests were deleted" is visible right in the review.
 
 A stop hook that can block forever is a hostage situation, so every block
 increments a per-session bounce counter (`.donegate/state.json`, pruned after
-24h). After `gate.max_bounces` (default 3) the gate stops blocking and lets the
-stop through with a loud warning — but it keeps verifying, so the receipt
-always tells the truth. Sessions that recover reset their counter on the first
-green run.
+24h). After `gate.max_bounces` (default 3) **consecutive attempts without new
+progress** the gate stops blocking and lets the stop through with a loud
+warning — but it keeps verifying, so the receipt always tells the truth.
+
+Progress refreshes the budget: when a stop attempt's failure count (failing
+checks + tripped guards) drops strictly below the session's best so far, the
+counter resets and the agent is told so — a session steadily fixing a long
+list is never cut off mid-fix. "Best ever" is the bar rather than "better
+than last time", so oscillating between failure sets can't farm refreshes and
+total bounces stay bounded. Sessions that go green reset entirely.
 
 ## When the gate itself is the target
 
diff --git a/docs/spec.md b/docs/spec.md
index aab16be..d38cade 100644
--- a/docs/spec.md
+++ b/docs/spec.md
@@ -40,14 +40,24 @@ guards:               # optional — tamper detection levels
   no_disabled_lint: true     # eslint-disable/noqa/@ts-ignore/nolint added
   no_new_todos: warn         # TODO/FIXME/HACK introduced
   no_debug_artifacts: warn   # console.log/debugger/pdb.set_trace left behind
+  no_protected_edits: true   # files matching `protect` changed, deleted, or shadowed
   test_globs:                # optional — what counts as a test file
     ["**/*.test.*", "**/*.spec.*", "**/test_*.py", "**/*_test.go", "..."]
   exclude: []                # optional — files exempt from guard analysis
                              # (for code that legitimately CONTAINS the
                              # patterns: lint configs, scanners, donegate itself)
+  protect: []                # optional — globs for files the verdict depends on
+                             # but the gate doesn't run: the files that define
+                             # what the check commands MEAN (package.json,
+                             # eslint/jest/pytest/tsconfig configs). Hashed into
+                             # the baseline; any change, deletion, or new
+                             # shadowing file trips no_protected_edits.
 
 gate:                 # optional
-  max_bounces: 3      # stop-hook re-prompts per session before giving up (1-20)
+  max_bounces: 3      # consecutive no-progress stop-hook re-prompts per
+                      # session before giving up (1-20); progress — a strictly
+                      # lower failing-check + tripped-guard count than the
+                      # session's best — refreshes the budget
 ```
 
 Guard levels: `true` (findings fail the gate), `"warn"` (findings are reported
@@ -73,10 +83,17 @@ pass?"* Guards ask ***"was the bar lowered so it would pass?"*** They compare
 the current tree against a **baseline**:
 
 1. a **session baseline** recorded when an agent session starts (test-file
-   hashes, test/skip counts, the DONE.md hash, and the git HEAD at that moment), or
+   hashes, test/skip counts, hashes of `guards.protect` files, the DONE.md
+   hash, and the git HEAD at that moment), or
 2. **HEAD**, when there's uncommitted work and no session baseline, or
 3. the **merge-base with the default branch**, for clean trees (the CI case).
 
+An **explicit ref** (`donegate check --against <ref>`) overrides all three,
+including the session baseline: judge mode evaluates a diff, not a session.
+The verdict is then derivable from git history alone — useful for grading
+fan-out worktrees from a workflow script, pinning CI to the PR base, or
+re-deriving a verdict past a re-blessed baseline.
+
 All guard findings are deterministic, diff-based, and cite `file:line`
 evidence. Guards never call a model and never make network requests.
 
@@ -110,9 +127,14 @@ tries to finish:
   output tails, guard findings with file:line) is fed back to the agent, which
   keeps working. Each block increments a per-session **bounce counter**.
 - **pass** → the stop proceeds; the bounce counter resets; the receipt is green.
-- **bounces exhausted** (`gate.max_bounces`) → the gate stops *blocking* but
-  never stops *verifying*: the stop is allowed with a loud warning and a red
-  receipt. The gate must not be able to trap an agent in an infinite loop.
+- **progress** → a stop attempt whose failure count (failing checks + tripped
+  guards) is strictly below the session's best **refreshes the bounce budget**:
+  an agent steadily working down a list is never cut off mid-fix. Best-ever is
+  the bar, so alternating between failure sets cannot farm refreshes.
+- **bounces exhausted** (`gate.max_bounces` consecutive attempts without new
+  progress) → the gate stops *blocking* but never stops *verifying*: the stop
+  is allowed with a loud warning and a red receipt. The gate must not be able
+  to trap an agent in an infinite loop.
 - a repo **without** a DONE.md → the hook is a silent no-op. A **broken**
   DONE.md → warn and allow (a config typo must never wedge an agent).
 - user-initiated aborts are never blocked.
diff --git a/docs/threat-model.md b/docs/threat-model.md
index 5ec6930..5434f35 100644
--- a/docs/threat-model.md
+++ b/docs/threat-model.md
@@ -20,6 +20,8 @@ Deterministic, diff-based, with `file:line` receipts:
 | DONE.md edited mid-session | `no_done_edits` → exit 3 |
 | DONE.md **deleted** mid-session | stop bounced — the baseline remembers it existed |
 | DONE.md **broken** mid-session (no longer parses) | stop bounced — its hash no longer matches the baseline |
+| a `guards.protect` file changed/deleted/shadowed (package.json, lint config) | `no_protected_edits` → exit 3 |
+| a subagent tampering inside a fan-out | `SubagentStop` guard scan bounces it at its own boundary |
 
 Exit 3 — *"checks pass but the bar was lowered to get there"* — is
 deliberately a distinct, alarmable signal.
@@ -35,12 +37,16 @@ that rather than guess it:
   count stays flat), updating snapshots to bake a bug in, or writing both the
   bug and the test that blesses it. Catching these means judging *meaning*; a
   diff and a regex can't, and donegate refuses to guess.
-- **Command indirection.** DONE.md is hash-guarded, but `run: npm test`
-  resolves through package.json — and package.json, jest.config, pytest.ini,
-  tsconfig, and the linter config are all agent-editable. `"test": "exit 0"`
-  is green. Mitigations: prefer direct invocations in your checks
-  (`npx vitest run` rather than `npm test`) and treat tooling-config diffs as
-  review-worthy, because they are.
+- **Command indirection — unless you pin it.** DONE.md is hash-guarded, but
+  `run: npm test` resolves through package.json — and package.json,
+  jest.config, pytest.ini, tsconfig, and the linter config are all
+  agent-editable. `"test": "exit 0"` is green. The fix is `guards.protect`:
+  list those files and they're hashed into the baseline; any change, deletion,
+  or new shadowing config trips `no_protected_edits`. What protect *can't* do
+  is know which files you forgot to list — prefer direct invocations in your
+  checks (`npx vitest run` rather than `npm test`), protect the configs those
+  tools read, and treat tooling-config diffs as review-worthy, because they
+  are.
 - **Attacks on donegate's own state.** An agent with shell access can run
   `donegate baseline` to bless its own donefile edit, hand-edit
   `.donegate/state.json` to burn the bounce budget, or patch
diff --git a/src/baseline.ts b/src/baseline.ts
index 0525743..e5f7b84 100644
--- a/src/baseline.ts
+++ b/src/baseline.ts
@@ -203,6 +203,24 @@ export async function createBaseline(config: DoneConfig): Promise<Baseline> {
     donefile_path: path.relative(config.root, config.sourcePath).split(path.sep).join('/'),
     test_files: entries,
   };
+
+  // Pin the files the verdict depends on but the gate doesn't run — the ones
+  // that define what the check commands *mean* (package.json scripts,
+  // lint/test configs). No size cap: a lockfile is large and is exactly the
+  // kind of file worth pinning.
+  if (config.guards.protect.length > 0) {
+    const protectedEntries: Record<string, { sha: string }> = {};
+    for (const rel of walk(config.root, makeTestFileMatcher(config.guards.protect))) {
+      try {
+        const key = rel.split(path.sep).join('/');
+        protectedEntries[key] = { sha: sha256(fs.readFileSync(path.join(config.root, rel))) };
+      } catch {
+        // unreadable — skip
+      }
+    }
+    baseline.protected_files = protectedEntries;
+  }
+
   return baseline;
 }
 
diff --git a/src/check.ts b/src/check.ts
index 658a669..b290e82 100644
--- a/src/check.ts
+++ b/src/check.ts
@@ -1,5 +1,6 @@
 import type { CheckRunSummary, DoneConfig, Receipt } from './types.js';
-import { loadConfig } from './donefile.js';
+import { DonefileError, loadConfig } from './donefile.js';
+import { refExists } from './git.js';
 import { resolveComparison, runGuards } from './guards.js';
 import { runChecks } from './runner.js';
 import { buildReceipt, writeReceipt } from './receipt.js';
@@ -11,6 +12,13 @@ export interface CheckOptions {
   only?: string[];
   /** Skip tamper guards entirely. */
   noGuards?: boolean;
+  /** Skip checks entirely — guards only (the subagent-boundary fast path). */
+  noChecks?: boolean;
+  /**
+   * Compare against this git ref instead of the session baseline / HEAD /
+   * merge-base (the CLI's `--against`). Judge mode: evaluates a diff.
+   */
+  comparisonRef?: string;
   via?: Receipt['via'];
   onCheckResult?: (result: CheckResult, index: number) => void;
   /** Pre-loaded config (skips discovery). */
@@ -32,9 +40,17 @@ export async function verify(options: CheckOptions = {}): Promise<CheckRunSummar
   const config = options.config ?? loadConfig(cwd);
   const startedAt = new Date();
 
-  const comparison = await resolveComparison(config);
+  if (options.comparisonRef !== undefined && !(await refExists(options.comparisonRef, config.root))) {
+    // A judge that silently judges nothing is worse than no judge — a bad ref would make every git diff
+    // come back empty and every guard pass. `!== undefined` so an empty ref is rejected too, not ignored.
+    throw new DonefileError(`--against ref "${options.comparisonRef}" is not a commit in this repository`);
+  }
 
-  const checks = await runChecks(config.checks, config.root, options.onCheckResult, options.only);
+  const comparison = await resolveComparison(config, options.comparisonRef);
+
+  const checks = options.noChecks
+    ? []
+    : await runChecks(config.checks, config.root, options.onCheckResult, options.only);
 
   const guards = options.noGuards ? [] : await runGuards(config, comparison);
 
diff --git a/src/cli.ts b/src/cli.ts
index 187d714..e38814a 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -47,11 +47,13 @@ ${bold('COMMANDS')}
 
 ${bold('OPTIONS')}
   check:     --only <names>   run a subset (comma-separated)
+             --against <ref>  judge the diff vs an explicit git ref
              --no-guards      skip tamper guards
              --json           print the receipt as JSON
              --quiet          verdict only
   install:   --global         install to ~/.claude, ~/.codex, or ~/.cursor
   baseline:  --if-missing     only record when no baseline exists
+  hook:      --subagent       guards-only gate (SubagentStop boundaries)
   all:       -h, --help, -V, --version
 
 ${bold('EXIT CODES')}
@@ -133,10 +135,11 @@ async function cmdInit(argv: string[]): Promise<number> {
 }
 
 async function cmdCheck(argv: string[]): Promise<number> {
-  const flags = parseFlags(argv, ['only']);
+  const flags = parseFlags(argv, ['only', 'against']);
   const json = flags.bool.has('json');
   const quiet = flags.bool.has('quiet');
   const only = flags.values.get('only')?.split(',').map((s) => s.trim()).filter(Boolean);
+  const against = flags.values.get('against');
 
   const config = loadConfig(process.cwd());
   if (only) {
@@ -157,6 +160,7 @@ async function cmdCheck(argv: string[]): Promise<number> {
     config,
     only,
     noGuards: flags.bool.has('no-guards'),
+    comparisonRef: against,
     via: 'cli',
     onCheckResult: (result) => {
       if (!json && !quiet) process.stdout.write(renderCheckLine(result) + '\n');
@@ -283,12 +287,17 @@ async function cmdReceipt(argv: string[]): Promise<number> {
 }
 
 async function cmdHook(argv: string[]): Promise<number> {
-  const agent = argv[0] as HookAgent | undefined;
+  const flags = parseFlags(argv);
+  const agent = flags.positional[0] as HookAgent | undefined;
   if (!agent || !['claude', 'codex', 'cursor'].includes(agent)) {
-    fail('usage: donegate hook <claude | codex | cursor>');
+    fail('usage: donegate hook <claude | codex | cursor> [--subagent]');
+  }
+  const subagent = flags.bool.has('subagent');
+  if (subagent && agent !== 'claude') {
+    fail('--subagent is only supported for claude (SubagentStop hooks)');
   }
   const stdin = await readStdin();
-  const outcome = await runStopHook(agent, stdin);
+  const outcome = await runStopHook(agent, stdin, { subagent });
   if (outcome.stdout) process.stdout.write(outcome.stdout + '\n');
   if (outcome.stderr) process.stderr.write(outcome.stderr + '\n');
   return outcome.exitCode;
diff --git a/src/donefile.ts b/src/donefile.ts
index c8db980..592b961 100644
--- a/src/donefile.ts
+++ b/src/donefile.ts
@@ -36,8 +36,10 @@ const DEFAULT_GUARDS: GuardsConfig = {
   no_disabled_lint: true,
   no_new_todos: 'warn',
   no_debug_artifacts: 'warn',
+  no_protected_edits: true,
   test_globs: DEFAULT_TEST_GLOBS,
   exclude: [],
+  protect: [],
 };
 
 /** Bounce budget used when there is no (readable) donefile to say otherwise. */
@@ -156,7 +158,12 @@ export function parseDonefileSource(source: string, sourcePath: string, root: st
     checks.push({ name, run, timeout });
   }
 
-  const guards: GuardsConfig = { ...DEFAULT_GUARDS, test_globs: [...DEFAULT_TEST_GLOBS], exclude: [] };
+  const guards: GuardsConfig = {
+    ...DEFAULT_GUARDS,
+    test_globs: [...DEFAULT_TEST_GLOBS],
+    exclude: [],
+    protect: [],
+  };
   if (data.guards !== undefined) {
     if (!isRecord(data.guards)) throw new DonefileError('"guards" must be a map');
     for (const [key, value] of Object.entries(data.guards)) {
@@ -167,10 +174,12 @@ export function parseDonefileSource(source: string, sourcePath: string, root: st
         case 'no_disabled_lint':
         case 'no_new_todos':
         case 'no_debug_artifacts':
+        case 'no_protected_edits':
           guards[key] = asGuardLevel(value, key);
           break;
         case 'test_globs':
         case 'exclude':
+        case 'protect':
           if (!Array.isArray(value) || value.some((v) => typeof v !== 'string')) {
             throw new DonefileError(`guards.${key} must be a list of glob strings`);
           }
diff --git a/src/guards.ts b/src/guards.ts
index 02abab9..78ff8ac 100644
--- a/src/guards.ts
+++ b/src/guards.ts
@@ -83,9 +83,18 @@ function snippet(text: string): string {
  *  1. a session baseline recorded by `donegate baseline` (hooks do this automatically)
  *  2. HEAD, when there is uncommitted work
  *  3. merge-base with the default branch, when the tree is clean
+ *
+ * An `explicitRef` (the CLI's `--against`) overrides all of that, including
+ * the session baseline: judge mode judges a diff, not a session. The caller
+ * is responsible for validating that the ref exists.
  */
-export async function resolveComparison(config: DoneConfig): Promise<ComparisonContext> {
+export async function resolveComparison(config: DoneConfig, explicitRef?: string): Promise<ComparisonContext> {
   const root = config.root;
+
+  if (explicitRef) {
+    return { kind: 'explicit', ref: explicitRef, baseline: null };
+  }
+
   const inGit = await isGitRepo(root);
   const baseline = loadBaseline(root);
 
@@ -176,9 +185,13 @@ function skippedAll(config: DoneConfig, note: string): GuardResult[] {
     'no_disabled_lint',
     'no_new_todos',
     'no_debug_artifacts',
+    'no_protected_edits',
   ] as const;
   return names
     .filter((n) => config.guards[n] !== false)
+    // An empty guards.protect means the guard is unconfigured, not skipped —
+    // don't add noise to every receipt that never opted in.
+    .filter((n) => n !== 'no_protected_edits' || config.guards.protect.length > 0)
     .map((name) => ({ name, status: 'skipped' as const, findings: [], note }));
 }
 
@@ -224,6 +237,52 @@ export async function runGuards(config: DoneConfig, comparison: ComparisonContex
     results.push(makeResult('no_done_edits', config.guards.no_done_edits, findings));
   }
 
+  // ── no_protected_edits ─────────────────────────────────────────────────────
+  // DONE.md says `run: npm test`, but what `npm test` *means* lives in files
+  // the agent can edit (package.json, lint/test configs). guards.protect pins
+  // them: changed, deleted, or newly shadowed → finding.
+  if (config.guards.protect.length > 0) {
+    const findings: GuardFinding[] = [];
+    const isProtected = makeTestFileMatcher(config.guards.protect);
+    const blessHint = 'if this change is the human\'s, bless it with `donegate baseline`';
+    if (baseline?.protected_files) {
+      const seen = new Set(Object.keys(baseline.protected_files));
+      for (const [file, entry] of Object.entries(baseline.protected_files)) {
+        try {
+          const current = sha256(fs.readFileSync(path.join(config.root, file)));
+          if (current !== entry.sha) {
+            findings.push({
+              file,
+              detail: `protected file modified since the baseline — it defines what the checks mean (${blessHint})`,
+            });
+          }
+        } catch {
+          findings.push({ file, detail: 'protected file is missing — it existed when the baseline was taken' });
+        }
+      }
+      // A *new* file matching protect globs can shadow an existing config
+      // (e.g. a more-local eslint config) — that's a change in meaning too.
+      for (const file of inputs.added.keys()) {
+        if (isProtected(file) && !seen.has(file)) {
+          findings.push({ file, detail: `new file matches guards.protect (${blessHint})` });
+        }
+      }
+    } else {
+      // No baseline, or one without protected_files (CI, --against, plain diffs): fall back to the git diff.
+      const flagged = new Set<string>();
+      for (const file of inputs.added.keys()) {
+        if (isProtected(file)) flagged.add(file);
+      }
+      for (const file of [...inputs.modifiedPaths, ...inputs.deletedPaths]) {
+        if (isProtected(file)) flagged.add(file);
+      }
+      for (const file of flagged) {
+        findings.push({ file, detail: 'protected file changed in this diff — it defines what the checks mean' });
+      }
+    }
+    results.push(makeResult('no_protected_edits', config.guards.no_protected_edits, findings));
+  }
+
   // ── no_deleted_tests ───────────────────────────────────────────────────────
   {
     const findings: GuardFinding[] = [];
diff --git a/src/hooks.ts b/src/hooks.ts
index 4a0c404..b9043d2 100644
--- a/src/hooks.ts
+++ b/src/hooks.ts
@@ -20,7 +20,7 @@ interface HookPayload {
 }
 
 interface BounceState {
-  sessions: Record<string, { bounces: number; updated_at: string }>;
+  sessions: Record<string, { bounces: number; updated_at: string; best?: number }>;
 }
 
 function statePath(root: string): string {
@@ -168,26 +168,58 @@ function findOrphanedBaseline(cwd: string): { root: string; baseline: Baseline }
   }
 }
 
-/** Block the stop (incrementing the session's bounce count), or give up loudly once the budget is spent. */
+/**
+ * Block the stop (incrementing the session's bounce count), or give up loudly
+ * once the budget is spent.
+ *
+ * The budget counts *consecutive bounces without new progress*. When `score`
+ * is provided (failing checks + tripped guards), a score strictly below the
+ * session's best refreshes the budget: an agent steadily fixing a long list
+ * shouldn't be cut off mid-fix. Best-ever (not last-attempt) is the bar, so
+ * oscillating between two failure sets can't farm refreshes — total bounces
+ * stay bounded by max_bounces × (initial score + 1).
+ */
 function bounceOrGiveUp(options: {
   agent: HookAgent;
   root: string;
   sessionId: string;
   maxBounces: number;
+  score?: number;
   reason: (attempt: number) => string;
   giveUp: (bounces: number) => string;
 }): HookOutcome {
   const state = loadState(options.root);
-  const bounces = state.sessions[options.sessionId]?.bounces ?? 0;
+  const entry = state.sessions[options.sessionId];
+  let bounces = entry?.bounces ?? 0;
+  let best = entry?.best;
+  let refreshed = false;
+
+  if (typeof options.score === 'number') {
+    if (typeof best !== 'number') {
+      best = options.score; // first scored attempt sets the bar
+    } else if (options.score < best) {
+      best = options.score;
+      refreshed = true;
+      bounces = 0;
+    }
+  }
 
   if (bounces >= options.maxBounces) {
     return { stdout: null, stderr: options.giveUp(bounces), exitCode: 0 };
   }
 
   const attempt = bounces + 1;
-  state.sessions[options.sessionId] = { bounces: attempt, updated_at: new Date().toISOString() };
+  state.sessions[options.sessionId] = {
+    bounces: attempt,
+    updated_at: new Date().toISOString(),
+    ...(typeof best === 'number' ? { best } : {}),
+  };
   saveState(options.root, state);
-  const reason = options.reason(attempt);
+
+  let reason = options.reason(attempt);
+  if (refreshed) {
+    reason += '\n\n(donegate noticed progress since the last attempt — the bounce budget was refreshed.)';
+  }
 
   if (options.agent === 'cursor') {
     return { stdout: JSON.stringify({ followup_message: reason }), stderr: null, exitCode: 0 };
@@ -196,7 +228,11 @@ function bounceOrGiveUp(options: {
   return { stdout: JSON.stringify({ decision: 'block', reason }), stderr: null, exitCode: 0 };
 }
 
-export async function runStopHook(agent: HookAgent, rawStdin: string): Promise<HookOutcome> {
+export async function runStopHook(
+  agent: HookAgent,
+  rawStdin: string,
+  mode: { subagent?: boolean } = {},
+): Promise<HookOutcome> {
   const payload = parsePayload(rawStdin);
   const cwd = resolveCwd(payload);
 
@@ -206,6 +242,9 @@ export async function runStopHook(agent: HookAgent, rawStdin: string): Promise<H
   }
 
   const sessionId = payload.session_id ?? payload.conversation_id ?? 'default';
+  // Subagent boundaries get their own bounce ledger — a noisy fan-out must
+  // not burn the budget the terminal stop gate relies on.
+  const stateKey = mode.subagent ? `${sessionId}:subagent` : sessionId;
 
   // No DONE.md → never interfere. A globally-installed hook must be a no-op in
   // repos that haven't opted in. The exception is a session baseline whose
@@ -218,7 +257,7 @@ export async function runStopHook(agent: HookAgent, rawStdin: string): Promise<H
     return bounceOrGiveUp({
       agent,
       root: orphan.root,
-      sessionId,
+      sessionId: stateKey,
       // The donefile (and its gate.max_bounces with it) is gone — use the default.
       maxBounces: DEFAULT_MAX_BOUNCES,
       reason: (attempt) =>
@@ -253,7 +292,7 @@ export async function runStopHook(agent: HookAgent, rawStdin: string): Promise<H
         return bounceOrGiveUp({
           agent,
           root: found.root,
-          sessionId,
+          sessionId: stateKey,
           // The config is unreadable, so its gate.max_bounces is too — use the default.
           maxBounces: DEFAULT_MAX_BOUNCES,
           reason: (attempt) =>
@@ -273,18 +312,27 @@ export async function runStopHook(agent: HookAgent, rawStdin: string): Promise<H
   }
 
   // Always verify — the receipt should reflect reality even when we've stopped
-  // blocking. We give up on bouncing, never on checking.
-  const summary = await verify({ cwd, config, via: agent });
+  // blocking. We give up on bouncing, never on checking. Subagent boundaries
+  // run guards only: a tamper scan is cheap enough to pay per subagent, a test
+  // suite is not — checks belong to the terminal stop.
+  const summary = await verify({
+    cwd,
+    config,
+    via: mode.subagent ? 'subagent' : agent,
+    noChecks: mode.subagent,
+  });
 
   if (summary.exitCode === 0) {
     const state = loadState(config.root);
-    if (state.sessions[sessionId]) {
-      delete state.sessions[sessionId];
+    if (state.sessions[stateKey]) {
+      delete state.sessions[stateKey];
       saveState(config.root, state);
     }
     return {
       stdout: null,
-      stderr: `donegate: ✓ DONE — ${summary.receipt.checks.length} checks passed, guards clean (receipt: ${path.join(DONEGATE_DIR, 'receipts', 'latest.json')})`,
+      stderr: mode.subagent
+        ? `donegate: ✓ subagent boundary clean — guards pass (receipt: ${path.join(DONEGATE_DIR, 'receipts', 'latest.json')})`
+        : `donegate: ✓ DONE — ${summary.receipt.checks.length} checks passed, guards clean (receipt: ${path.join(DONEGATE_DIR, 'receipts', 'latest.json')})`,
       exitCode: 0,
     };
   }
@@ -292,8 +340,11 @@ export async function runStopHook(agent: HookAgent, rawStdin: string): Promise<H
   return bounceOrGiveUp({
     agent,
     root: config.root,
-    sessionId,
+    sessionId: stateKey,
     maxBounces: config.gate.max_bounces,
+    // Progress = strictly fewer failing checks + tripped guards than the
+    // session's best so far; progress refreshes the bounce budget.
+    score: summary.checksFailed + summary.guardsFailed,
     reason: (attempt) => buildReason(summary, attempt, config.gate.max_bounces),
     giveUp: (bounces) =>
       `donegate: ✗ still NOT DONE after ${bounces} bounce${bounces > 1 ? 's' : ''} — giving up and allowing the stop. The receipt is red: ${path.join(DONEGATE_DIR, 'receipts', 'latest.json')}`,
diff --git a/src/install.ts b/src/install.ts
index dbb2c90..ea0886c 100644
--- a/src/install.ts
+++ b/src/install.ts
@@ -4,8 +4,17 @@ import path from 'node:path';
 
 export type InstallTarget = 'claude' | 'codex' | 'cursor' | 'ci';
 
-export const HOOK_COMMANDS: Record<Exclude<InstallTarget, 'ci'>, { stop: string; baseline: string }> = {
-  claude: { stop: 'npx -y donegate hook claude', baseline: 'npx -y donegate baseline --if-missing --quiet' },
+export const HOOK_COMMANDS: Record<
+  Exclude<InstallTarget, 'ci'>,
+  { stop: string; baseline: string; subagentStop?: string }
+> = {
+  claude: {
+    stop: 'npx -y donegate hook claude',
+    baseline: 'npx -y donegate baseline --if-missing --quiet',
+    // Guards-only tamper scan at every subagent boundary — fast (git diffs,
+    // no checks), so fan-out workflows are gated per node, not just at the end.
+    subagentStop: 'npx -y donegate hook claude --subagent',
+  },
   codex: { stop: 'npx -y donegate hook codex', baseline: 'npx -y donegate baseline --if-missing --quiet' },
   cursor: { stop: 'npx -y donegate hook cursor', baseline: 'npx -y donegate baseline --if-missing --quiet' },
 };
@@ -17,6 +26,8 @@ export const HOOK_COMMANDS: Record<Exclude<InstallTarget, 'ci'>, { stop: string;
  */
 const STOP_TIMEOUT_SECONDS = 1800;
 const BASELINE_TIMEOUT_SECONDS = 120;
+/** Guards only — no checks run — but big-repo git diffs and a cold npx need headroom. */
+const SUBAGENT_TIMEOUT_SECONDS = 300;
 
 export interface InstallResult {
   target: InstallTarget;
@@ -140,6 +151,9 @@ export function installAgent(
   } else {
     changed = mergeNestedHooks(config, 'Stop', commands.stop, STOP_TIMEOUT_SECONDS) || changed;
     changed = mergeNestedHooks(config, 'SessionStart', commands.baseline, BASELINE_TIMEOUT_SECONDS) || changed;
+    if (commands.subagentStop) {
+      changed = mergeNestedHooks(config, 'SubagentStop', commands.subagentStop, SUBAGENT_TIMEOUT_SECONDS) || changed;
+    }
   }
 
   if (!changed) return { target, file, action: 'already-installed' };
@@ -170,7 +184,7 @@ export function uninstallAgent(
       }
     }
   } else {
-    for (const event of ['Stop', 'SessionStart']) {
+    for (const event of ['Stop', 'SessionStart', 'SubagentStop']) {
       if (removeNestedHooks(config, event)) changed = true;
     }
   }
diff --git a/src/types.ts b/src/types.ts
index 4e697ff..af8785d 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -21,6 +21,8 @@ export interface GuardsConfig {
   no_disabled_lint: GuardLevel;
   no_new_todos: GuardLevel;
   no_debug_artifacts: GuardLevel;
+  /** Guard level for `protect` matches: a changed, deleted, or newly added matching file is a finding. */
+  no_protected_edits: GuardLevel;
   /** Glob patterns that identify test files. */
   test_globs: string[];
   /**
@@ -28,6 +30,13 @@ export interface GuardsConfig {
    * skip/suppression patterns (lint configs, pattern scanners, donegate itself).
    */
   exclude: string[];
+  /**
+   * Globs for files the verdict depends on but the gate doesn't run — the
+   * files that define what the check commands *mean* (package.json, lint/
+   * test/build configs). Hashed into the baseline; changes trip
+   * `no_protected_edits`.
+   */
+  protect: string[];
 }
 
 export interface GateConfig {
@@ -74,7 +83,7 @@ export interface GuardResult {
   note?: string;
 }
 
-export type BaselineKind = 'session' | 'head' | 'merge-base' | 'none';
+export type BaselineKind = 'session' | 'head' | 'merge-base' | 'explicit' | 'none';
 
 export interface BaselineFileEntry {
   sha: string;
@@ -90,6 +99,8 @@ export interface Baseline {
   donefile_sha: string;
   donefile_path: string;
   test_files: Record<string, BaselineFileEntry>;
+  /** Hashes of files matching guards.protect (absent when protect is empty). */
+  protected_files?: Record<string, { sha: string }>;
 }
 
 export interface ComparisonContext {
@@ -128,7 +139,7 @@ export interface Receipt {
   checks: CheckResult[];
   guards: GuardResult[];
   /** Which surface produced the receipt. */
-  via: 'cli' | 'claude' | 'codex' | 'cursor' | 'run';
+  via: 'cli' | 'claude' | 'codex' | 'cursor' | 'run' | 'subagent';
   /** sha256 of the receipt body (excluding this field). */
   receipt_sha: string;
 }
diff --git a/test/donefile.test.ts b/test/donefile.test.ts
index dc1c863..a3afc32 100644
--- a/test/donefile.test.ts
+++ b/test/donefile.test.ts
@@ -135,3 +135,13 @@ test('findDonefile walks upward and prefers DONE.md', () => {
     cleanup(root);
   }
 });
+
+test('parses guards.protect and no_protected_edits', () => {
+  const config = parseDonefileSource(
+    'checks:\n  - name: a\n    run: x\nguards:\n  no_protected_edits: warn\n  protect:\n    - package.json\n    - "*.config.js"\n',
+    '/repo/done.yml',
+    '/repo',
+  );
+  assert.deepEqual(config.guards.protect, ['package.json', '*.config.js']);
+  assert.equal(config.guards.no_protected_edits, 'warn');
+});
diff --git a/test/guards.test.ts b/test/guards.test.ts
index 914aa14..a0f8fd7 100644
--- a/test/guards.test.ts
+++ b/test/guards.test.ts
@@ -2,9 +2,10 @@ import { test } from 'node:test';
 import assert from 'node:assert/strict';
 import { loadConfig } from '../src/donefile.js';
 import { writeBaseline } from '../src/baseline.js';
+import { verify } from '../src/check.js';
 import { resolveComparison, runGuards } from '../src/guards.js';
 import type { GuardResult } from '../src/types.js';
-import { BASIC_DONEFILE, cleanup, gitCommitAll, gitInit, read, rm, tmpdir, write } from './helpers.js';
+import { BASIC_DONEFILE, cleanup, gitCommitAll, gitHead, gitInit, read, rm, tmpdir, write } from './helpers.js';
 
 const TEST_FILE = `import { test } from 'node:test';
 
@@ -296,3 +297,106 @@ test('outside git WITH baseline: snapshot comparisons still work', async () => {
     cleanup(root);
   }
 });
+
+const PROTECT_DONEFILE = `# DoD
+\`\`\`yaml
+checks:
+  - name: ok
+    run: node -e "process.exit(0)"
+guards:
+  protect:
+    - package.json
+    - "*.config.js"
+\`\`\`
+`;
+
+const PKG_JSON = '{ "scripts": { "test": "node run-tests.js" } }\n';
+
+async function setupProtectRepo(): Promise<string> {
+  const root = tmpdir();
+  gitInit(root);
+  write(root, 'DONE.md', PROTECT_DONEFILE);
+  write(root, 'package.json', PKG_JSON);
+  gitCommitAll(root, 'base');
+  return root;
+}
+
+test('no_protected_edits: pinned files cannot be changed, deleted, or shadowed quietly', async () => {
+  const root = await setupProtectRepo();
+  try {
+    const config = loadConfig(root);
+    await writeBaseline(config);
+
+    // modified — redefining what `npm test` means
+    write(root, 'package.json', '{ "scripts": { "test": "exit 0" } }\n');
+    let g = guard(await runGuards(config, await resolveComparison(config)), 'no_protected_edits');
+    assert.equal(g.status, 'fail');
+    assert.match(g.findings[0]!.detail, /modified since the baseline/);
+
+    // deleted
+    rm(root, 'package.json');
+    g = guard(await runGuards(config, await resolveComparison(config)), 'no_protected_edits');
+    assert.equal(g.status, 'fail');
+    assert.match(g.findings[0]!.detail, /missing/);
+
+    // restored byte-for-byte → clean again; a NEW file matching protect globs is not
+    write(root, 'package.json', PKG_JSON);
+    g = guard(await runGuards(config, await resolveComparison(config)), 'no_protected_edits');
+    assert.equal(g.status, 'pass');
+    write(root, 'extra.config.js', 'module.exports = {};\n');
+    g = guard(await runGuards(config, await resolveComparison(config)), 'no_protected_edits');
+    assert.equal(g.status, 'fail');
+    assert.match(g.findings[0]!.detail, /new file matches/);
+  } finally {
+    cleanup(root);
+  }
+});
+
+test('no_protected_edits: falls back to the git diff when there is no baseline (CI mode)', async () => {
+  const root = await setupProtectRepo();
+  try {
+    const config = loadConfig(root);
+    write(root, 'package.json', '{ "scripts": { "test": "exit 0" } }\n');
+    const g = guard(await runGuards(config, await resolveComparison(config)), 'no_protected_edits');
+    assert.equal(g.status, 'fail');
+    assert.match(g.findings[0]!.detail, /changed in this diff/);
+  } finally {
+    cleanup(root);
+  }
+});
+
+test('check --against judges an explicit ref — even past a re-blessed baseline', async () => {
+  const root = await setupRepo();
+  try {
+    const base = gitHead(root);
+    await writeBaseline(loadConfig(root));
+
+    // skip a test, commit it, and re-bless the baseline: a session comparison
+    // is now blind to the skip. The explicit ref is not.
+    write(root, 'test/app.test.ts', TEST_FILE.replace("test('two'", "test.skip('two'"));
+    gitCommitAll(root, 'sneaky');
+    await writeBaseline(loadConfig(root));
+
+    const blessed = await verify({ cwd: root, config: loadConfig(root), via: 'cli' });
+    assert.equal(blessed.exitCode, 0);
+
+    const judged = await verify({ cwd: root, config: loadConfig(root), comparisonRef: base, via: 'cli' });
+    assert.equal(judged.receipt.baseline.kind, 'explicit');
+    assert.equal(judged.exitCode, 3);
+    assert.ok(judged.receipt.guards.some((g) => g.name === 'no_new_skips' && g.status === 'fail'));
+  } finally {
+    cleanup(root);
+  }
+});
+
+test('check --against refuses a ref that does not exist', async () => {
+  const root = await setupRepo();
+  try {
+    await assert.rejects(
+      verify({ cwd: root, config: loadConfig(root), comparisonRef: 'not-a-ref', via: 'cli' }),
+      /not a commit/,
+    );
+  } finally {
+    cleanup(root);
+  }
+});
diff --git a/test/helpers.ts b/test/helpers.ts
index 548f714..693d82e 100644
--- a/test/helpers.ts
+++ b/test/helpers.ts
@@ -35,6 +35,10 @@ export function gitCommitAll(root: string, message = 'commit'): void {
   execFileSync('git', ['commit', '-q', '-m', message], { cwd: root, stdio: 'pipe' });
 }
 
+export function gitHead(root: string): string {
+  return execFileSync('git', ['rev-parse', 'HEAD'], { cwd: root, stdio: 'pipe' }).toString().trim();
+}
+
 export const BASIC_DONEFILE = `# Definition of Done
 
 \`\`\`yaml
diff --git a/test/hooks.test.ts b/test/hooks.test.ts
index c0c8927..bd8597b 100644
--- a/test/hooks.test.ts
+++ b/test/hooks.test.ts
@@ -227,3 +227,82 @@ test('cursor: aborted turns are not gated even when the donefile is gone', async
     cleanup(root);
   }
 });
+
+// Assembled at runtime so the repo's own no_new_skips guard never sees the
+// literal marker in this (non-excluded) test file.
+const SKIP_CALL = ['test', 'skip'].join('.');
+
+test('subagent boundary: guards-only — failing checks do not block, tampering does', async () => {
+  const root = await setup(FAILING_DONEFILE);
+  try {
+    write(root, 'test/app.test.ts', "import { test } from 'node:test';\ntest('one', () => {});\ntest('two', () => {});\n");
+    gitCommitAll(root);
+    await runBaselineHook({ ifMissing: false, quiet: true, cwd: root });
+
+    // the donefile's check always fails, but the boundary doesn't run checks
+    const clean = await runStopHook('claude', payload(root), { subagent: true });
+    assert.equal(clean.stdout, null);
+    assert.match(clean.stderr ?? '', /subagent boundary clean/);
+
+    // tamper at the boundary → blocked with the guard finding
+    write(root, 'test/app.test.ts', `import { test } from 'node:test';\ntest('one', () => {});\n${SKIP_CALL}('two', () => {});\n`);
+    const tampered = await runStopHook('claude', payload(root), { subagent: true });
+    assert.ok(tampered.stdout, 'expected a block');
+    const response = JSON.parse(tampered.stdout) as { decision: string; reason: string };
+    assert.equal(response.decision, 'block');
+    assert.match(response.reason, /no_new_skips/);
+
+    // subagent bounces live in their own ledger — the terminal gate still starts fresh
+    const main = await runStopHook('claude', payload(root));
+    assert.match(JSON.parse(main.stdout!).reason as string, /attempt 1\/2/);
+  } finally {
+    cleanup(root);
+  }
+});
+
+const PROGRESS_DONEFILE = `# DoD
+\`\`\`yaml
+checks:
+  - name: c1
+    run: node -e "process.exit(require('fs').existsSync('fix1.txt') ? 0 : 1)"
+  - name: c2
+    run: node -e "process.exit(require('fs').existsSync('fix2.txt') ? 0 : 1)"
+gate:
+  max_bounces: 2
+\`\`\`
+`;
+
+test('progress refreshes the bounce budget; stalling exhausts it', async () => {
+  const root = await setup(PROGRESS_DONEFILE);
+  try {
+    const block = async () => {
+      const outcome = await runStopHook('claude', payload(root));
+      assert.ok(outcome.stdout, 'expected a block');
+      return JSON.parse(outcome.stdout) as { decision: string; reason: string };
+    };
+
+    // two failing checks, no movement: the budget counts down
+    assert.match((await block()).reason, /attempt 1\/2/);
+    assert.match((await block()).reason, /attempt 2\/2/);
+
+    // fixing one check is progress → budget refreshed, loudly
+    write(root, 'fix1.txt', 'fixed\n');
+    const refreshed = await block();
+    assert.match(refreshed.reason, /attempt 1\/2/);
+    assert.match(refreshed.reason, /bounce budget was refreshed/);
+
+    // stalling at the new best exhausts the refreshed budget
+    assert.match((await block()).reason, /attempt 2\/2/);
+    const spent = await runStopHook('claude', payload(root));
+    assert.equal(spent.stdout, null);
+    assert.match(spent.stderr ?? '', /giving up/);
+
+    // finishing the job still works and clears the session
+    write(root, 'fix2.txt', 'fixed\n');
+    const done = await runStopHook('claude', payload(root));
+    assert.equal(done.stdout, null);
+    assert.match(done.stderr ?? '', /✓ DONE/);
+  } finally {
+    cleanup(root);
+  }
+});
diff --git a/test/install.test.ts b/test/install.test.ts
index 4fd2ae4..8972ebd 100644
--- a/test/install.test.ts
+++ b/test/install.test.ts
@@ -171,3 +171,29 @@ test('ensureGitignore appends once', () => {
     cleanup(root);
   }
 });
+
+test('claude install wires the subagent boundary; uninstall removes it', () => {
+  const root = tmpdir();
+  try {
+    installAgent('claude', root);
+    const config = JSON.parse(read(root, '.claude/settings.json'));
+    assert.match(config.hooks.SubagentStop[0].hooks[0].command, /donegate hook claude --subagent/);
+
+    uninstallAgent('claude', root);
+    const after = JSON.parse(read(root, '.claude/settings.json'));
+    assert.equal(after.hooks.SubagentStop, undefined);
+  } finally {
+    cleanup(root);
+  }
+});
+
+test('codex and cursor get no subagent hook (no such event)', () => {
+  const root = tmpdir();
+  try {
+    installAgent('codex', root);
+    const config = JSON.parse(read(root, '.codex/hooks.json'));
+    assert.equal(config.hooks.SubagentStop, undefined);
+  } finally {
+    cleanup(root);
+  }
+});