|
| 1 | +// Shared detection for unbacked success claims — consumed by BOTH |
| 2 | +// `stop-claim-verify-reminder` (Stop-time nudge) and |
| 3 | +// `unbacked-claim-commit-guard` (PreToolUse block on commit/push). One matcher, |
| 4 | +// two enforcement points, no drift. |
| 5 | +// |
| 6 | +// The fleet rule (CLAUDE.md "Judgment & self-evaluation" → "Verify before you |
| 7 | +// claim"): never assert "tests pass" / "builds" / "typechecks" / "lint passes" |
| 8 | +// / "render verified" without a tool call THIS SESSION that ran or read it. |
| 9 | +// A claim fires only when NONE of its backing-command patterns appear in any |
| 10 | +// Bash command run this session. |
| 11 | + |
| 12 | +import { |
| 13 | + extractToolUseBlocks, |
| 14 | + readLines, |
| 15 | + resolveRoleAndContent, |
| 16 | + stripCodeFences, |
| 17 | +} from './transcript.mts' |
| 18 | + |
/**
 * One category of success claim together with the evidence that backs it.
 */
export interface ClaimRule {
  /** Category label. */
  readonly label: string
  /** Pattern that matches the self-claim in the assistant's prose. */
  readonly claim: RegExp
  /**
   * Patterns that back the claim: the claim counts as backed when at least
   * one of them matches a Bash command run this session.
   */
  readonly backedBy: ReadonlyArray<RegExp>
  /** One-line hint. */
  readonly hint: string
}
| 29 | + |
/**
 * The claim categories checked by `findUnbackedClaims`.
 *
 * Each rule pairs a prose pattern (`claim`) with the command patterns
 * (`backedBy`) that count as evidence for it. All claim patterns are
 * case-insensitive and stop at sentence punctuation or a newline, so the
 * subject ("tests", "build", ...) and the success word must appear in the
 * same sentence, within a small character window.
 *
 * Every category accepts the same verb forms of "pass"
 * (pass / passes / passed / passing) so that phrasing such as
 * "the test passes" or "lint is passing" is detected regardless of category.
 *
 * NOTE: the patterns do not look for negation — "tests don't pass" matches
 * the "tests pass" rule just like "tests pass" does.
 */
export const CLAIM_RULES: readonly ClaimRule[] = [
  {
    label: 'tests pass',
    claim:
      /\b(?:all )?tests?\b[^.!?\n]{0,30}\b(?:pass(?:es|ed|ing)?|green|succeed(?:ed|s)?)\b/i,
    backedBy: [/\bvitest\b/, /\bpnpm\s+(?:run\s+)?test\b/, /\bnode\s+--test\b/],
    hint: 'run the test command (`pnpm test` / `vitest run <file>`) or qualify the claim',
  },
  {
    label: 'build succeeds',
    claim:
      /\bbuild(?:s|ed)?\b[^.!?\n]{0,30}\b(?:succeed(?:ed|s)?|clean|pass(?:es|ed|ing)?|work(?:s|ed)?)\b/i,
    backedBy: [/\bpnpm\s+(?:run\s+)?build\b/, /\brun\s+build\b/, /\brolldown\b/],
    hint: 'run the build or qualify the claim',
  },
  {
    label: 'typechecks',
    claim:
      /\b(?:type[- ]?checks?\b[^.!?\n]{0,20}\b(?:pass(?:es|ed|ing)?|clean)|no type errors)\b/i,
    backedBy: [/\btsgo\b/, /\btsc\b/, /\bpnpm\s+(?:run\s+)?check\b/],
    hint: 'run tsgo / `pnpm run check` or qualify the claim',
  },
  {
    label: 'lint passes',
    claim:
      /\blint(?:ing)?\b[^.!?\n]{0,25}\b(?:pass(?:es|ed|ing)?|clean|green)\b/i,
    backedBy: [
      /\boxlint\b/,
      /\bpnpm\s+(?:run\s+)?lint\b/,
      /\bpnpm\s+(?:run\s+)?check\b/,
    ],
    hint: 'run `pnpm run lint` / `pnpm run check` or qualify the claim',
  },
  {
    label: 'render verified',
    // A self-claim that the UI / popup / page was visually checked — "verified
    // the popup", "the UI renders correctly", "looks good on screen",
    // "visually verified". Backed ONLY by an actual render this session.
    claim:
      /\b(?:visually verif(?:y|ied)|verif(?:y|ied)\b[^.!?\n]{0,30}\b(?:popup|render|ui\b|screen|pixels?)|(?:popup|ui|render(?:ed|s)?|page|screen)\b[^.!?\n]{0,30}\b(?:looks? (?:good|correct|right)|renders? (?:correctly|fine)|verified))\b/i,
    backedBy: [
      /\bscreenshot\.mts\b/,
      /\brendering-chromium-to-png\b/,
      /\bplaywright\b/,
      /\bchromium\b/,
    ],
    hint: 'render the page to a PNG (rendering-chromium-to-png / screenshot.mts) and Read the pixels this session, or qualify the claim — bundle/build success is not visual verification',
  },
]
| 78 | + |
/**
 * A claim that was found in the assistant's text but had no backing command.
 */
export interface UnbackedClaim {
  /** Label of the rule whose claim pattern matched. */
  readonly label: string
  /** Hint of the rule whose claim pattern matched. */
  readonly hint: string
}
| 83 | + |
/**
 * Collects every Bash command string the assistant ran across the session.
 *
 * Each transcript line is parsed as JSON; lines that fail to parse are
 * skipped. Only events that resolve to the `assistant` role are inspected,
 * and only `Bash` tool-use blocks whose `command` input is a string
 * contribute to the result.
 *
 * @param transcriptPath Transcript location, passed straight to `readLines`.
 * @returns The command strings, in transcript order.
 */
export function sessionBashCommands(
  transcriptPath: string | undefined,
): string[] {
  const collected: string[] = []
  for (const rawLine of readLines(transcriptPath)) {
    let parsed: unknown
    try {
      parsed = JSON.parse(rawLine)
    } catch {
      // Not valid JSON — ignore this line and keep scanning.
      continue
    }
    const resolved = resolveRoleAndContent(parsed)
    if (!resolved || resolved.role !== 'assistant') {
      continue
    }
    for (const block of extractToolUseBlocks(resolved.content)) {
      // Only touch `input` on Bash blocks; other tools are ignored outright.
      const command =
        block.name === 'Bash' ? block.input['command'] : undefined
      if (typeof command === 'string') {
        collected.push(command)
      }
    }
  }
  return collected
}
| 115 | + |
/**
 * Returns the claims made in `assistantText` that no Bash command backs.
 *
 * A rule fires when its `claim` pattern matches the text and none of its
 * `backedBy` patterns matches any single entry of `bashCommands`.
 *
 * Each command is tested on its own rather than against one newline-joined
 * string: patterns such as `pnpm\s+test` use `\s+`, which also matches a
 * newline, so a joined string would let a pattern match across two unrelated
 * commands (e.g. `which pnpm` followed by `test -f x`) and wrongly count the
 * claim as backed.
 *
 * @param assistantText Prose to scan for claims.
 * @param bashCommands Bash command strings run this session.
 * @returns One `{ label, hint }` entry per unbacked rule, in `CLAIM_RULES`
 *   order; empty when nothing is claimed or every claim is backed.
 */
export function findUnbackedClaims(
  assistantText: string,
  bashCommands: readonly string[],
): UnbackedClaim[] {
  // NOTE(review): presumably removes fenced code so snippets are not read as
  // claims — confirm against `stripCodeFences` in transcript.mts.
  const text = stripCodeFences(assistantText)
  const out: UnbackedClaim[] = []
  for (let i = 0, { length } = CLAIM_RULES; i < length; i += 1) {
    const rule = CLAIM_RULES[i]!
    if (!rule.claim.test(text)) {
      continue
    }
    // Backed when ANY pattern matches ANY individual command.
    const backed = rule.backedBy.some(re =>
      bashCommands.some(cmd => re.test(cmd)),
    )
    if (!backed) {
      out.push({ label: rule.label, hint: rule.hint })
    }
  }
  return out
}
0 commit comments