diff --git a/README.md b/README.md index ea930472..e6896e37 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ The easiest way to manage policies that keep your AI agents reliable, on-task, and running autonomously - for **Claude Code** & the **Agents SDK**. -- **26 Built-in Policies** - Catch common agent failure modes out of the box. Block destructive commands, prevent secret leakage, keep agents inside project boundaries, detect loops, and more. +- **30 Built-in Policies** - Catch common agent failure modes out of the box. Block destructive commands, prevent secret leakage, keep agents inside project boundaries, detect loops, and more. - **Custom Policies** - Write your own reliability rules in JavaScript. Use the `allow`/`deny`/`instruct` API to enforce conventions, prevent drift, gate operations, or integrate with external systems. - **Easy Configuration** - Tune any policy without writing code. Set allowlists, protected branches, thresholds per-project or globally. Three-scope config merges automatically. - **Agent Monitor** - See what your agents did while you were away. Browse sessions, inspect every tool call, and review exactly where policies fired. @@ -197,8 +197,9 @@ failproofai policies --install --custom ./my-policies.js | Function | Effect | |----------|--------| -| `allow()` | Permit the tool call | -| `deny(message)` | Block the tool call; message shown to Claude | +| `allow()` | Permit the operation | +| `allow(message)` | Permit and send informational context to Claude *(beta)* | +| `deny(message)` | Block the operation; message shown to Claude | | `instruct(message)` | Add context to Claude's prompt; does not block | ### Context object (`ctx`) @@ -234,13 +235,26 @@ FAILPROOFAI_TELEMETRY_DISABLED=1 failproofai | Guide | Description | |-------|-------------| | [Getting Started](docs/getting-started.mdx) | Installation and first steps | -| [Built-in Policies](docs/built-in-policies.mdx) | All 26 built-in policies with parameters | -| [Custom Hooks](docs/custom-hooks.mdx) | Write your own policies | +| [Built-in Policies](docs/built-in-policies.mdx) | All 30 built-in policies with parameters | +| [Custom Policies](docs/custom-policies.mdx) | Write your own policies | | [Configuration](docs/configuration.mdx) | Config file format and scope merging | | [Dashboard](docs/dashboard.mdx) | Monitor sessions and review policy activity | | [Architecture](docs/architecture.mdx) | How the hook system works | | [Testing](docs/testing.mdx) | Running tests and writing new ones | +### Run docs locally + +```bash +docker build -f Dockerfile.docs -t failproofai-docs . +docker run --rm -p 3000:3000 failproofai-docs +``` + +Opens the Mintlify docs site at `http://localhost:3000`. The container watches for changes if you mount the docs directory: + +```bash +docker run --rm -p 3000:3000 -v $(pwd)/docs:/app/docs failproofai-docs +``` + --- ## Contributing diff --git a/__tests__/hooks/builtin-policies.test.ts b/__tests__/hooks/builtin-policies.test.ts index d8f08e6b..1b9083cb 100644 --- a/__tests__/hooks/builtin-policies.test.ts +++ b/__tests__/hooks/builtin-policies.test.ts @@ -1,7 +1,7 @@ // @vitest-environment node import { describe, it, expect, beforeEach, vi, afterEach } from "vitest"; import { readFile } from "node:fs/promises"; -import { execSync } from "node:child_process"; +import { execSync, execFileSync } from "node:child_process"; import { BUILTIN_POLICIES, registerBuiltinPolicies, clearGitBranchCache } from "../../src/hooks/builtin-policies"; import { getPoliciesForEvent, clearPolicies } from "../../src/hooks/policy-registry"; import type { PolicyContext } from "../../src/hooks/policy-types"; @@ -15,6 +15,7 @@ vi.mock("node:fs/promises", () => ({ vi.mock("node:child_process", () => ({ execSync: vi.fn(), + execFileSync: vi.fn(), })); function makeCtx(overrides: Partial = {}): PolicyContext { @@ -33,8 +34,8 @@ describe("hooks/builtin-policies", () => { }); describe("BUILTIN_POLICIES", () => { - it("has 26 built-in policies", () => { - expect(BUILTIN_POLICIES).toHaveLength(26); + it("has 30 built-in policies", () => { + expect(BUILTIN_POLICIES).toHaveLength(30); }); it("has 11 default-enabled policies", () => { @@ -1591,4 +1592,684 @@ describe("hooks/builtin-policies", () => { }); }); }); + + describe("workflow policy metadata", () => { + const workflowPolicies = BUILTIN_POLICIES.filter((p) => p.category === "Workflow"); + + it("all 4 workflow policies exist", () => { + expect(workflowPolicies).toHaveLength(4); + const names = workflowPolicies.map((p) => p.name).sort(); + expect(names).toEqual([ + "require-ci-green-before-stop", + "require-commit-before-stop", + "require-pr-before-stop", + "require-push-before-stop", + ]); + }); + + it("all workflow policies are marked beta", () => { + for (const p of workflowPolicies) { + expect(p.beta).toBe(true); + } + }); + + it("all workflow policies are disabled by default", () => { + for (const p of workflowPolicies) { + expect(p.defaultEnabled).toBe(false); + } + }); + + it("all workflow policies match the Stop event only", () => { + for (const p of workflowPolicies) { + expect(p.match.events).toEqual(["Stop"]); + } + }); + + it("only require-push-before-stop has params schema", () => { + const withParams = workflowPolicies.filter((p) => p.params); + expect(withParams).toHaveLength(1); + expect(withParams[0].name).toBe("require-push-before-stop"); + expect(withParams[0].params!.remote).toBeDefined(); + expect(withParams[0].params!.remote.default).toBe("origin"); + }); + }); + + describe("require-commit-before-stop", () => { + const policy = BUILTIN_POLICIES.find((p) => p.name === "require-commit-before-stop")!; + + afterEach(() => { + vi.mocked(execSync).mockReset(); + }); + + it("denies when there are modified files", async () => { + vi.mocked(execSync).mockReturnValue("M src/index.ts\n"); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("uncommitted changes"); + }); + + it("denies when there are untracked files", async () => { + vi.mocked(execSync).mockReturnValue("?? newfile.ts\n"); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + }); + + it("denies when there are staged but uncommitted files", async () => { + vi.mocked(execSync).mockReturnValue("A staged-file.ts\n"); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + }); + + it("denies when there are deleted files", async () => { + vi.mocked(execSync).mockReturnValue("D removed.ts\n"); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + }); + + it("denies when there are renamed files", async () => { + vi.mocked(execSync).mockReturnValue("R old.ts -> new.ts\n"); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + }); + + it("denies with mixed status output (modified + untracked)", async () => { + vi.mocked(execSync).mockReturnValue("M src/index.ts\n?? newfile.ts\n A staged.ts\n"); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("uncommitted changes"); + }); + + it("allows with message when working directory is clean", async () => { + vi.mocked(execSync).mockReturnValue(""); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("committed"); + }); + + it("allows when status is whitespace only (treated as clean)", async () => { + vi.mocked(execSync).mockReturnValue(" \n \n"); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + }); + + it("allows with reason when not in a git repo", async () => { + vi.mocked(execSync).mockImplementation(() => { throw new Error("fatal: not a git repository"); }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/not-a-repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("Not a git repository"); + }); + + it("allows with reason when cwd is not available", async () => { + const ctx = makeCtx({ eventType: "Stop" }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("No working directory"); + }); + + it("allows with reason when session has no cwd", async () => { + const ctx = makeCtx({ eventType: "Stop", session: undefined }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("No working directory"); + }); + + it("fail-open when execSync throws an unexpected error", async () => { + vi.mocked(execSync).mockImplementation(() => { throw new Error("Permission denied"); }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + }); + + it("passes cwd to execSync", async () => { + vi.mocked(execSync).mockReturnValue(""); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/my/project" } }); + await policy.fn(ctx); + expect(execSync).toHaveBeenCalledWith( + "git status --porcelain", + expect.objectContaining({ cwd: "/my/project" }), + ); + }); + }); + + describe("require-push-before-stop", () => { + const policy = BUILTIN_POLICIES.find((p) => p.name === "require-push-before-stop")!; + + afterEach(() => { + vi.mocked(execSync).mockReset(); + vi.mocked(execFileSync).mockReset(); + clearGitBranchCache(); + }); + + // Helper: execSync handles git remote + rev-parse --abbrev-ref; + // execFileSync handles rev-parse --verify + git log (safer arg passing) + function mockPushScenario(opts: { + remote?: string; + branch?: string; + hasTracking?: boolean; + unpushedOutput?: string; + }) { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("git remote")) return `${opts.remote ?? "origin"}\n`; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return `${opts.branch ?? "feat/branch"}\n`; + return ""; + }); + vi.mocked(execFileSync).mockImplementation((_cmd: string, args?: readonly string[]) => { + const joined = args?.join(" ") ?? ""; + if (joined.includes("rev-parse") && joined.includes("--verify")) { + if (opts.hasTracking === false) throw new Error("not found"); + return "abc\n"; + } + if (joined.includes("log")) return opts.unpushedOutput ?? ""; + return ""; + }); + } + + it("denies when there are unpushed commits (plural message)", async () => { + mockPushScenario({ unpushedOutput: "abc123 fix\ndef456 update\n" }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("2 unpushed commits"); + expect(result.reason).toContain("git push"); + }); + + it("denies with singular message for 1 unpushed commit", async () => { + mockPushScenario({ unpushedOutput: "abc123 fix\n" }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("1 unpushed commit"); + expect(result.reason).not.toContain("commits"); + }); + + it("denies when no tracking branch exists", async () => { + mockPushScenario({ branch: "new-feature", hasTracking: false }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("push -u"); + expect(result.reason).toContain("new-feature"); + }); + + it("deny message includes branch name and remote", async () => { + mockPushScenario({ branch: "my-feature", hasTracking: false }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain('"my-feature"'); + expect(result.reason).toContain('"origin"'); + }); + + it("allows with message when all commits are pushed", async () => { + mockPushScenario({ unpushedOutput: "" }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("pushed"); + expect(result.reason).toContain('"origin"'); + }); + + it("allows with reason when no remote configured", async () => { + vi.mocked(execSync).mockReturnValue(""); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("No git remote"); + }); + + it("allows with reason when git remote returns only whitespace", async () => { + vi.mocked(execSync).mockReturnValue(" \n"); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("No git remote"); + }); + + it("allows with reason on detached HEAD", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("git remote")) return "origin\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return "HEAD\n"; + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("Detached HEAD"); + }); + + it("allows with reason when cwd is not available", async () => { + const ctx = makeCtx({ eventType: "Stop" }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("No working directory"); + }); + + it("uses custom remote param", async () => { + mockPushScenario({ remote: "upstream", branch: "feat", hasTracking: false }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" }, params: { remote: "upstream" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("upstream"); + }); + + it("uses custom remote in execFileSync verify command", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("git remote")) return "upstream\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return "feat\n"; + return ""; + }); + vi.mocked(execFileSync).mockImplementation((_cmd: string, args?: readonly string[]) => { + const joined = args?.join(" ") ?? ""; + if (joined.includes("--verify") && joined.includes("upstream/feat")) return "abc\n"; + if (joined.includes("--verify")) throw new Error("not found"); + if (joined.includes("upstream/feat..HEAD")) return ""; + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" }, params: { remote: "upstream" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain('"upstream"'); + }); + + it("fail-open when outer try/catch fires (git remote throws)", async () => { + vi.mocked(execSync).mockImplementation(() => { throw new Error("git: command not found"); }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("Could not check push status"); + }); + + it("handles multiple remotes (uses first line)", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("git remote")) return "origin\nupstream\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return "feat\n"; + return ""; + }); + vi.mocked(execFileSync).mockReturnValue(""); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + }); + + it("handles branch with slash characters", async () => { + mockPushScenario({ branch: "feat/deep/nested/branch", unpushedOutput: "" }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + }); + }); + + describe("require-pr-before-stop", () => { + const policy = BUILTIN_POLICIES.find((p) => p.name === "require-pr-before-stop")!; + + afterEach(() => { + vi.mocked(execSync).mockReset(); + clearGitBranchCache(); + }); + + it("denies when no PR exists", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("gh --version")) return "/usr/bin/gh\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return "feat/branch\n"; + if (typeof cmd === "string" && cmd.includes("gh pr view")) throw new Error("no pull requests found"); + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("No pull request"); + expect(result.reason).toContain("gh pr create"); + }); + + it("deny message includes the branch name", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("gh --version")) return "/usr/bin/gh\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return "my-feature\n"; + if (typeof cmd === "string" && cmd.includes("gh pr view")) throw new Error("no PR"); + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain('"my-feature"'); + }); + + it("allows with message when PR is open", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("gh --version")) return "/usr/bin/gh\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return "feat/branch\n"; + if (typeof cmd === "string" && cmd.includes("gh pr view")) return JSON.stringify({ number: 42, url: "https://github.com/org/repo/pull/42", state: "OPEN" }); + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("PR #42"); + expect(result.reason).toContain("https://github.com/org/repo/pull/42"); + }); + + it("denies when PR is closed", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("gh --version")) return "/usr/bin/gh\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return "feat/branch\n"; + if (typeof cmd === "string" && cmd.includes("gh pr view")) return JSON.stringify({ number: 42, url: "https://github.com/org/repo/pull/42", state: "CLOSED" }); + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("closed"); + expect(result.reason).toContain("gh pr create"); + }); + + it("denies when PR is merged", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("gh --version")) return "/usr/bin/gh\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return "feat/branch\n"; + if (typeof cmd === "string" && cmd.includes("gh pr view")) return JSON.stringify({ number: 42, url: "https://github.com/org/repo/pull/42", state: "MERGED" }); + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("merged"); + }); + + it("allows with reason when gh is not installed", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("gh --version")) throw new Error("not found"); + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("gh"); + }); + + it("allows with reason on detached HEAD", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("gh --version")) return "/usr/bin/gh\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return "HEAD\n"; + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("Detached HEAD"); + }); + + it("allows with reason when cwd is not available", async () => { + const ctx = makeCtx({ eventType: "Stop" }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("No working directory"); + }); + + it("allows with reason when getCurrentBranch returns null (not a git repo)", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("gh --version")) return "/usr/bin/gh\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) throw new Error("not a git repo"); + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + // getCurrentBranch returns null -> outer catch -> fail-open + expect(result.decision).toBe("allow"); + }); + + it("fail-open when gh pr view returns malformed JSON", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("gh --version")) return "/usr/bin/gh\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return "feat/branch\n"; + if (typeof cmd === "string" && cmd.includes("gh pr view")) return "not json {{{"; + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + // JSON.parse throws -> outer catch -> fail-open + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("Could not check PR status"); + }); + + it("handles branch names with special characters", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("gh --version")) return "/usr/bin/gh\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return "user/fix-123-issue\n"; + if (typeof cmd === "string" && cmd.includes("gh pr view")) return JSON.stringify({ number: 99, url: "https://github.com/org/repo/pull/99", state: "OPEN" }); + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("PR #99"); + }); + }); + + describe("require-ci-green-before-stop", () => { + const policy = BUILTIN_POLICIES.find((p) => p.name === "require-ci-green-before-stop")!; + + afterEach(() => { + vi.mocked(execSync).mockReset(); + vi.mocked(execFileSync).mockReset(); + clearGitBranchCache(); + }); + + function mockCiScenario(branch: string, ghRunListResult: string | Error) { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("gh --version")) return "gh version 2.40.0\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return `${branch}\n`; + return ""; + }); + if (ghRunListResult instanceof Error) { + vi.mocked(execFileSync).mockImplementation(() => { throw ghRunListResult; }); + } else { + vi.mocked(execFileSync).mockReturnValue(ghRunListResult); + } + } + + it("denies when CI checks are failing", async () => { + mockCiScenario("feat/branch", JSON.stringify([ + { status: "completed", conclusion: "failure", name: "test" }, + { status: "completed", conclusion: "success", name: "build" }, + ])); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("failing"); + expect(result.reason).toContain('"test"'); + }); + + it("denies listing multiple failed checks by name", async () => { + mockCiScenario("feat/branch", JSON.stringify([ + { status: "completed", conclusion: "failure", name: "test" }, + { status: "completed", conclusion: "failure", name: "lint" }, + { status: "completed", conclusion: "success", name: "build" }, + ])); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain('"test"'); + expect(result.reason).toContain('"lint"'); + }); + + it("denies when CI checks are in progress", async () => { + mockCiScenario("feat/branch", JSON.stringify([ + { status: "in_progress", conclusion: "", name: "test" }, + ])); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("still running"); + expect(result.reason).toContain('"test"'); + }); + + it("denies when CI checks are queued", async () => { + mockCiScenario("feat/branch", JSON.stringify([ + { status: "queued", conclusion: "", name: "deploy" }, + ])); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("still running"); + }); + + it("denies when CI checks are waiting", async () => { + mockCiScenario("feat/branch", JSON.stringify([ + { status: "waiting", conclusion: "", name: "approval-gate" }, + ])); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("still running"); + }); + + it("denies when CI has a cancelled conclusion (not success/skipped)", async () => { + mockCiScenario("feat/branch", JSON.stringify([ + { status: "completed", conclusion: "cancelled", name: "deploy" }, + ])); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("failing"); + }); + + it("failing checks take priority over pending checks", async () => { + mockCiScenario("feat/branch", JSON.stringify([ + { status: "completed", conclusion: "failure", name: "test" }, + { status: "in_progress", conclusion: "", name: "build" }, + ])); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + // Failure check comes first in code, so message says "failing" not "running" + expect(result.reason).toContain("failing"); + }); + + it("allows with message when all CI checks pass", async () => { + mockCiScenario("feat/branch", JSON.stringify([ + { status: "completed", conclusion: "success", name: "test" }, + { status: "completed", conclusion: "success", name: "build" }, + ])); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("All CI checks passed"); + expect(result.reason).toContain('"feat/branch"'); + }); + + it("treats skipped conclusions as success", async () => { + mockCiScenario("feat/branch", JSON.stringify([ + { status: "completed", conclusion: "skipped", name: "optional-check" }, + { status: "completed", conclusion: "success", name: "build" }, + ])); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + }); + + it("allows when all checks are skipped", async () => { + mockCiScenario("feat/branch", JSON.stringify([ + { status: "completed", conclusion: "skipped", name: "deploy" }, + { status: "completed", conclusion: "skipped", name: "e2e" }, + ])); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + }); + + it("allows with reason when gh is not installed", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("gh --version")) throw new Error("not found"); + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("gh"); + }); + + it("allows with reason when no CI runs exist (empty array)", async () => { + mockCiScenario("feat/branch", "[]"); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("No CI runs"); + }); + + it("allows with reason when gh run list returns empty string", async () => { + mockCiScenario("feat/branch", ""); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("No CI runs"); + }); + + it("allows with reason on detached HEAD", async () => { + vi.mocked(execSync).mockImplementation((cmd: string) => { + if (typeof cmd === "string" && cmd.includes("gh --version")) return "gh version 2.40.0\n"; + if (typeof cmd === "string" && cmd.includes("rev-parse --abbrev-ref")) return "HEAD\n"; + return ""; + }); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("Detached HEAD"); + }); + + it("allows with reason when cwd is not available", async () => { + const ctx = makeCtx({ eventType: "Stop" }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("No working directory"); + }); + + it("allows with reason on malformed JSON from gh", async () => { + mockCiScenario("feat/branch", "not json"); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("Could not check CI status"); + }); + + it("fail-open when gh run list command throws", async () => { + mockCiScenario("feat/branch", new Error("network timeout")); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(result.reason).toContain("Could not check CI status"); + }); + + it("includes branch name in deny message", async () => { + mockCiScenario("my-feature", JSON.stringify([ + { status: "completed", conclusion: "failure", name: "lint" }, + ])); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain('"my-feature"'); + }); + + it("passes branch name to execFileSync gh run list", async () => { + mockCiScenario("feat/test", JSON.stringify([ + { status: "completed", conclusion: "success", name: "test" }, + ])); + const ctx = makeCtx({ eventType: "Stop", session: { cwd: "/repo" } }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + expect(execFileSync).toHaveBeenCalledWith( + "gh", + expect.arrayContaining(["--branch", "feat/test"]), + expect.anything(), + ); + }); + }); }); diff --git a/__tests__/hooks/policy-evaluator.test.ts b/__tests__/hooks/policy-evaluator.test.ts index 4a172d15..6d5b93c7 100644 --- a/__tests__/hooks/policy-evaluator.test.ts +++ b/__tests__/hooks/policy-evaluator.test.ts @@ -189,6 +189,78 @@ describe("hooks/policy-evaluator", () => { expect(result.reason).toBe("first warning"); }); + describe("allow with message", () => { + it("returns additionalContext when allow has a reason", async () => { + registerPolicy("info", "desc", () => ({ + decision: "allow", + reason: "All checks passed", + }), { events: ["PreToolUse"] }); + + const result = await evaluatePolicies("PreToolUse", { tool_name: "Read" }); + expect(result.exitCode).toBe(0); + expect(result.decision).toBe("allow"); + expect(result.reason).toBe("All checks passed"); + const parsed = JSON.parse(result.stdout); + expect(parsed.hookSpecificOutput.additionalContext).toBe("All checks passed"); + }); + + it("combines multiple allow messages with newline", async () => { + registerPolicy("info1", "desc", () => ({ + decision: "allow", + reason: "Commit check passed", + }), { events: ["Stop"] }); + registerPolicy("info2", "desc", () => ({ + decision: "allow", + reason: "Push check passed", + }), { events: ["Stop"] }); + + const result = await evaluatePolicies("Stop", {}); + expect(result.exitCode).toBe(0); + expect(result.decision).toBe("allow"); + const parsed = JSON.parse(result.stdout); + expect(parsed.hookSpecificOutput.additionalContext).toBe("Commit check passed\nPush check passed"); + }); + + it("returns empty stdout when allow has no reason (backward-compatible)", async () => { + registerPolicy("ok", "desc", () => ({ decision: "allow" }), { events: ["PreToolUse"] }); + + const result = await evaluatePolicies("PreToolUse", { tool_name: "Bash" }); + expect(result.exitCode).toBe(0); + expect(result.stdout).toBe(""); + expect(result.reason).toBeNull(); + }); + + it("deny still takes precedence over allow with message", async () => { + registerPolicy("info", "desc", () => ({ + decision: "allow", + reason: "looks good", + }), { events: ["PreToolUse"] }); + registerPolicy("blocker", "desc", () => ({ + decision: "deny", + reason: "blocked", + }), { events: ["PreToolUse"] }); + + const result = await evaluatePolicies("PreToolUse", { tool_name: "Bash" }); + expect(result.decision).toBe("deny"); + expect(result.policyName).toBe("blocker"); + }); + + it("instruct takes precedence over allow with message", async () => { + registerPolicy("info", "desc", () => ({ + decision: "allow", + reason: "looks good", + }), { events: ["PreToolUse"] }); + registerPolicy("advisor", "desc", () => ({ + decision: "instruct", + reason: "be careful", + }), { events: ["PreToolUse"] }); + + const result = await evaluatePolicies("PreToolUse", { tool_name: "Bash" }); + expect(result.decision).toBe("instruct"); + expect(result.policyName).toBe("advisor"); + }); + }); + describe("params injection", () => { it("injects schema defaults into ctx.params when no policyParams in config", async () => { let capturedParams: unknown = null; @@ -252,4 +324,150 @@ describe("hooks/policy-evaluator", () => { expect(capturedParams).toEqual({}); }); }); + + describe("Stop event deny format", () => { + it("Stop deny uses exit code 2 with empty stdout", async () => { + registerPolicy("stop-blocker", "desc", () => ({ + decision: "deny", + reason: "changes not committed", + }), { events: ["Stop"] }); + + const result = await evaluatePolicies("Stop", {}); + expect(result.exitCode).toBe(2); + expect(result.stdout).toBe(""); + expect(result.stderr).toBe(""); + expect(result.decision).toBe("deny"); + expect(result.reason).toBe("changes not committed"); + }); + + it("Stop deny short-circuits subsequent policies", async () => { + const secondPolicyCalled = { value: false }; + registerPolicy("blocker", "desc", () => ({ + decision: "deny", + reason: "blocked first", + }), { events: ["Stop"] }); + registerPolicy("second", "desc", () => { + secondPolicyCalled.value = true; + return { decision: "allow" }; + }, { events: ["Stop"] }); + + await evaluatePolicies("Stop", {}); + expect(secondPolicyCalled.value).toBe(false); + }); + }); + + describe("workflow policy chain integration", () => { + it("first deny short-circuits — later workflow policies do not run", async () => { + const policyCalls: string[] = []; + + registerPolicy("require-commit", "desc", () => { + policyCalls.push("commit"); + return { decision: "deny", reason: "uncommitted changes" }; + }, { events: ["Stop"] }); + registerPolicy("require-push", "desc", () => { + policyCalls.push("push"); + return { decision: "deny", reason: "unpushed commits" }; + }, { events: ["Stop"] }); + registerPolicy("require-pr", "desc", () => { + policyCalls.push("pr"); + return { decision: "deny", reason: "no PR" }; + }, { events: ["Stop"] }); + + const result = await evaluatePolicies("Stop", {}); + expect(result.decision).toBe("deny"); + expect(result.policyName).toBe("require-commit"); + expect(policyCalls).toEqual(["commit"]); + }); + + it("all workflow policies allow with messages — messages accumulate", async () => { + registerPolicy("wf-commit", "desc", () => ({ + decision: "allow", + reason: "All changes committed", + }), { events: ["Stop"] }); + registerPolicy("wf-push", "desc", () => ({ + decision: "allow", + reason: "All commits pushed", + }), { events: ["Stop"] }); + registerPolicy("wf-pr", "desc", () => ({ + decision: "allow", + reason: "PR #42 exists", + }), { events: ["Stop"] }); + registerPolicy("wf-ci", "desc", () => ({ + decision: "allow", + reason: "All CI checks passed", + }), { events: ["Stop"] }); + + const result = await evaluatePolicies("Stop", {}); + expect(result.exitCode).toBe(0); + expect(result.decision).toBe("allow"); + const parsed = JSON.parse(result.stdout); + const ctx = parsed.hookSpecificOutput.additionalContext; + expect(ctx).toContain("All changes committed"); + expect(ctx).toContain("All commits pushed"); + expect(ctx).toContain("PR #42 exists"); + expect(ctx).toContain("All CI checks passed"); + }); + + it("allow messages from early policies are discarded when a later policy denies", async () => { + registerPolicy("wf-commit", "desc", () => ({ + decision: "allow", + reason: "All changes committed", + }), { events: ["Stop"] }); + registerPolicy("wf-push", "desc", () => ({ + decision: "deny", + reason: "unpushed commits", + }), { events: ["Stop"] }); + + const result = await evaluatePolicies("Stop", {}); + expect(result.decision).toBe("deny"); + expect(result.policyName).toBe("wf-push"); + expect(result.reason).toBe("unpushed commits"); + }); + + it("instruct on Stop takes precedence over allow messages", async () => { + registerPolicy("wf-commit", "desc", () => ({ + decision: "allow", + reason: "All committed", + }), { events: ["Stop"] }); + registerPolicy("wf-instruct", "desc", () => ({ + decision: "instruct", + reason: "Please verify tests", + }), { events: ["Stop"] }); + + const result = await evaluatePolicies("Stop", {}); + expect(result.decision).toBe("instruct"); + expect(result.exitCode).toBe(2); + expect(result.stderr).toBe("Please verify tests"); + }); + + it("mixed allow (no message) and allow (with message) — only messages returned", async () => { + registerPolicy("silent", "desc", () => ({ + decision: "allow", + }), { events: ["Stop"] }); + registerPolicy("informative", "desc", () => ({ + decision: "allow", + reason: "CI is green", + }), { events: ["Stop"] }); + + const result = await evaluatePolicies("Stop", {}); + expect(result.exitCode).toBe(0); + expect(result.decision).toBe("allow"); + const parsed = JSON.parse(result.stdout); + expect(parsed.hookSpecificOutput.additionalContext).toBe("CI is green"); + }); + + it("policy that throws is skipped — subsequent policies still run", async () => { + registerPolicy("thrower", "desc", () => { + throw new Error("unexpected crash"); + }, { events: ["Stop"] }); + registerPolicy("checker", "desc", () => ({ + decision: "deny", + reason: "uncommitted", + }), { events: ["Stop"] }); + + const result = await evaluatePolicies("Stop", {}); + expect(result.decision).toBe("deny"); + expect(result.policyName).toBe("checker"); + }); + }); }); diff --git a/docs/architecture.mdx b/docs/architecture.mdx index 677c46a2..a70e0be2 100644 --- a/docs/architecture.mdx +++ b/docs/architecture.mdx @@ -102,6 +102,22 @@ The handler enforces a 1 MB stdin limit. Payloads exceeding this are discarded a - Exit code: `0` - Empty stdout +**Allow with message (beta):** + +Since v0.0.2-beta.3, `allow(message)` lets a policy send informational context back to Claude even when the operation is permitted. The hook handler writes the following JSON to **stdout** (not a config file — this is the handler's response to Claude Code, just like deny and instruct responses above): + +```json +// Written to stdout by the hook handler process +{ + "hookSpecificOutput": { + "additionalContext": "All CI checks passed on branch 'feat/my-feature'." + } +} +``` +- Exit code: `0` (operation is allowed) +- When multiple policies return `allow` with a message, their messages are joined with newlines into a single `additionalContext` string +- If no policy provides a message, stdout is empty (same as before) + ### Processing pipeline `src/hooks/handler.ts` implements the full pipeline: @@ -117,6 +133,7 @@ stdin JSON → evaluate all policies (builtins first, then custom) → first deny short-circuits → instruct decisions accumulate + → allow messages accumulate → write JSON decision to stdout → persist event to ~/.failproofai/hook-activity.jsonl → exit diff --git a/docs/built-in-policies.mdx b/docs/built-in-policies.mdx index c19e6880..8a3376ad 100644 --- a/docs/built-in-policies.mdx +++ b/docs/built-in-policies.mdx @@ -1,10 +1,10 @@ --- title: Built-in Policies -description: "All 26 built-in policies that catch common agent failure modes" +description: "All 30 built-in policies that catch common agent failure modes" icon: shield --- -failproofai ships with 26 built-in policies that catch common agent failure modes. Each policy fires on a specific hook event type and tool name. Eight policies accept parameters that let you tune their behavior without writing code. +failproofai ships with 30 built-in policies that catch common agent failure modes. Each policy fires on a specific hook event type and tool name. Nine policies accept parameters that let you tune their behavior without writing code. Four beta workflow policies enforce a commit → push → PR → CI pipeline before Claude stops. --- @@ -21,8 +21,12 @@ Policies are grouped into categories: | [Git](#git) | block-push-master, block-work-on-main, block-force-push, warn-git-amend, warn-git-stash-drop, warn-all-files-staged | PreToolUse | | [Database](#database) | warn-destructive-sql, warn-schema-alteration | PreToolUse | | [Warnings](#warnings) | warn-large-file-write, warn-package-publish, warn-background-process, warn-global-package-install | PreToolUse | +| [Workflow (beta)](#workflow-beta) | require-commit-before-stop, require-push-before-stop, require-pr-before-stop, require-ci-green-before-stop | Stop | -Policies prefixed with `block-` stop the agent from proceeding. Policies prefixed with `warn-` give the agent additional context so it can self-correct. Policies prefixed with `sanitize-` scrub sensitive data from tool output before the agent sees it. +- **`block-`** — stop the agent from proceeding. +- **`warn-`** — give the agent additional context so it can self-correct. +- **`sanitize-`** — scrub sensitive data from tool output before the agent sees it. +- **`require-`** — block the Stop event until conditions are met. --- @@ -429,15 +433,102 @@ No parameters. --- +## Workflow (beta) + +Enforce a disciplined end-of-session workflow. These policies fire on the **Stop** event and deny Claude from stopping until each condition is met. They follow a natural dependency chain: commit → push → PR → CI. If a policy denies, later policies in the chain are skipped (deny short-circuits). + +All workflow policies are **fail-open**: if the required tool is not available (e.g. `gh` not installed, no git remote), the policy allows with an informational message explaining why the check was skipped. + +### `require-commit-before-stop` + +**Event:** Stop +**Default:** Denies stopping when there are uncommitted changes (modified, staged, or untracked files). Returns an informational message when the working directory is clean. + +No parameters. + +--- + +### `require-push-before-stop` + +**Event:** Stop +**Default:** Denies stopping when there are unpushed commits or when the current branch has no remote tracking branch. Suggests `git push -u` to create a tracking branch if needed. Fails open if no remote is configured. + +**Parameters:** + +| Param | Type | Default | Description | +|-------|------|---------|-------------| +| `remote` | `string` | `"origin"` | Remote name to push to. | + +**Example:** + +```json +{ + "policyParams": { + "require-push-before-stop": { + "remote": "upstream" + } + } +} +``` + +--- + +### `require-pr-before-stop` + +**Event:** Stop +**Default:** Denies stopping when no pull request exists for the current branch, or when the existing PR is closed/merged. Instructs Claude to create a PR with `gh pr create`. + +No parameters. + + +This policy requires [GitHub CLI](https://cli.github.com/) (`gh`) to be installed and authenticated. +Run `gh auth login` with a personal access token that has `repo` scope for read access to +pull requests. If `gh` is not installed or not authenticated, the policy fails open and reports the reason to Claude. + + +--- + +### `require-ci-green-before-stop` + +**Event:** Stop +**Default:** Denies stopping when CI checks are failing or still running on the current branch. Treats `skipped` conclusions as success. Returns an informational message when all checks pass. + +No parameters. + + +This policy requires [GitHub CLI](https://cli.github.com/) (`gh`) to be installed and authenticated. +Run `gh auth login` with a personal access token that has `repo` scope for read access to +Actions workflow runs. If `gh` is not installed or not authenticated, the policy fails open and reports the reason to Claude. + + +--- + ## Beta policies -Some policies are marked `beta` and are not installed by default. To include them: +Some policies are marked `beta` and are not installed by default. Beta policies may have rough edges or generate false positives. They graduate to stable in future releases. + +**Current beta policies:** + +- `require-commit-before-stop` — commit all changes before stopping +- `require-push-before-stop` — push all commits to remote +- `require-pr-before-stop` — ensure a PR exists for the branch +- `require-ci-green-before-stop` — all CI checks must pass + +Together, these enforce a **commit → push → PR → CI** workflow. + +To install all beta policies: ```bash failproofai policies --install --beta ``` -Beta policies may have rough edges or generate false positives. Run `failproofai policies` to see which policies carry the beta flag. +Or install specific workflow policies: + +```bash +failproofai policies --install require-commit-before-stop require-push-before-stop require-pr-before-stop require-ci-green-before-stop +``` + +Run `failproofai policies` to see which policies carry the beta flag. --- diff --git a/docs/custom-policies.mdx b/docs/custom-policies.mdx index 4e933551..4302eb2b 100644 --- a/docs/custom-policies.mdx +++ b/docs/custom-policies.mdx @@ -79,13 +79,47 @@ customPolicies.add({ | Function | Effect | Use when | |----------|--------|----------| -| `allow()` | Permit the tool call | The action is safe to proceed | -| `deny(message)` | Block the tool call | The agent should not take this action | -| `instruct(message)` | Add context to Claude's prompt | Give the agent extra context to stay on track | +| `allow()` | Permit the operation silently | The action is safe, no message needed | +| `deny(message)` | Block the operation | The agent should not take this action | +| `instruct(message)` | Add context without blocking | Give the agent extra context to stay on track | -`deny(message)` - the message appears to Claude prefixed with `"Blocked by failproofai:"`. +`deny(message)` - the message appears to Claude prefixed with `"Blocked by failproofai:"`. A single `deny` short-circuits all further evaluation. -`instruct(message)` - the message is appended to Claude's context for the current tool call. Multiple `instruct` returns from different hooks accumulate; a single `deny` short-circuits all further evaluation. +`instruct(message)` - the message is appended to Claude's context for the current tool call. The first `instruct` wins — subsequent `instruct` returns from other policies are ignored. + +### Informational allow messages (beta) + + +`allow(message)` is a beta feature available since v0.0.2-beta.3. The API may change in future releases. Earlier versions only support `allow()` without arguments. + + +`allow(message)` permits the operation **and** sends an informational message back to Claude. The message is delivered as `additionalContext` in the hook handler's stdout response — the same mechanism used by `instruct`, but semantically different: it's a status update, not a warning. + +| Function | Effect | Use when | +|----------|--------|----------| +| `allow(message)` | Permit and send context to Claude | Confirm a check passed, or explain why a check was skipped | + +Use cases: +- **Status confirmations:** `allow("All CI checks passed.")` — tells Claude everything is green +- **Fail-open explanations:** `allow("GitHub CLI not installed, skipping CI check.")` — tells Claude why a check was skipped so it has full context +- **Multiple messages accumulate:** if several policies each return `allow(message)`, all messages are joined with newlines and delivered together + +```js +customPolicies.add({ + name: "confirm-branch-status", + match: { events: ["Stop"] }, + fn: async (ctx) => { + const cwd = ctx.session?.cwd; + if (!cwd) return allow("No working directory, skipping branch check."); + + // ... check branch status ... + if (allPushed) { + return allow("Branch is up to date with remote."); + } + return deny("Unpushed changes detected."); + }, +}); +``` ### `PolicyContext` fields @@ -124,7 +158,7 @@ Policies are evaluated in this order: 2. Custom policies (in `.add()` order) -The first `deny` short-circuits all subsequent policies. All `instruct` returns accumulate into a single message regardless of which policy produced them. +The first `deny` short-circuits all subsequent policies. The first `instruct` wins — subsequent `instruct` returns are ignored. --- diff --git a/package.json b/package.json index 04085cff..7c1cc11b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "failproofai", - "version": "0.0.2", + "version": "0.0.2-beta.3", "description": "The easiest way to manage policies that keep your AI agents reliable, on-task, and running autonomously — for Claude Code & the Agents SDK", "bin": { "failproofai": "./dist/cli.mjs" diff --git a/src/hooks/builtin-policies.ts b/src/hooks/builtin-policies.ts index 6e1f03eb..5be07e02 100644 --- a/src/hooks/builtin-policies.ts +++ b/src/hooks/builtin-policies.ts @@ -3,7 +3,7 @@ */ import { resolve, join } from "node:path"; import { readFile, writeFile, stat, open } from "node:fs/promises"; -import { execSync } from "node:child_process"; +import { execSync, execFileSync } from "node:child_process"; import { homedir } from "node:os"; import type { BuiltinPolicyDefinition, PolicyContext, PolicyResult, PolicyParamsSchema } from "./policy-types"; import { allow, deny, instruct } from "./policy-helpers"; @@ -145,12 +145,29 @@ const TMUX_DETACH_RE = /\btmux\s+(?:new-session|new)\b[^|&;]*-d\b/; const DISOWN_RE = /\bdisown\b/; const BACKGROUND_AMPERSAND_RE = /(?(); +function getCurrentBranch(cwd: string): string | null { + try { + let branch = gitBranchCache.get(cwd); + if (branch === undefined) { + branch = execSync("git rev-parse --abbrev-ref HEAD", { + cwd, + encoding: "utf8", + timeout: 3000, + }).trim(); + gitBranchCache.set(cwd, branch); + } + return branch || null; + } catch { + return null; + } +} + /** * Check if a command matches an allow pattern using token-by-token comparison. * The "*" token is a wildcard. Extra command tokens beyond the pattern are allowed, @@ -627,24 +644,14 @@ function blockWorkOnMain(ctx: PolicyContext): PolicyResult { const cwd = ctx.session?.cwd; if (!cwd) return allow(); - try { - let branch = gitBranchCache.get(cwd); - if (branch === undefined) { - branch = execSync("git rev-parse --abbrev-ref HEAD", { - cwd, - encoding: "utf8", - timeout: 3000, - }).trim(); - gitBranchCache.set(cwd, branch); - } - const protectedBranches = ((ctx.params?.protectedBranches ?? ["main", "master"]) as string[]); - if (protectedBranches.includes(branch)) { - return deny( - `Git ${cmd.match(/git\s+(\S+)/)?.[1] ?? "operation"} on ${branch} is blocked. Create a feature branch first.`, - ); - } - } catch { - return allow(); + const branch = getCurrentBranch(cwd); + if (!branch) return allow(); + + const protectedBranches = ((ctx.params?.protectedBranches ?? ["main", "master"]) as string[]); + if (protectedBranches.includes(branch)) { + return deny( + `Git ${cmd.match(/git\s+(\S+)/)?.[1] ?? "operation"} on ${branch} is blocked. Create a feature branch first.`, + ); } return allow(); } @@ -786,6 +793,195 @@ function warnBackgroundProcess(ctx: PolicyContext): PolicyResult { return allow(); } +// -- Workflow (Stop event) policies -- + +function requireCommitBeforeStop(ctx: PolicyContext): PolicyResult { + const cwd = ctx.session?.cwd; + if (!cwd) return allow("No working directory available, skipping commit check."); + + try { + const status = execSync("git status --porcelain", { + cwd, + encoding: "utf8", + timeout: 5000, + }).trim(); + + if (status.length > 0) { + return deny( + "You have uncommitted changes in the working directory. Commit all changes before stopping.", + ); + } + return allow("All changes are committed."); + } catch { + return allow("Not a git repository, skipping commit check."); + } +} + +function requirePushBeforeStop(ctx: PolicyContext): PolicyResult { + const cwd = ctx.session?.cwd; + if (!cwd) return allow("No working directory available, skipping push check."); + + try { + const remotes = execSync("git remote", { + cwd, + encoding: "utf8", + timeout: 3000, + }).trim(); + + if (!remotes) return allow("No git remote configured, skipping push check."); + + const remote = (ctx.params?.remote as string) ?? "origin"; + + const branch = getCurrentBranch(cwd); + if (!branch || branch === "HEAD") return allow("Detached HEAD, skipping push check."); + + // Check if remote tracking branch exists + let hasTracking = false; + try { + execFileSync("git", ["rev-parse", "--verify", `${remote}/${branch}`], { + cwd, + encoding: "utf8", + timeout: 3000, + }); + hasTracking = true; + } catch { + // Remote tracking branch does not exist + } + + if (!hasTracking) { + return deny( + `Branch "${branch}" has not been pushed to remote "${remote}". ` + + `Push your branch with: git push -u ${remote} ${branch}`, + ); + } + + // Check for unpushed commits + const unpushed = execFileSync("git", ["log", `${remote}/${branch}..HEAD`, "--oneline"], { + cwd, + encoding: "utf8", + timeout: 5000, + }).trim(); + + if (unpushed.length > 0) { + const commitCount = unpushed.split("\n").length; + return deny( + `You have ${commitCount} unpushed commit${commitCount > 1 ? "s" : ""} on branch "${branch}". ` + + `Push your changes with: git push`, + ); + } + + return allow(`All commits pushed to "${remote}".`); + } catch { + return allow("Could not check push status, skipping."); + } +} + +function requirePrBeforeStop(ctx: PolicyContext): PolicyResult { + const cwd = ctx.session?.cwd; + if (!cwd) return allow("No working directory available, skipping PR check."); + + try { + // Check if gh CLI is available + try { + execSync("gh --version", { cwd, encoding: "utf8", timeout: 3000 }); + } catch { + return allow("GitHub CLI (gh) not installed, skipping PR check."); + } + + const branch = getCurrentBranch(cwd); + if (!branch || branch === "HEAD") return allow("Detached HEAD, skipping PR check."); + + // Check if a PR exists for this branch + let prJson: string; + try { + prJson = execSync("gh pr view --json number,url,state", { + cwd, + encoding: "utf8", + timeout: 15000, + }).trim(); + } catch { + // gh pr view exits non-zero when no PR exists + return deny( + `No pull request found for branch "${branch}". ` + + `Create one with: gh pr create`, + ); + } + + const pr = JSON.parse(prJson) as { number: number; url: string; state: string }; + + if (pr.state === "OPEN") { + return allow(`PR #${pr.number} exists: ${pr.url}`); + } + + return deny( + `Pull request for branch "${branch}" is ${pr.state.toLowerCase()}. Create a new PR with: gh pr create`, + ); + } catch { + return allow("Could not check PR status, skipping."); + } +} + +function requireCiGreenBeforeStop(ctx: PolicyContext): PolicyResult { + const cwd = ctx.session?.cwd; + if (!cwd) return allow("No working directory available, skipping CI check."); + + try { + // Check if gh CLI is available + try { + execSync("gh --version", { cwd, encoding: "utf8", timeout: 3000 }); + } catch { + return allow("GitHub CLI (gh) not installed, skipping CI check."); + } + + const branch = getCurrentBranch(cwd); + if (!branch || branch === "HEAD") return allow("Detached HEAD, skipping CI check."); + + const runsJson = execFileSync( + "gh", + ["run", "list", "--branch", branch, "--limit", "5", "--json", "status,conclusion,name"], + { + cwd, + encoding: "utf8", + timeout: 15000, + }, + ).trim(); + + if (!runsJson || runsJson === "[]") return allow(`No CI runs found for branch "${branch}".`); + + const runs = JSON.parse(runsJson) as Array<{ + status: string; + conclusion: string; + name: string; + }>; + + if (runs.length === 0) return allow(`No CI runs found for branch "${branch}".`); + + const failing = runs.filter( + (r) => r.status === "completed" && r.conclusion !== "success" && r.conclusion !== "skipped", + ); + if (failing.length > 0) { + const names = failing.map((r) => `"${r.name}"`).join(", "); + return deny( + `CI checks are failing on branch "${branch}": ${names}. Fix the failing checks before stopping.`, + ); + } + + const pending = runs.filter( + (r) => r.status === "in_progress" || r.status === "queued" || r.status === "waiting", + ); + if (pending.length > 0) { + const names = pending.map((r) => `"${r.name}"`).join(", "); + return deny( + `CI checks are still running on branch "${branch}": ${names}. Wait for all checks to complete and verify they pass.`, + ); + } + + return allow(`All CI checks passed on branch "${branch}".`); + } catch { + return allow("Could not check CI status, skipping."); + } +} + // -- Registry -- export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ @@ -1053,6 +1249,49 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ defaultEnabled: false, category: "AI Behavior", }, + { + name: "require-commit-before-stop", + description: "Require all changes to be committed before Claude stops", + fn: requireCommitBeforeStop, + match: { events: ["Stop"] }, + defaultEnabled: false, + category: "Workflow", + beta: true, + }, + { + name: "require-push-before-stop", + description: "Require all commits to be pushed to remote before Claude stops", + fn: requirePushBeforeStop, + match: { events: ["Stop"] }, + defaultEnabled: false, + category: "Workflow", + beta: true, + params: { + remote: { + type: "string", + description: "Remote name to push to (default: origin)", + default: "origin", + }, + } satisfies PolicyParamsSchema, + }, + { + name: "require-pr-before-stop", + description: "Require a pull request to exist for the current branch before Claude stops", + fn: requirePrBeforeStop, + match: { events: ["Stop"] }, + defaultEnabled: false, + category: "Workflow", + beta: true, + }, + { + name: "require-ci-green-before-stop", + description: "Require CI checks to pass on the current branch before Claude stops", + fn: requireCiGreenBeforeStop, + match: { events: ["Stop"] }, + defaultEnabled: false, + category: "Workflow", + beta: true, + }, ]; export function registerBuiltinPolicies(enabledNames: string[]): void { diff --git a/src/hooks/policy-evaluator.ts b/src/hooks/policy-evaluator.ts index f5bb1d70..2a210b97 100644 --- a/src/hooks/policy-evaluator.ts +++ b/src/hooks/policy-evaluator.ts @@ -51,6 +51,9 @@ export async function evaluatePolicies( let instructPolicyName: string | null = null; let instructReason: string | null = null; + // Track informational messages from allow decisions + const allowMessages: string[] = []; + for (const policy of policies) { // Inject params: merge policyParams[policy.name] over schema defaults const schema = POLICY_PARAMS_MAP.get(policy.name); @@ -133,6 +136,11 @@ export async function evaluatePolicies( instructReason = result.reason ?? `Instruction from policy: ${policy.name}`; hookLogInfo(`instruct by "${policy.name}": ${instructReason}`); } + + // Accumulate informational messages from allow decisions + if (result.decision === "allow" && result.reason) { + allowMessages.push(result.reason); + } } // No deny — check if we accumulated an instruct @@ -166,6 +174,16 @@ export async function evaluatePolicies( }; } - // All policies allowed + // All policies allowed — pass along any informational messages + if (allowMessages.length > 0) { + const combined = allowMessages.join("\n"); + const response = { + hookSpecificOutput: { + hookEventName: eventType, + additionalContext: combined, + }, + }; + return { exitCode: 0, stdout: JSON.stringify(response), stderr: "", policyName: null, reason: combined, decision: "allow" }; + } return { exitCode: 0, stdout: "", stderr: "", policyName: null, reason: null, decision: "allow" }; } diff --git a/src/hooks/policy-helpers.ts b/src/hooks/policy-helpers.ts index 9d3a7d77..878b446f 100644 --- a/src/hooks/policy-helpers.ts +++ b/src/hooks/policy-helpers.ts @@ -3,8 +3,8 @@ */ import type { PolicyResult } from "./policy-types"; -export function allow(): PolicyResult { - return { decision: "allow" }; +export function allow(reason?: string): PolicyResult { + return reason ? { decision: "allow", reason } : { decision: "allow" }; } export function deny(reason: string): PolicyResult {