From 2c9b5c2c3e3c4f4ffde406d7e3ed077ab2bb5465 Mon Sep 17 00:00:00 2001 From: Yash Upadhyay Date: Tue, 21 Apr 2026 11:31:02 +0000 Subject: [PATCH 01/34] restore: add back integration test files and documentation --- INTEGRATION_EXECUTION_GUIDE.md | 885 ++++++++++++++++++ INTEGRATION_PLAN.md | 267 ++++++ __tests__/INTEGRATION_TEST_CASES.md | 869 +++++++++++++++++ .../e2e/hooks/copilot-integration.e2e.test.ts | 241 +++++ .../e2e/hooks/cursor-integration.e2e.test.ts | 178 ++++ 5 files changed, 2440 insertions(+) create mode 100644 INTEGRATION_EXECUTION_GUIDE.md create mode 100644 INTEGRATION_PLAN.md create mode 100644 __tests__/INTEGRATION_TEST_CASES.md create mode 100644 __tests__/e2e/hooks/copilot-integration.e2e.test.ts create mode 100644 __tests__/e2e/hooks/cursor-integration.e2e.test.ts diff --git a/INTEGRATION_EXECUTION_GUIDE.md b/INTEGRATION_EXECUTION_GUIDE.md new file mode 100644 index 00000000..32ba3039 --- /dev/null +++ b/INTEGRATION_EXECUTION_GUIDE.md @@ -0,0 +1,885 @@ +# Integration Execution Guide + +> Snapshot for branch `feat/cursor-integration`. +> +> This guide is grounded in the current source and test surface in this branch, not in wishful architecture. +> +> Refresh this file whenever integration source, integration tests, or integration fixtures materially change. +> +> Primary truth sources for this document: +> `src/hooks/types.ts`, `src/hooks/integrations.ts`, `src/hooks/handler.ts`, +> `__tests__/hooks/integrations.test.ts`, `__tests__/hooks/handler.test.ts`, +> `__tests__/e2e/helpers/payloads.ts`, +> `__tests__/e2e/hooks/*.test.ts`, +> `__tests__/INTEGRATION_TEST_CASES.md`. + +--- + +## 1. Purpose And How To Use This Guide + +**Simple View** + +This file is the main playbook for non-Claude integrations in this repo: +Cursor, Gemini, GitHub Copilot, Codex, OpenCode, and Pi. + +Use it when you need to answer four questions: + +1. What already exists in this branch? +2. What is still missing? +3. Which regressions matter most? +4. 
What tests should be written next? + +If you are new to the repo, read this file top to bottom once. +If you are doing implementation work, jump in this order: + +1. Current Branch Truth Snapshot +2. Cross-Integration Pending Matrix +3. The playbook for your integration +4. How To Convert A Pending Row Into Tests + +**Expert View** + +This file is not a replacement for `__tests__/INTEGRATION_TEST_CASES.md`. + +Use this guide for: + +- current branch truth +- prioritization +- gap tracking +- next-test planning +- regression awareness + +Use `__tests__/INTEGRATION_TEST_CASES.md` for: + +- the deeper edge-case contract +- long-form assertions +- named regression references +- exhaustive future test ideas + +If this guide and the source code disagree, trust the source code first and update this guide second. + +### Important Words In Simple Language + +| Term | Meaning | +|---|---| +| Integration | One external agent or tool that failproofai connects to, such as Cursor or Copilot | +| Native event name | The exact event name used by that tool, such as `preToolUse` or `pre_tool_use` | +| Canonical event name | The shared internal failproofai event name, such as `PreToolUse` | +| Payload | The JSON data sent into the hook handler | +| Normalize | Convert different payload styles into one common internal shape | +| Regression test | A test for a bug that already happened once and must not return | +| Gap table | A truth table that says what is implemented, what is tested, and what is still missing | + +### Where To Look In This Repo + +| File Or Area | Why It Matters | +|---|---| +| `src/hooks/types.ts` | integration ids, native event lists, native-to-canonical maps | +| `src/hooks/integrations.ts` | install, uninstall, detect, normalize, command generation, helper logic | +| `src/hooks/handler.ts` | stdin parsing, attribution, session extraction, persistence, transcript logic | +| `src/hooks/manager.ts` | CLI install and uninstall flow | +| 
`__tests__/hooks/integrations.test.ts` | unit tests for per-integration object behavior | +| `__tests__/hooks/handler.test.ts` | unit tests for runtime and handler behavior | +| `__tests__/e2e/helpers/payloads.ts` | reusable event payload builders | +| `__tests__/e2e/hooks/*.test.ts` | true end-to-end integration flows | +| `__tests__/INTEGRATION_TEST_CASES.md` | deeper contract and regression checklist | +| `AGENTS.md` | repo rules for testing, Docker smoke tests, CI, and branch hygiene | + +--- + +## 2. What “Done” Means For Any Integration + +**Simple View** + +An integration is not done just because one event appears in the dashboard. + +An integration is done only when all of these are true: + +- install works +- uninstall works +- reinstall does not duplicate hooks +- native events fire correctly +- event names map to the right canonical names +- payload data is normalized correctly +- session id is extracted correctly +- dashboard shows the right integration and the right session +- policies still allow, deny, or instruct correctly +- important regressions have named tests +- the relevant tests pass + +**Expert View** + +Minimum done-bar for any integration: + +- source registration exists in `types.ts` and `integrations.ts` +- install and uninstall behavior is covered +- integration identity is reliable with and without `--integration` +- session id fallback is safe +- persistence fields are correct +- one broken branch does not silently relabel data as another integration +- regression-prone behavior has dedicated tests, not only incidental coverage + +Things that are not enough: + +- “events show in dashboard” +- “manual testing looked okay once” +- “unit section exists” +- “one e2e test passes” + +Done means stable, attributable, test-backed behavior. + +### One Big Rule + +Do one integration fully before moving to the next one. + +Bad pattern: + +- Gemini half done +- Cursor half done +- Copilot half done +- Codex half done + +Good pattern: + +1. 
Pick one integration +2. Make it stable +3. Add regression tests +4. Only then move to the next one + +This is the fastest safe way to work on this repo. + +--- + +## 3. Current Branch Truth Snapshot + +**Simple View** + +This branch already has real implementations for all 6 non-Claude integrations. +The problem is not “nothing exists.” +The problem is that the code is ahead of the tests. + +Today: + +- Cursor, Gemini, and Copilot have the strongest test surface +- Codex and OpenCode have unit coverage but no dedicated e2e file +- Pi has source code, but the weakest visible test surface +- handler-level integration coverage is shallow across the branch + +**Expert View** + +Status meanings used below: + +- `Yes`: a dedicated surface exists +- `Shallow`: only light or indirect coverage exists +- `No`: no dedicated surface was found + +| Integration | Source Implemented | Unit Coverage | Handler Coverage | E2E Coverage | Payload Fixtures | Highest-Risk Pending Area | +|---|---|---|---|---|---|---| +| Cursor | Yes | Yes | Shallow | Yes | Yes | Twin-fire dedup, cwd/workspace attribution, MCP deep cases | +| Gemini | Yes | Yes | Shallow | Yes | Yes | Deep extraction, attribution without flag, transcript and dashboard deep cases | +| Copilot | Yes | Yes | Shallow | Yes | Yes | `toolArgs` parsing, sync/snap branches, session-id and dashboard regressions, silence guard | +| Codex | Yes | Yes | Shallow | No dedicated file | No dedicated section | Snake_case identity, handler mapping, trace-related behavior | +| OpenCode | Yes | Yes | Shallow | No dedicated file | No dedicated section | Plugin blocking, stderr silence, session persistence | +| Pi | Yes | No dedicated section found | Shallow | No dedicated file | No dedicated section | Extension session handling, UI feedback, recursive isolation | + +Global handler note: +`__tests__/hooks/handler.test.ts` currently does not provide deep, integration-specific coverage across these six integrations. 
+Treat handler behavior as an active gap area unless a dedicated assertion is clearly present. + +Current file-evidence summary: + +- Dedicated e2e files exist today for `cursor`, `gemini`, and `copilot` +- Dedicated payload helper sections exist today for `CursorPayloads`, `GeminiPayloads`, and `CopilotPayloads` +- Dedicated `integrations.test.ts` sections exist today for `cursor`, `gemini`, `copilot`, `codex`, and `opencode` +- No dedicated `integrations.test.ts` section was found for `pi` + +Confirmed missing surfaces discussed in planning: + +- Codex has source and unit coverage, but still has no dedicated e2e lane +- OpenCode has source and unit coverage, but still has no dedicated e2e lane +- Pi is the weakest current surface: no dedicated unit section, no dedicated e2e lane, no dedicated payload helpers +- Codex, OpenCode, and Pi still do not have dedicated payload-helper sections in `__tests__/e2e/helpers/payloads.ts` +- integration-specific handler coverage is shallow across the whole branch +- persistence, dashboard-field, transcript-path, and virtual-mirror assertions are still weak or missing across several integrations +- cross-version compatibility remains mostly unproven +- scope interactions and dedup behavior still need stronger regression coverage + +--- + +## 4. Cross-Integration Pending Matrix + +**Simple View** + +Most of the missing work is not in “writing integration source from zero.” +Most of the missing work is in proving the source with the right tests. + +Use this matrix when you want to decide where to work next by layer instead of by integration. 
+ +**Expert View** + +Status meanings: + +- `Partially Covered`: dedicated tests exist, but deep checklist coverage is still missing +- `Weakly Covered`: some direct or incidental coverage exists, but the layer is not reliable yet +- `Largely Missing`: the layer has little or no dependable test surface + +| Layer | Current Status | What Matters | What Still Needs Tests | +|---|---|---|---| +| Install / uninstall | Partially covered | Hooks must install in the right file, preserve user config, stay idempotent, and uninstall cleanly | Stronger idempotence, byte-preservation, no-project-file, and cross-scope safety checks across all integrations | +| Command format and binary resolution | Partially covered | Native event names and command shape are part of integration identity | More assertions for native event casing, `FAILPROOFAI_DIST_PATH`, quoting, platform path behavior, and older-handler compatibility | +| Event firing reality | Partially covered | A supported event list is useless if real native events do not reach the handler correctly | More event-by-event coverage, especially for Codex, OpenCode, and Pi, plus empty-stdin and block-path behavior | +| Canonical event mapping | Partially covered | Native event names must map to the same internal event language used by policies and the dashboard | More regression tests for Copilot camelCase, Codex snake_case, Gemini PascalCase, and unknown-event fallback | +| Payload normalization | Weakly covered | Policy logic depends on normalized tool name, tool input, cwd, and session fields | More deep-shape, malformed-value, nested-data, null-handling, and stringified-JSON tests | +| Detection and attribution | Weakly covered | The handler must know which integration a payload belongs to, even when signals conflict | More explicit precedence tests for `--integration`, `payload.integration`, unique event names, and negative detect samples | +| Session ID extraction | Weakly covered | Wrong or blank session ids break 
grouping, persistence, and dashboard navigation | More empty-stdin, env fallback, nested session-field, and same-session-across-events coverage | +| Policy evaluation | Partially covered | After normalization, allow, deny, and instruct must still behave correctly per integration protocol | More protocol-specific decision-format tests, non-git stop behavior, and normalized command parsing coverage | +| Deduplication | Weakly covered | Two hooks must not double-log the same event, but real distinct events must still be recorded | More lifecycle-window, cross-scope, same-command, and integration-in-fingerprint coverage | +| Persistence / dashboard fields | Largely missing | The stored record is what the dashboard actually renders | More checks for integration label, session id, raw hook name, canonical event name, stats, and decision fields | +| Transcript and virtual mirror behavior | Largely missing | Non-Claude sessions must still connect to transcript paths and mirrored project views | More tests for transcript derivation, mirror paths, and dashboard session-detail expectations | +| Scope interactions | Weakly covered | User, project, and local installs change real runtime behavior and duplication risk | More multi-scope install, precedence, dev-dist, and dedup interaction tests | +| Cross-version compatibility | Largely missing | Project-scope installs and older published handlers must still attribute events correctly | More tests around native event self-identification, `npx -y failproofai`, and old-handler fallback behavior | + +Interpretation: + +- The branch is strongest in install basics, basic mapping, and core policy plumbing +- The branch is weakest in handler attribution depth, session behavior, persistence, transcripts, mirrors, and cross-version safety + +--- + +## 5. Per-Integration Playbooks + +### Cursor + +**Simple View** + +Cursor is an IDE-style integration. 
+Its main challenge is that hooks can fire from more than one scope, and the payload often describes workspace roots instead of the exact working directory you care about. + +This means Cursor work is less about “does it run?” and more about “does it attribute the event correctly and avoid duplicate behavior?” + +**Expert View** + +**What makes this integration different** + +- Cursor-native hook formats and event names +- IDE-style behavior with user and project hooks both capable of firing +- `--stdin` is part of the command contract +- workspace roots are often the first cwd signal +- MCP events must map correctly to canonical tool events + +**What already exists in this branch** + +- source implementation exists +- dedicated `integrations.test.ts` section exists +- dedicated `cursor-integration.e2e.test.ts` exists +- dedicated `CursorPayloads` helper exists +- handler coverage for Cursor-specific attribution and dedup behavior is still shallow + +**Known regression risks** + +- twin-fire dedup across user and project scope +- cwd lifted from `workspace_roots[0]` +- more specific subfolder cwd overriding workspace root when tool input contains it +- MCP event mapping into `PreToolUse` and `PostToolUse` +- non-Claude policy behavior staying correct under Cursor protocol + +**Tests that must exist** + +- unit tests for detection, event mapping, settings-path shape, and command generation +- handler tests for attribution precedence, session fallback, dedup, and cwd override behavior +- e2e tests for real deny, allow, install, uninstall, and protocol-compliant decision handling +- payload fixtures that cover shell, file, MCP, and subfolder-cwd cases + +**What is still pending right now** + +- deeper handler attribution coverage +- stronger dedup regression tests +- more persistence and dashboard-field assertions +- mirror and transcript-related coverage +- more MCP deep-case coverage +- fuller event-reality coverage for Cursor-native events such as shell, file, and 
MCP paths +- stronger scope-interaction coverage for user plus project hook coexistence + +**Exact next work order** + +1. Extend fixtures only where current Cursor payloads are still too shallow +2. Fill unit gaps in `__tests__/hooks/integrations.test.ts` +3. Add Cursor-specific handler tests in `__tests__/hooks/handler.test.ts` +4. Add only the highest-value new e2e flows after the handler gaps are proven +5. Fix source only after a failing test shows the exact break + +### Gemini + +**Simple View** + +Gemini is the deep-data integration. +Its danger is not only event identity. +Its danger is that useful values can be buried in nested payload shapes. + +If Gemini work is done badly, policies still run, but they run on the wrong extracted data. + +**Expert View** + +**What makes this integration different** + +- Gemini-native PascalCase event names +- deeply nested payload shapes +- deep extraction from fields like `parts`, `arguments`, and `call.method` +- transcript paths derived into Gemini-specific chat storage + +**What already exists in this branch** + +- source implementation exists +- dedicated `integrations.test.ts` section exists +- dedicated `gemini-integration.e2e.test.ts` exists +- dedicated `GeminiPayloads` helper exists +- handler coverage for Gemini-specific attribution and session behavior is still shallow + +**Known regression risks** + +- deep extraction from nested data +- PascalCase identity guard when `--integration` is missing +- transcript-path resolution for dashboard use +- wrong or partial normalization of text, args, or tool name +- fallback to Claude identity when only Gemini-native event naming should decide + +**Tests that must exist** + +- unit tests for deep extraction, detection, event mapping, and settings path +- handler tests for attribution without explicit flag, session handling, and transcript derivation +- e2e tests for deny and allow flows plus richer native-event coverage +- payload fixtures that cover nested method 
calls, `parts`, `arguments`, and odd-shaped values + +**What is still pending right now** + +- richer deep fixtures +- more attribution-without-flag tests +- deeper persistence, transcript, and dashboard assertions +- stronger session-fallback coverage +- more complete event-by-event native-shape coverage +- more realistic nested payload coverage for `parts`, `arguments`, and `call.method` +- stronger transcript-path and mirror-path regression coverage + +**Exact next work order** + +1. Deep fixtures first +2. Unit normalization and detection tests second +3. Handler attribution, transcript, and session tests third +4. E2E additions last +5. Fix source only after a failing test proves the branch that broke + +### Copilot + +**Simple View** + +Copilot is the most branch-sensitive integration right now. +It has real code and real tests, but it also has the heaviest history of regressions. + +Its biggest dangers are: + +- being mislabeled as Claude +- losing the session id in the dashboard +- parsing `toolArgs` incorrectly +- sync and snap behavior quietly damaging the install + +**Expert View** + +**What makes this integration different** + +- camelCase native event names +- settings surface at `~/.copilot/config.json` +- sync engine that merges project hooks into the user config +- snap revision repair behavior +- stringified JSON normalization through fields like `toolArgs` + +**What already exists in this branch** + +- source implementation exists +- dedicated `integrations.test.ts` section exists +- dedicated `copilot-integration.e2e.test.ts` exists +- dedicated `CopilotPayloads` helper exists +- Copilot utility coverage exists for sync helpers +- handler coverage for deeper Copilot attribution, session fallback, persistence, and silence-guard behavior is still shallow + +**Known regression risks** + +- Copilot events labeled as Claude +- blank session id on the dashboard +- user-scope hooks wiped by `synchronizeCopilotProjectHooks` +- malformed or stringified 
`toolArgs` +- silence guard for legacy wrong-Claude hook firings +- snap revision hook-path behavior +- heuristic detection when explicit integration metadata is missing + +**Tests that must exist** + +- unit tests for sync helpers, event mapping, native command shape, `toolArgs` parsing, and detect logic +- handler tests for session fallback, persistence labeling, silence guard, env recovery, and transcript path derivation +- e2e tests for allow, deny, install, uninstall, sync safety, and regression-heavy payload shapes +- payload fixtures that cover good JSON, bad JSON, nested data, empty input, and env fallback cases + +**What is still pending right now** + +- malformed `toolArgs` handling tests +- env fallback and session synthesis tests +- stronger persistence assertions for integration label and session id +- snap and sync branch coverage +- deeper silence-guard and heuristic-detect coverage +- fuller event-reality coverage for all 8 Copilot native events +- stronger transcript-path derivation coverage for `~/.copilot/session-state/<session-id>/events.jsonl` +- better install-command regression coverage for camelCase native hook names + +**Exact next work order** + +1. Expand Copilot fixtures +2. Fill unit gaps in normalize, detect, and sync helpers +3. Add handler tests for session, attribution, silence guard, and persistence +4. Add targeted e2e regression flows +5. Fix source only after a failing test shows the specific break + +### Codex + +**Simple View** + +Codex is implemented in source, but its proof surface is incomplete. +The biggest current issue is not that Codex has no logic. +The biggest issue is that Codex does not yet have its own dedicated e2e lane or payload helper lane in this branch. 
+ +**Expert View** + +**What makes this integration different** + +- snake_case native events +- handler mapping from snake_case to canonical PascalCase +- legacy CLI compatibility concerns +- trace-related metadata and parsing expectations + +**What already exists in this branch** + +- source implementation exists +- dedicated `integrations.test.ts` section exists +- no dedicated `codex-integration.e2e.test.ts` was found +- no dedicated Codex payload helper section was found in `__tests__/e2e/helpers/payloads.ts` +- handler coverage for Codex identity and session behavior is still shallow + +**Known regression risks** + +- snake_case identity stability +- wrong attribution to another integration when the handler must decide from event naming +- cross-version fallback when older handlers ignore the integration flag +- trace-related behavior and metadata expectations +- past risk of lifecycle events being misattributed + +**Tests that must exist** + +- unit tests for Codex detection, mapping, settings path, and native command shape +- handler tests for attribution, session extraction, fallback, and persistence labeling +- dedicated `codex-integration.e2e.test.ts` +- dedicated Codex payload fixtures for pre-tool, post-tool, session, and stop-like events + +**What is still pending right now** + +- dedicated e2e surface is missing +- dedicated payload helper surface is missing +- deeper handler attribution and session coverage is missing +- persistence, transcript, and cross-version compatibility coverage is weak +- explicit event-reality coverage for `pre_tool_use`, `post_tool_use`, `session_start`, `session_end`, `user_prompt_submitted`, `agent_stop`, and `notification` +- stronger trace-related and old-handler fallback coverage is still missing + +**Exact next work order** + +1. Create Codex fixture shapes first +2. Expand unit coverage second +3. Add Codex-specific handler tests third +4. Add the first dedicated Codex e2e vertical slice last +5. 
Fix source only when the new failing test proves the gap + +### OpenCode + +**Simple View** + +OpenCode is plugin-shaped, not just config-shaped. +That means the integration is only healthy when the wrapper and the handler both behave correctly. + +Its biggest dangers are: + +- blocking must be honored immediately +- stderr noise can break the plugin protocol +- session state must remain stable across calls + +**Expert View** + +**What makes this integration different** + +- plugin-based wrapper flow +- synchronous CLI blocking behavior +- dotted native event names +- stderr/JSON protocol sensitivity +- session state may be held or forwarded by the plugin wrapper + +**What already exists in this branch** + +- source implementation exists +- dedicated `integrations.test.ts` section exists +- no dedicated `opencode-integration.e2e.test.ts` was found +- no dedicated OpenCode payload helper section was found +- handler coverage for OpenCode-specific silence, session, and persistence behavior is still shallow + +**Known regression risks** + +- wrapper must block correctly when the CLI denies +- stderr leakage can break OpenCode protocol handling +- session persistence across plugin calls +- dotted-event attribution and canonicalization +- session-created versus later tool events staying grouped together + +**Tests that must exist** + +- unit tests for detection, mapping, settings path, and wrapper-related assumptions +- handler tests for attribution, session grouping, silence on success, and persistence fields +- dedicated `opencode-integration.e2e.test.ts` +- dedicated OpenCode payload helpers for session start, tool before, tool after, and chat/message flows + +**What is still pending right now** + +- dedicated e2e surface is missing +- dedicated payload helper surface is missing +- stronger handler silence and session tests are missing +- persistence and dashboard-focused coverage is weak +- explicit event-reality coverage for `session.created`, `session.idle`, 
`tool.execute.before`, `tool.execute.after`, and `chat.message` is still missing +- stronger wrapper-blocking and stderr-cleanliness regression coverage is still missing + +**Exact next work order** + +1. Add plugin-style payload fixtures first +2. Expand unit behavior coverage second +3. Add handler silence, attribution, and session tests third +4. Add OpenCode e2e blocking and success flows last +5. Fix source only after the failing test identifies the broken branch + +### Pi + +**Simple View** + +Pi is the weakest-tested integration in this branch. +The source exists, but the supporting proof surfaces are thin. + +That makes Pi high risk even before you find a bug. + +**Expert View** + +**What makes this integration different** + +- extension-based wrapper +- session handoff from extension context +- IDE-style user feedback through status UI +- recursive isolation concerns +- inheritance-style keys such as `codex_session_id` + +**What already exists in this branch** + +- source implementation exists +- no dedicated Pi section was found in `__tests__/hooks/integrations.test.ts` +- no dedicated `pi-integration.e2e.test.ts` was found +- no dedicated Pi payload helper section was found +- handler coverage for Pi-specific session, attribution, and status behavior is still shallow + +**Known regression risks** + +- missing or unstable session ids +- deny flow not surfacing status UI feedback +- recursive self-trigger loops +- inherited metadata keys not being honored +- wrong attribution or grouping when the wrapper sends sparse payloads + +**Tests that must exist** + +- first dedicated Pi unit section in `__tests__/hooks/integrations.test.ts` +- handler tests for session extraction, attribution, recursive isolation, and persistence +- dedicated `pi-integration.e2e.test.ts` +- dedicated Pi payload helpers for session start, tool call, tool result, and UI-feedback deny cases + +**What is still pending right now** + +- dedicated unit surface is missing +- dedicated e2e 
surface is missing +- dedicated payload helper surface is missing +- deeper handler coverage is missing +- persistence and session-label behavior remains weakly proven +- explicit event-reality coverage for `session_start`, `tool_call`, `tool_result`, and `input` is still missing +- UI feedback behavior and recursive-isolation regressions still need dedicated proof +- inherited metadata handling such as `codex_session_id` and `codex_event` still needs direct tests + +**Exact next work order** + +1. Define Pi payload shapes first +2. Add the first dedicated Pi unit section second +3. Add Pi-specific handler tests third +4. Add the first Pi e2e flow last +5. Fix source only after a failing test makes the break concrete + +--- + +## 6. How To Convert A Pending Row Into Tests + +**Simple View** + +Do not fix a pending gap by jumping straight into source code. +First convert the gap into the smallest useful test shape. + +Use this order every time: + +1. fixture first +2. unit second +3. handler third +4. e2e last +5. 
fix code only after the failing test proves the bug + +**Expert View** + +### Map each kind of missing work to the right file + +| Missing Work Type | Put It Here | What It Should Prove | +|---|---|---| +| Integration object behavior | `__tests__/hooks/integrations.test.ts` | settings paths, event maps, detect logic, command shape, helper utilities | +| Handler and runtime behavior | `__tests__/hooks/handler.test.ts` | attribution precedence, session extraction, fallback logic, persistence fields, silence guard, transcript path | +| Payload builders | `__tests__/e2e/helpers/payloads.ts` | realistic native payload shapes for each integration | +| End-to-end integration flows | `__tests__/e2e/hooks/<integration>-integration.e2e.test.ts` | install, run, allow, deny, protocol contract, uninstall, high-value regressions | + +### Biggest missing surfaces right now + +- `codex-integration.e2e.test.ts` does not exist +- `opencode-integration.e2e.test.ts` does not exist +- `pi-integration.e2e.test.ts` does not exist +- Codex payload helpers do not have a dedicated section +- OpenCode payload helpers do not have a dedicated section +- Pi payload helpers do not have a dedicated section +- Pi does not have a dedicated integration unit section +- integration-specific handler coverage is still shallow across the branch +- persistence, dashboard, transcript, and virtual-mirror coverage remains weak or missing across several integrations + +### First missing files and first tests to add later + +| First Missing File Or Area | First High-Value Tests | +|---|---| +| `__tests__/e2e/helpers/payloads.ts` for Codex | snake_case native event payloads, session fallback payloads, stop/session lifecycle payloads | +| `__tests__/e2e/helpers/payloads.ts` for OpenCode | plugin-style session-created and tool-before/tool-after payloads, stderr-sensitive success payloads | +| `__tests__/e2e/helpers/payloads.ts` for Pi | extension session payloads, deny-with-status payloads, recursive-isolation payloads | +| 
`__tests__/e2e/hooks/codex-integration.e2e.test.ts` | install, one deny flow, one allow flow, old-handler/native-event attribution regression | +| `__tests__/e2e/hooks/opencode-integration.e2e.test.ts` | wrapper blocking, clean success path, silence-on-success regression | +| `__tests__/e2e/hooks/pi-integration.e2e.test.ts` | session propagation, deny feedback, recursive isolation | +| `__tests__/hooks/integrations.test.ts` for Pi | detect logic, settings path, event mapping, payload normalization entry points | +| `__tests__/hooks/handler.test.ts` across all | attribution precedence, session fallback, persistence fields, transcript-path derivation, silence-guard regressions | + +### Practical conversion examples + +If the pending row is “wrong integration label on dashboard”: + +- add or expand handler tests first +- assert persisted `integration` +- then add e2e only if the bug depends on full CLI flow + +If the pending row is “payload shape is weird”: + +- add fixtures first +- add unit normalization tests second +- add handler tests third + +If the pending row is “install or uninstall broke user config”: + +- add unit tests around helper behavior +- add e2e only if file-on-disk flow matters + +If the pending row is “agent did not stop on deny”: + +- add e2e because full protocol behavior matters +- add unit or handler tests only for the branches that explain why it broke + +### How To Build A Gap Table + +Before implementation, write a small truth table for the integration you are touching. 
+ +Use columns like: + +| Check | Status | +|---|---| +| Install works | yes / no | +| Uninstall works | yes / no | +| Events fire | yes / no | +| Session id correct | yes / no | +| Dashboard integration correct | yes / no | +| Policies work | yes / no | +| Unit tests exist | yes / no | +| Handler tests exist | yes / no | +| E2E tests exist | yes / no | + +If you want a richer version, use: + +| Case | Source Exists | Unit Test Exists | Handler Test Exists | E2E Test Exists | Status | +|---|---|---|---|---|---| +| Copilot `toolArgs` JSON parse | yes | yes / no | yes / no | yes / no | green / yellow / red | + +Simple status meanings: + +- `green`: implemented and tested well +- `yellow`: implemented, but weakly tested +- `red`: missing or still risky + +### What A Good Test Looks Like + +Use one test name for one behavior. + +Good examples: + +- `maps errorOccurred to Stop` +- `preserves user scope when no project file` +- `uses COPILOT_SESSION_ID when payload is empty` + +Bad tests usually have these problems: + +- too many unrelated assertions +- too much fixture setup repeated inline +- failure message does not explain the bug + +The best test order in this repo is still: + +1. payload fixture +2. unit test +3. handler test +4. e2e + +### Useful Commands + +These are the most useful commands when doing the real implementation work later: + +```bash +git branch --show-current +bunx vitest run __tests__/hooks/integrations.test.ts +bunx vitest run __tests__/hooks/handler.test.ts +bunx vitest run --config vitest.config.e2e.mts __tests__/e2e/hooks/copilot-integration.e2e.test.ts +bun run test:run +bun run test:e2e +bun run lint +bunx tsc --noEmit +``` + +After non-trivial changes in `src/hooks/` or `package.json`, also run the Docker smoke test from `AGENTS.md`. 
+ +Before pushing, follow the repo rules in `AGENTS.md`: + +```bash +git fetch origin && git log --oneline origin/main ^HEAD +gh pr list --head "$(git branch --show-current)" +gh run list --limit 3 +``` + +### Reusable Pattern For Future Integrations + +For future integrations, reuse this same build order: + +1. add or confirm native event definitions in `src/hooks/types.ts` +2. add or confirm integration behavior in `src/hooks/integrations.ts` +3. confirm handler attribution and session flow in `src/hooks/handler.ts` +4. add payload builders +5. add unit tests +6. add handler tests +7. add the first dedicated e2e file + +That pattern is safer than shipping “mostly working” source without proof. + +--- + +## 7. Named Regression Index + +**Simple View** + +These are not abstract risks. +These are the kinds of bugs that can confuse users, hide events, or make a working integration look broken. + +Every row below should stay tied to a named test or named test area. + +**Expert View** + +| Regression | Affected Integration(s) | Test Name / Test Area | User-Visible Symptom | +|---|---|---|---| +| Copilot events labeled as Claude | Copilot | `copilot > native camelCase event names install` and handler attribution coverage | Dashboard shows Copilot activity as Claude activity | +| Copilot session id blank on dashboard | Copilot | `copilot > fallback sessionId synthesized` and handler session extraction coverage | Session page shows blank or dash-style session id | +| Copilot sync wiping user-scope hooks | Copilot | `copilot-sync > preserves user scope when no project file` | Copilot hooks disappear after install, sync, or terminal startup | +| Copilot `toolArgs` string handling | Copilot | `copilot > normalize parses toolArgs JSON` and malformed-string variants | Policies see raw strings, wrong commands, or crash-prone input | +| Codex / Copilot / Gemini attribution mistakes | Codex, Copilot, Gemini | handler attribution precedence coverage | Events land under the wrong 
integration and dashboard/policies look inconsistent | +| Codex SessionStart mis-attributed to Gemini | Codex, Gemini | `handler > --integration flag wins over event-name` | Session activity shows under the wrong agent family | +| Old handler fallback with `npx -y failproofai` | Codex, Copilot, older published handler paths | `cross-version > event-name fallback attributes correctly on old handler` | Project-scope installs behave differently on older published versions | +| Lifecycle dedup swallowing real events | All, especially session lifecycle flows | `dedup > lifecycle uses 5s window + sessionId` | Real session start or stop events disappear from logs | +| Cursor non-Claude policy bypass behavior | Cursor | `policy > warn-repeated-tool-calls tunes for non-Claude` and related policy/evaluator coverage | Policy feels active in Claude but not in Cursor | +| OpenCode / Pi stderr protocol leakage | OpenCode, Pi | `opencode/pi > handler silent on success` | Wrapper or client protocol breaks because unexpected stderr appears | +| Convention hooks not loading | Convention policy system across integrations | `custom-hooks > convention files loaded per scope` | Policy files exist, but nothing runs and the user thinks hooks are broken | + +Interpretation: + +- if a row here has no obvious dedicated test, that is active debt +- if a regression returns, update both the test surface and this index + +--- + +## 8. Closeout Checklist Before Saying “Done” + +**Simple View** + +Do not stop when the code “looks fine.” +Stop when the branch truth has improved and the proof matches the claim. 
+ +**Expert View** + +- [ ] I used this file as a snapshot of the current branch, not as a fantasy roadmap +- [ ] I checked `__tests__/INTEGRATION_TEST_CASES.md` for deeper contract details +- [ ] I know which integration I am working on +- [ ] I know which layer is actually failing: install, mapping, normalization, attribution, session, persistence, or e2e protocol +- [ ] I created or updated payload fixtures before writing high-level tests +- [ ] I added or planned the right unit coverage in `__tests__/hooks/integrations.test.ts` +- [ ] I added or planned the right handler coverage in `__tests__/hooks/handler.test.ts` +- [ ] I added or planned the right e2e coverage in `__tests__/e2e/hooks/<integration>-integration.e2e.test.ts` +- [ ] I did not count shallow incidental coverage as proof +- [ ] I did not fix source code before a failing test made the break specific +- [ ] I checked whether the change affects dashboard fields, transcripts, mirrors, or dedup behavior +- [ ] I updated the guide again if the branch truth changed materially + +Final reminder: +this guide is strongest when it stays honest. +If the branch still has a gap, write the gap down clearly instead of hiding it behind “mostly working.” + +### Common Mistakes To Avoid + +- trusting the dashboard alone +- calling something “done” because one event appeared once +- skipping handler tests because unit tests already pass +- writing source changes before a failing test proves the branch that broke +- working on many integrations at the same time +- assuming shallow coverage is the same as strong coverage + +### If You Only Have 2 Days + +Do not try to finish six integrations badly. + +Use the time like this: + +1. Pick the riskiest single integration +2. Build the gap table +3. Add fixtures +4. Fill unit and handler gaps +5. Add or expand one real e2e lane +6. Fix only the bugs the tests expose + +That creates one reliable template instead of many unstable partial wins. 
+ +### Final Instruction For The Next Person + +If you are unsure what to do next, do this exact sequence: + +1. Open this guide +2. Find your integration in Section 5 +3. Read its “What is still pending right now” block +4. Turn the first pending item into a fixture, unit test, handler test, or e2e test +5. Run the smallest useful test first +6. Fix source only after the failing test proves the bug + +That is the safest path through this codebase. diff --git a/INTEGRATION_PLAN.md b/INTEGRATION_PLAN.md new file mode 100644 index 00000000..76c8115d --- /dev/null +++ b/INTEGRATION_PLAN.md @@ -0,0 +1,267 @@ +# Improved Integration Plan: Gemini CLI + GitHub Copilot + +> [!NOTE] +> **V2 IMPROVEMENTS**: This plan introduces a modular architecture where each integration (Claude, Cursor, Gemini, Copilot) owns its own detection and normalization logic. This fixes the 6 known bugs listed below and adds deep regression guards for Claude/Cursor. + +--- + +## Known Bugs in the Previous Plan (Read First) + +| Bug | Severity | Root Cause | Fixed In Phase | +|---|---|---|---| +| **1. Detection Collision** | **CRITICAL** | Gemini's `SessionStart` overlaps with Claude Code. | Phase 3 (Modular Detection) | +| **2. Raw Event Logging** | **MEDIUM** | Logs used raw `--hook` arg instead of mapped canonical names. | Phase 3 (Canonical Mapping) | +| **3. Log Formatting** | **LOW** | Disconnect between console output and actual evaluation name. | Phase 3 (Unified Logging) | +| **4. Copilot Deny Branch** | **LOW** | Missing specific block format for `PostToolUse`. | Phase 4 (Evaluator) | +| **5. Copilot Allow Spam** | **MEDIUM** | `permissionDecision: allow` was sent on every single event. | Phase 4 (Evaluator) | +| **6. Cursor Normalization** | **MEDIUM** | Hardcoded `workspace_roots` check in handler was fragile. | Phase 2 (Modular Integration) | + +--- + +## 1. 
Modular Architecture Overview + +Instead of hardcoding "integration detection" in the main handler, we extend the `Integration` interface: + +```typescript +export interface Integration { + // ... existing methods ... + /** Detect if this payload belongs to this integration */ + detect(payload: Record<string, unknown>): boolean; + /** Normalize payload fields (e.g. camelCase -> snake_case) */ + normalizePayload(payload: Record<string, unknown>): void; + /** Map raw hook names to canonical PascalCase (PreToolUse, etc.) */ + getCanonicalEventName(payload: Record<string, unknown>, cliArg: string): string; +} +``` + +### Flow: +1. `handler.ts` receives payload. +2. Iterates over `INTEGRATIONS.detect(payload)`. +3. First match wins (Copilot -> Gemini -> Cursor -> Claude Code). +4. `integration.normalizePayload(payload)` is called. +5. `integration.getCanonicalEventName(payload, cliArg)` is called. +6. Execution proceeds with perfectly clean, canonical state. + +--- + +## Phase 1 — `src/hooks/types.ts` + +**Changes**: Add `"gemini"` and `"copilot"` to `INTEGRATION_TYPES`. Add Gemini/Copilot event maps and types. + +Update [types.ts](file:///home/yashu/fp/failproofai/src/hooks/types.ts): + +```typescript +// Line 8: +export const INTEGRATION_TYPES = ["claude-code", "cursor", "gemini", "copilot"] as const; + +// ... Append at end of file ... 
+ +// ── Gemini CLI ──────────────────────────────────────────────────────────────── +export const GEMINI_HOOK_EVENT_TYPES = [ + "BeforeTool", "AfterTool", "BeforeAgent", "AfterAgent", "BeforeModel", + "AfterModel", "BeforeToolSelection", "SessionStart", "SessionEnd", + "Notification", "PreCompress" +] as const; + +export type GeminiHookEventType = (typeof GEMINI_HOOK_EVENT_TYPES)[number]; + +export const GEMINI_EVENT_MAP: Record = { + BeforeTool: "PreToolUse", AfterTool: "PostToolUse", + BeforeAgent: "SessionStart", AfterAgent: "Stop", + BeforeModel: "UserPromptSubmit", AfterModel: "PostToolUse", + BeforeToolSelection: "PreToolUse", SessionStart: "SessionStart", + SessionEnd: "SessionEnd", Notification: "Notification", + PreCompress: "PreCompact", +}; + +// ── GitHub Copilot ──────────────────────────────────────────────────────────── +export const COPILOT_HOOK_EVENT_TYPES = [ + "sessionStart", "sessionEnd", "userPromptSubmitted", + "preToolUse", "postToolUse", "agentStop", "subagentStop", "errorOccurred" +] as const; + +export type CopilotHookEventType = (typeof COPILOT_HOOK_EVENT_TYPES)[number]; + +export const COPILOT_EVENT_MAP: Record = { + sessionStart: "SessionStart", sessionEnd: "SessionEnd", + userPromptSubmitted: "UserPromptSubmit", preToolUse: "PreToolUse", + postToolUse: "PostToolUse", agentStop: "Stop", + subagentStop: "SubagentStop", errorOccurred: "Stop", +}; +``` + +--- + +## Phase 2 — `src/hooks/integrations.ts` + +**Changes**: Update `Integration` interface and implement new methods for all four integrations. + +### 2.1 Interface Update +```typescript +export interface Integration { + // ... (existing methods: getSettingsPath, readSettings, etc.) ... + detect(payload: Record): boolean; + normalizePayload(payload: Record): void; + getCanonicalEventName(payload: Record, cliArg: string): string; +} +``` + +### 2.2 Claude Code Implementation +```typescript +const claudeCode: Integration = { + // ... existing ... 
+ detect: () => true, // Fallback + normalizePayload: () => {}, // Claude uses snake_case natively + getCanonicalEventName: (_, cliArg) => cliArg, +}; +``` + +### 2.3 Cursor Implementation (Modularized) +```typescript +const cursor: Integration = { + // ... existing ... + detect(payload) { + const hookName = (payload.hook_event_name as string) || ""; + return ( + Array.isArray(payload.workspace_roots) || + hookName.startsWith("before") || + hookName.startsWith("after") || + hookName === "preToolUse" || + hookName === "postToolUse" + ); + }, + normalizePayload(payload) { + if (!payload.cwd && Array.isArray(payload.workspace_roots) && payload.workspace_roots.length > 0) { + payload.cwd = payload.workspace_roots[0]; + } + }, + getCanonicalEventName: (_, cliArg) => cliArg, +}; +``` + +### 2.4 Gemini Implementation +```typescript +const gemini: Integration = { + // ... existing ... + detect(payload) { + const h = payload.hook_event_name as string; + // Exclusive detection: avoid SessionStart/SessionEnd collisions + return ["BeforeTool", "AfterTool", "BeforeAgent", "AfterAgent", "BeforeModel", "AfterModel", "BeforeToolSelection"].includes(h); + }, + normalizePayload: () => {}, // Gemini uses snake_case + getCanonicalEventName(payload, cliArg) { + const h = payload.hook_event_name as GeminiHookEventType; + return GEMINI_EVENT_MAP[h] ?? cliArg; + } +}; +``` + +--- + +## Phase 3 — `src/hooks/handler.ts` (Modularized) + +**Changes**: Clean up the detection logic and fix logging bugs. + +```typescript +// ... Inside handleHookEvent ... + + // 1. 
Modular Detection + let integrationType: IntegrationType = (parsed.integration as IntegrationType); + if (!integrationType) { + // Priority: Copilot -> Gemini -> Cursor -> Claude Code (default) + if (copilot.detect(parsed)) integrationType = "copilot"; + else if (gemini.detect(parsed)) integrationType = "gemini"; + else if (cursor.detect(parsed)) integrationType = "cursor"; + else integrationType = "claude-code"; + } + + const integ = getIntegration(integrationType); + + // 2. Modular Normalization + integ.normalizePayload(parsed); + + // 3. Modular Canonical Mapping (Fix Bug 1, 2, 3) + const canonicalEventName = integ.getCanonicalEventName(parsed, eventType); + + // 4. Update session metadata + const session: SessionMetadata = { + sessionId: parsed.session_id as string, + integration: integrationType, + // ... other fields ... + }; + + hookLogInfo(`event=${canonicalEventName} integration=${integrationType} ...`); + + // 5. Evaluate (Fix Bug 2) + const result = await evaluatePolicies(canonicalEventName as HookEventType, parsed, session, config); + + // 6. Persist (Fix Bug 2) + persistHookActivity({ + ...result, + eventType: canonicalEventName, + integration: integrationType, + }); +``` + +--- + +## Phase 4 — `src/hooks/policy-evaluator.ts` + +**Changes**: Fix Bug 4 & 5 and format Gemini action blocks. + +```typescript +// Line 39: Empty policy final allow (Fix Bug 5) +if (policies.length === 0) { + let stdout = ""; + if (session?.integration === "cursor") { + stdout = JSON.stringify({ continue: true, permission: "allow" }); + } else if (session?.integration === "copilot" && eventType === "PreToolUse") { + stdout = JSON.stringify({ permissionDecision: "allow" }); + } + return { exitCode: 0, stdout, ... }; +} + +// Inside PreToolUse deny (Add Gemini action: "BLOCK") +if (session?.integration === "gemini") { + return { + exitCode: 0, + stdout: JSON.stringify({ action: "BLOCK", reason: blockMessage }), + ... 
+ }; +} +``` + +--- + +## Phase 9 — Regression Suite (New Phase) + +To ensure no impact on Claude Code or Cursor: + +1. **Claude Regression**: Mock a "Bash" tool event from Claude. + - Verify `integration === "claude-code"`. + - Verify `canonicalEventName === "PreToolUse"`. + - Verify empty stdout on allow. + +2. **Cursor Regression**: Mock a `workspace_roots` payload. + - Verify `integration === "cursor"`. + - Verify `parsed.cwd` is correctly extracted from `workspace_roots[0]`. + - Verify `stdout` contains `continue/permission` fields. + +3. **Log Visibility**: + - Verify that activity persisted for **both** Gemini and Copilot contains the `integration` field. + - This ensures the Dashboard `/policies` activity tab correctly shows which integration triggered each block. + +--- + +## Phase 10 — Manual Smoke Tests + +```bash +# Gemini Allow Check +echo '{"hook_event_name":"BeforeTool","tool_name":"ls"}' | failproofai --hook PreToolUse +# Result: exit 0, empty stdout + +# Copilot Deny Check (Simulate block) +# Force a deny policy (e.g. block-sudo) +echo '{"sessionId":"123","toolName":"sudo","hookEventName":"preToolUse"}' | failproofai --hook PreToolUse +# Result: exit 0, stdout = {"permissionDecision":"deny", ...} +``` diff --git a/__tests__/INTEGRATION_TEST_CASES.md b/__tests__/INTEGRATION_TEST_CASES.md new file mode 100644 index 00000000..0973e34d --- /dev/null +++ b/__tests__/INTEGRATION_TEST_CASES.md @@ -0,0 +1,869 @@ +# Integration Test Cases — Non-Claude Integrations + +A comprehensive checklist of edge cases the test suite must cover for every non-Claude +integration (Cursor, Gemini, GitHub Copilot, Codex, OpenCode, Pi). Cases are grouped by +layer — installation, hook firing, payload normalization, attribution, dashboard display, +and cross-cutting concerns. Each case is written as a testable assertion. + +Symbols: ✅ = must-pass assertion, ⚠️ = regression guard (has broken before), 🔁 = parameterize across all integrations. + +--- + +## 0. Index + +1. 
[Installation & Uninstallation](#1-installation--uninstallation) +2. [Hook Command Format & Binary Resolution](#2-hook-command-format--binary-resolution) +3. [Event Firing & Trigger Reality](#3-event-firing--trigger-reality) +4. [Event Name Canonicalization](#4-event-name-canonicalization) +5. [Payload Normalization](#5-payload-normalization) +6. [Integration Detection & Attribution](#6-integration-detection--attribution) +7. [Session ID Extraction & Fallback](#7-session-id-extraction--fallback) +8. [Policy Evaluation per Integration](#8-policy-evaluation-per-integration) +9. [Deduplication](#9-deduplication) +10. [Persistence to hook-activity Store](#10-persistence-to-hook-activity-store) +11. [Dashboard Display Gaps](#11-dashboard-display-gaps) +12. [Sync & Merge Functions](#12-sync--merge-functions) +13. [Scopes: user / project / local](#13-scopes-user--project--local) +14. [Cross-Version Compatibility](#14-cross-version-compatibility) +15. [Integration-Specific Deep Cases](#15-integration-specific-deep-cases) + +--- + +## 1. Installation & Uninstallation + +🔁 For every integration {cursor, gemini, copilot, codex, opencode, pi}: + +- ✅ `policies --install --integration ` writes the correct settings file at the correct path for each scope. +- ✅ Fresh install on a machine with **no prior config file** creates parent directories (e.g. `~/.copilot/`, `~/.config/github-copilot/hooks/`, `~/.gemini/`, `.github/hooks/`). +- ✅ Install **preserves existing user settings** in the same file (e.g. Copilot's `copilotTokens`, `loggedInUsers` must remain untouched after installing hooks). +- ⚠️ Re-running install is idempotent — no duplicate hook entries. +- ⚠️ Install followed by uninstall leaves the settings file in a state byte-identical to before install (modulo whitespace). No orphan `hooks: {}` block if none existed. +- ⚠️ Uninstall removes **only** failproofai entries, not other user-authored hooks. 
+- ⚠️ Install of integration A does not touch integration B's settings file. +- ✅ `policies --install all --integration ` enables all policy names and registers hooks for every event type in that integration's `eventTypes` list. +- ⚠️ Running uninstall with `--scope project` when **no project file exists** exits 0 gracefully. +- 🔁 `hooksInstalledInSettings(scope)` returns true iff any failproofai marker is present. +- ⚠️ **Copilot regression**: after user-scope install, `synchronizeCopilotProjectHooks` (postInstall) must not wipe the just-written user entries when no project file exists. + +--- + +## 2. Hook Command Format & Binary Resolution + +🔁 For each integration: + +- ✅ Project-scope hook command uses portable `npx -y failproofai` (no machine-specific path) so the file is safe to commit. +- ✅ User-scope hook command uses absolute local binary path (`process.execPath` + resolved dist entry), so it works without `PATH` setup. +- ⚠️ `FAILPROOFAI_DIST_PATH` env var overrides the resolved binary path. +- ⚠️ When `FAILPROOFAI_DIST_PATH` is unset, `findDistIndex()` walks from the running binary's directory up to find `dist/`. +- ✅ Each installed command contains `--integration ` and `--hook `. +- ⚠️ **Copilot regression**: event name in the command is native camelCase (`sessionStart`), NOT PascalCase. +- ⚠️ **Codex**: event name is snake_case (`pre_tool_use`). +- ⚠️ **Gemini**: event name is Gemini's unique PascalCase (`BeforeTool`, `BeforeModel`). +- ⚠️ **Cursor**: event name matches Cursor's native format; command includes `--stdin`. +- ⚠️ Shell-quoting: paths containing spaces are double-quoted in the generated bash. +- ⚠️ Windows path separators: on win32, binary path uses backslashes but is wrapped in quotes so bash can execute it. + +--- + +## 3. Event Firing & Trigger Reality + +🔁 For each integration: simulate each native event type and verify the hook handler is invoked. 
+ +- ✅ **Cursor**: `beforeShellExecution`, `afterFileEdit`, `beforeReadFile`, `beforeMCPExecution`, `stop` all fire via the JSONL pipe and produce one handler invocation each. +- ✅ **Gemini**: `BeforeTool`, `AfterTool`, `BeforeModel`, `AfterModel`, `BeforeAgent`, `AfterAgent`, `BeforeToolSelection`, `PreCompress` each fire. +- ✅ **Copilot**: `sessionStart`, `sessionEnd`, `userPromptSubmitted`, `preToolUse`, `postToolUse`, `agentStop`, `subagentStop`, `errorOccurred` each fire. +- ✅ **Codex**: `pre_tool_use`, `post_tool_use`, `session_start`, `session_end`, `user_prompt_submitted`, `agent_stop`, `notification` each fire. +- ✅ **OpenCode**: dotted events (`tool.before`, `tool.after`, `session.start`, `session.end`). +- ✅ **Pi**: snake_case events (`session_start`, `tool_call`, `tool_result`, `input`). +- ⚠️ **No-event-payload**: some CLIs invoke hooks with empty stdin. Handler must not crash; fallback sessionId must be synthesized from cwd. +- ⚠️ Non-zero exit code: handler returning 2 (block) must be honored — integration cancels the tool call. + +--- + +## 4. Event Name Canonicalization + +🔁 For each integration: + +- ✅ Native event name (camelCase/snake_case/dotted) fed into handler becomes the canonical PascalCase name used by builtins and the dashboard. +- ⚠️ **Copilot** regression: `sessionStart` → `SessionStart`, `errorOccurred` → `Stop`. Dashboard row's `eventType` must be PascalCase after persistence. +- ⚠️ Unknown event name — handler falls through without throwing, logs a warning, returns "allow". +- ✅ `ALL_CANONICAL_EVENTS` set in handler.ts includes every mapped value from every integration's EVENT_MAP. + +--- + +## 5. Payload Normalization + +🔁 For each integration: + +- ✅ `session_id` extracted from payload's native key (`sessionId`, `conversation_id`, `tab_id`, …) and assigned to normalized `session_id`. +- ✅ `tool_name` extracted from native keys (`toolName`, `tool`, `name`, `call.method`…). +- ✅ `tool_input` extracted and parsed. 
+- ⚠️ **Copilot** `toolArgs` that is not valid JSON falls back to raw string, not a crash. +- ⚠️ **Cursor** `conversation_id` appears inside nested `data` object — deep extract finds it. +- ⚠️ **Gemini** deep-extract for text/args/name finds values under `parts`, `arguments`, `call.method` etc. Test with realistic Gemini payload shapes. +- ⚠️ `cwd` extraction: native keys (`workspace_root`, `projectRoot`, `cwd`, `directory`) all normalize. +- ⚠️ Payloads with `null` values in expected-string fields don't become the literal string "null" in the session. + +--- + +## 6. Integration Detection & Attribution + +- ⚠️ **Priority 1**: `--integration ` CLI flag wins over everything else. Test: pass `--integration cursor` with a payload shaped like Copilot → attribution is cursor. +- ⚠️ **Priority 2**: `payload.integration` field (set by some CLIs' own wrappers). +- ⚠️ **Priority 3**: unique event-name fallback: + - `BeforeTool` / `AfterTool` / `BeforeModel` → gemini + - camelCase `COPILOT_HOOK_EVENT_TYPES.includes(eventType)` → copilot + - snake_case `CODEX_HOOK_EVENT_TYPES.includes(eventType)` → codex + - dotted `tool.before` → opencode +- ⚠️ **Priority 4**: payload shape `detect()` — parameterize each integration's detect function with representative payloads and negative samples from every other integration. A detect function must not false-positive on another's payload. +- ⚠️ **Default fallback**: unknown → `claude-code`. Regression-test: payload `{ hook_event_name: "sessionStart" }` with no `--integration` flag must still resolve to `copilot`, not `claude-code`. + +--- + +## 7. Session ID Extraction & Fallback + +🔁 For each integration: + +- ✅ Real session ID present in payload → extracted and passed through unchanged. +- ⚠️ Empty payload + no env session vars → fallback ID is `session--` (never blank, never literal `—`, never `undefined`). +- ⚠️ Env var recovery: `COPILOT_SESSION_ID`, `CURSOR_SESSION_ID`, `GEMINI_SESSION_ID` populate session when payload is empty. 
+- ⚠️ Same session across events emits **same** sessionId — dashboard groups them into one session row. + +--- + +## 8. Policy Evaluation per Integration + +🔁 For each integration: + +- ✅ Policy fires with correct canonical event name. +- ⚠️ Policy returning `deny` results in exit code 2 and `stderr` containing the reason. +- ⚠️ Policy returning `instruct` results in exit 0 with `stdout` containing the instruction block. +- ⚠️ Stop-event policies (`require-commit-before-stop`) evaluate correctly for **non-git** cwd — they skip with a reason, not crash. +- ⚠️ Block policies (`block-sudo`, `block-rm-rf`, …) parse `tool_input.command` correctly after normalization for each integration. + +--- + +## 9. Deduplication + +- ⚠️ Same logical event fired in two scopes (project + user) produces exactly one persisted entry — the firing lock claims the event and the second process silently exits 0. +- ⚠️ Lifecycle events (SessionStart/SessionEnd/Stop) use the 5s dedup window with sessionId in the fingerprint — rapid re-runs of the same session don't double-log, but two different sessions within 5s each log. +- ⚠️ Non-lifecycle events use DEDUP_BUCKET_MS with tool_input JSON in the fingerprint — two identical Bash commands within the window log once. +- ⚠️ Dedup fingerprint **includes** `integrationType` so a Copilot SessionStart and a Claude SessionStart in the same cwd at the same instant both get logged. + +--- + +## 10. Persistence to hook-activity Store + +🔁 For each integration: + +- ✅ Entry written has `integration`, `sessionId`, `eventType` (canonical), `hookEventName` (raw), `cwd`, `decision`, `timestamp`, `durationMs`. +- ⚠️ **Copilot regression**: persisted `integration` field is `"copilot"`, never undefined and never silently defaulted to `"claude-code"`. +- ⚠️ Stats file (`stats.json`) increments `totalEvents`, `denyCount`, `topPolicy`, `topPolicyCount` accurately per integration. + +--- + +## 11. 
Dashboard Display Gaps + +- ⚠️ SessionId `—` (em-dash) on the dashboard means the persisted entry literally lacks a sessionId. After the Copilot fix, this should never happen. +- ⚠️ Virtual project mirror: when `integration ∈ {cursor, gemini, codex, pi, opencode}`, events are mirrored into `~/.claude/projects//.jsonl` for cross-integration project views. Test each integration writes to the correct mirror. +- ⚠️ Dashboard's session detail page for a non-Claude session shows the transcript path computed by the handler (Copilot's `~/.copilot/session-state//events.jsonl`, Gemini's `~/.gemini/tmp//chats/session-...`). +- ⚠️ `cwd` trunc displays enough right-edge characters to distinguish nested projects. + +--- + +## 12. Sync & Merge Functions + +- ⚠️ **`synchronizeCopilotProjectHooks`** regression guard: call it with **no** project file present — user-scope hooks in `~/.copilot/config.json` must be preserved byte-for-byte. +- ⚠️ With a project file present, sync merges project entries without duplicating existing ones and without touching user-scope (local-binary) entries. +- ⚠️ `ensureCopilotRevisionSymlink`: on a snap install, creates `snap/copilot-cli//.config/.../hooks` → `common/.config/.../hooks` symlink. On a non-snap install, is a no-op. Test both branches. + +--- + +## 13. Scopes: user / project / local + +🔁 For each integration that supports multiple scopes: + +- ⚠️ Simultaneous install at user + project fires each hook only once (firing-lock dedup). +- ⚠️ Local scope (`.failproofai/policies-config.local.json`) overrides project scope overrides user scope for enabled-policies list. +- ⚠️ Installing at user with `FAILPROOFAI_DIST_PATH` pointing at dev dist produces hooks that reference dev dist; installing at project always uses `npx -y failproofai`. + +--- + +## 14. Cross-Version Compatibility + +- ⚠️ Hook command emitted by version N must be understood by version N's handler AND (at best-effort) by version N-1's handler. 
The **self-identifying event name** rule (camelCase for Copilot, snake_case for Codex) makes this work. +- ⚠️ Published npm `latest` version compatibility: install at project scope (uses `npx -y failproofai`), run the hooks, confirm the published handler still produces a dashboard-compatible entry. + +--- + +## 15. Integration-Specific Deep Cases + +### 15.1 Cursor (The IDE Native) + +#### The Function Pipeline +1. **Cursor IDE** triggers a hook from `hooks.json`. +2. **Command**: `failproofai --hook --integration cursor --stdin`. +3. **Payload**: JSONL object piped to stdin. +4. **Handler**: `--integration cursor` explicitly guards identity. + +#### Deep Assertions +- 🛠️ **Twin-Fire Deduplication**: Cursor fires both User and Project hooks. + - ✅ **Assertion**: Firing lock MUST claim the first and exit-0 the second immediately. No duplicate Allow JSON should bypass a Deny from the first. +- 🛠️ **Hyper-Specific Attribution**: + - ✅ **Assertion**: `cwd` must be lifted from `workspace_roots[0]` if top-level `cwd` is missing. + - ✅ **Assertion**: If tool_input contains `cwd` (e.g. from terminal executing in a sub-folder), it overrides `workspace_roots`. +- 🛠️ **MCP Protocol**: + - ✅ **Assertion**: `beforeMCPExecution` and `afterMCPExecution` correctly map to `PreToolUse` and `PostToolUse` and block unauthorized MCP tool calls. + +### 15.2 Gemini CLI (Deep Data Mining) + +#### The Function Pipeline +1. **Gemini CLI** triggers a hook from `~/.gemini/settings.json`. +2. **Command**: `failproofai --hook --integration gemini --stdin`. +3. **Normalization**: Performs **Deep Extract Logic**. +4. **Handler**: PascalCase Identity Guard protects native event fallback. + +#### Deep Assertions +- 🛠️ **Deep Extract Logic**: Gemini nests data deeply. + - ✅ **Assertion**: Payload `{ data: { call: { method: "ls" } } }` MUST yield `tool_name: "ls"`. + - ✅ **Assertion**: Payload `{ parts: [{ text: "hi" }] }` MUST yield `tool_input: "hi"`. 
+- 🛠️ **PascalCase Identity Guard**: + - ✅ **Assertion**: `BeforeTool` MUST BE detected as `gemini` purely by its name if no integration flag is present. +- 🛠️ **Transcript Resolution**: + - ✅ **Assertion**: Dashboard transcript links MUST point to `~/.gemini/tmp//chats/session-.json`. + +### 15.3 GitHub Copilot (Sync & Snap) + +#### The Function Pipeline +1. **Copilot CLI** triggers a hook from `~/.copilot/config.json`. +2. **Command**: `failproofai --hook --integration copilot`. +3. **Normalization**: Parses stringified JSON values. + +#### Deep Assertions +- 🛠️ **JSON-in-String Normalization**: + - ✅ **Assertion**: `toolArgs` formatted as `"{\"command\":\"ls\"}"` MUST be parsed into an object. +- 🛠️ **Waterfall Metadata Extraction**: + - ✅ **Assertion**: Copilot `tool_input` resolution MUST cleanly cascade across inconsistent keys: `toolInput` -> `toolArgs` -> `data.params` -> `message` -> `prompt`. +- 🛠️ **The Sync Engine & Snap Repair**: + - ✅ **Assertion**: `.bashrc` MUST correctly contain `env failproofai copilot-sync 2>/dev/null` allowing snap revisions to access the `common/` hook symlinks without manual intervention. +- ⚠️ **CamelCase Stability**: + - ✅ **Assertion**: Hook command MUST install with `preToolUse` (camelCase) to ensure older handlers correctly classify it as Copilot without flags. +- 🛠️ **Fuzzy Deep Payload Detection (Heuristic)**: + - ✅ **Assertion**: If `--integration copilot` is missing, `detect()` must successfully identify Copilot if keys like `sessionId` or `toolName` exist inside a nested `data` object, PROVIDED the `hookName` does NOT start with PascalCase (which would conflict with Claude). +- 🛠️ **Silence Guard (Double-Dip Protection)**: + - ✅ **Assertion**: If an event arrives marked as `--integration claude-code` (from a corrupted legacy project install) but the event type is exclusively Copilot's (e.g., `sessionStart`), the handler MUST silently abort (exit 0, no dashboard log) to prevent phantom duplicates. 
+- 🛠️ **Binary Detection**: + - ✅ **Assertion**: `detectInstalled()` accurately verifies Copilot presence by checking `which gh` instead of `copilot`, reflecting its architecture as a GitHub CLI extension. + +### 15.4 OpenCode (Plugin-Based) + +#### The Plugin Pipeline +**OpenCode uses a TypeScript plugin injected at `.opencode/plugins/failproofai.ts`**: +```typescript +import { spawnSync } from "node:child_process"; +export const FailproofAIPlugin = (ctx: any) => { + const callcli = (event: string, args: any) => { + const payload = { ...args, integration: "opencode", cwd: ctx.directory }; + const cmd = 'failproofai --hook ' + event + ' --integration opencode --stdin'; + const res = spawnSync(cmd, { input: JSON.stringify(payload), shell: true, encoding: "utf8" }); + if (res.status !== 0) throw new Error(res.stderr || "Blocked by FailproofAI"); + }; +}; +``` + +#### Deep Assertions +- ✅ **Synchronous Blocking**: The plugin MUST `throw Error` if `spawnSync` exits with code 2, halting the agent workflow definitively. +- ✅ **Session ID Persistence**: `session.created` must set `currentSessionId` used by all subsequent calls in the session. +- ⚠️ **Diagnostic Silence**: The wrapper must not write debug logs to stderr that could break OpenCode's JSON protocol. 
+ +### 15.5 Pi Coding Agent (Extension-Based) + +#### The Extension Pipeline +**Pi uses a TypeScript extension at `.pi/extensions/failproofai.ts`**: +```typescript +export default function (pi: ExtensionAPI) { + const callcli = (event: string, args: any, ctx?: any) => { + const sessionId = ctx?.sessionId || pi.session?.id || "default"; + const payload = { ...args, integration: "pi", cwd: process.cwd(), session_id: sessionId }; + const res = spawnSync('failproofai --hook ' + event + ' --integration pi --stdin', { + input: JSON.stringify(payload), shell: true, encoding: "utf8" + }); + if (res.status !== 0) { + ctx?.ui?.setStatus("FailproofAI: Blocked - " + (res.stderr || res.stdout)); + return { block: true }; + } + }; +} +``` + +#### Deep Assertions +- ✅ **Premium UI Feedback**: Verify `setStatus` is called when a policy denies an action so the user receives IDE UI feedback. +- ✅ **Recursive Isolation**: Verify the extension ignores messages starting with `/failproofai-status` to prevent infinite trigger loops. +- ✅ **Heritage Attribution**: Verify `codex_session_id` and `codex_event` keys (if present) are handled. + +### 15.6 OpenAI Codex (Legacy CLI) + +#### Deep Assertions +- 🛠️ **Case Stability**: + - ✅ **Assertion**: CLI invokes with snake_case `pre_tool_use`, but `handler` maps to PascalCase `PreToolUse`. Config file keys must be PascalCase. +- 🛠️ **Trace Parsing**: + - ✅ **Assertion**: `trace-parser.ts` MUST correctly segment multi-line Codex logs into individual `HookActivityEntry` metadata blocks. + +--- + +## Cross-cutting: Fixture Matrix (Ultimate Payload Gallery) + +For every integration, maintain a fixture directory parameterized in tests. Here are canonical assertions for parsing these core event payloads: + +### 1. 
Cursor `beforeShellExecution` (Stdin JSON) +```json +{ + "hook_event_name": "beforeShellExecution", + "workspace_roots": ["/home/user/project"], + "command": "rm -rf .env", + "integration": "cursor" +} +``` +**Assertion**: `tool_name` -> `run_terminal_command`, `tool_input` -> `rm -rf .env`, `cwd` -> `/home/user/project`. + +### 2. Gemini `BeforeTool` (Deep Stdin) +```json +{ + "hook_event_name": "BeforeTool", + "data": { + "call": { + "method": "read_file", + "arguments": { "path": "secrets.json" } + } + } +} +``` +**Assertion**: `tool_name` -> `read_file`, `tool_input` -> `{ "path": "secrets.json" }`, `integration` -> `gemini`. + +### 3. Copilot `preToolUse` (CLI Args + toolArgs String) +```json +{ + "hookEventName": "preToolUse", + "sessionId": "550e8400-e29b-41d4-a716-446655440000", + "toolName": "bash", + "toolArgs": "{\"command\":\"ls -la\"}" +} +``` +**Assertion**: `tool_input` MUST be a parsed JSON object `{"command":"ls -la"}`, not the raw string. + +### 4. OpenCode/Pi Plugin (Standard Normalized Stdin) +```json +{ + "integration": "opencode", + "session_id": "ses_123", + "tool_name": "edit_file", + "tool_input": { "content": "..." }, + "cwd": "/abs/path" +} +``` +**Assertion**: `integration` is hardcoded inside the TypeScript wrapper, bypassing any CLI guesswork. + +--- + +## 16. Per-Integration End-to-End Deep Coverage + +Every integration is unique — different install surface, different payload shape, +different transcript format, different failure modes. This section enumerates exhaustive +cases **per integration**, covering the full pipeline from CLI trigger to dashboard row. + +Each subsection follows the same structure: + +- **A. Install pipeline** (function chain + every branch) +- **B. Uninstall pipeline** +- **C. Settings-file shape preservation** +- **D. Hook command format** (every token of the generated bash) +- **E. Trigger reality** (what actually fires, what doesn't) +- **F. Payload ingestion** +- **G. 
Normalization** (every key, every fallback) +- **H. Event canonicalization round-trip** +- **I. Attribution** (without `--integration` flag) +- **J. Session ID extraction** (all keys, env recovery, fallback) +- **K. Cwd / workspace resolution** +- **L. Tool name / tool input extraction** +- **M. Policy evaluation** (block, warn, sanitize, instruct) +- **N. Decision honoring** (CLI cancels on exit 2) +- **O. Stdout/stderr contract** +- **P. Persistence fields** +- **Q. Virtual project mirror** +- **R. Transcript path resolution** +- **S. Dashboard row rendering** +- **T. Dashboard session detail page** +- **U. Scope matrix** (user / project / local) +- **V. Cross-scope duplication / dedup** +- **W. Error paths** (empty stdin, malformed JSON, permission errors) +- **X. Config-file concurrency** (two processes writing simultaneously) +- **Y. Cross-version compatibility** (old published handler sees new install) +- **Z. Known quirks specific to this integration** + +--- + +### 16.1 Cursor — End-to-End + +**CLI**: `cursor` and `cursor-agent`. Settings file: `hooks.json` (user: `~/.cursor/hooks.json`, project: `.cursor/hooks.json`, local: `.cursor/hooks.local.json`). + +#### A. Install pipeline +- ✅ `manager.install("cursor", "user")` resolves path to `~/.cursor/hooks.json`. +- ✅ `manager.install("cursor", "project", cwd)` resolves to `/.cursor/hooks.json`. +- ⚠️ If `.cursor/` directory doesn't exist, `mkdirSync(..., { recursive: true })` creates it; permission error surfaces as `CliError` not silent failure. +- ✅ `readSettings` handles a blank file (returns `{}`), a valid JSON file, and a malformed JSON file (throws a clear `CliError` with the file path). +- ✅ `writeHookEntries` iterates every event in `CURSOR_HOOK_EVENT_TYPES` and calls `buildHookEntry` per event. +- ⚠️ Existing non-failproofai entries under the same event key are preserved; ours is appended. 
+- ⚠️ A prior failproofai entry at that event is **replaced**, not duplicated — test re-install twice, count must remain 1 per event. +- ✅ `isFailproofaiHook(h)` matches by command substring (no marker field in Cursor's format). +- ⚠️ `postInstall` is a no-op for Cursor (unlike Copilot). Must not invoke any sync. + +#### B. Uninstall pipeline +- ✅ `removeHooksFromFile` removes only entries where `isFailproofaiHook` returns true. +- ⚠️ If an event key becomes empty after removal, the key is deleted; if `hooks` object becomes empty, it is deleted; if the file becomes empty `{}`, it is still written (not deleted) to preserve explicit empty state. +- ⚠️ Uninstall on a file that never had failproofai entries returns `removed: 0` and doesn't modify the file's mtime. +- ⚠️ Uninstalling user scope must not touch project scope and vice versa. + +#### C. Settings-file shape preservation +- ⚠️ Pre-existing top-level keys (not `hooks`) preserved byte-identical. +- ⚠️ Whitespace / trailing newline preserved if `writeJsonFile` uses `JSON.stringify(..., null, 2) + "\n"`. Assert EOF behavior. +- ⚠️ Nested Cursor-specific options (matcher regexes, disabled flags) inside each hook entry preserved. + +#### D. Hook command format +- ✅ Command string: `"${process.execPath}" "${binaryPath}" --hook ${pascalEvent} --integration cursor --stdin`. +- ⚠️ `eventType` fed to `buildHookEntry` is Cursor's native camelCase (`beforeShellExecution` etc.); **mapped to PascalCase** for the `--hook` argument (via `CURSOR_EVENT_MAP`). This is intentional because Cursor's camelCase names overlap with Copilot's — the `--integration cursor` flag + `--stdin` are what disambiguate. +- ⚠️ `timeout: 60` field present (seconds, not ms — Cursor's schema). +- ⚠️ `--stdin` flag is mandatory; without it the handler reads nothing and mis-classifies. +- ⚠️ Windows: `process.execPath` has backslashes; bash wrapping survives Cursor's shell invocation. + +#### E. 
Trigger reality +- ✅ Each of `beforeShellExecution`, `afterFileEdit`, `beforeReadFile`, `beforeSubmitPrompt`, `beforeMCPExecution`, `afterMCPExecution`, `stop` fires exactly one handler invocation per Cursor event. +- ⚠️ Cursor fires **both** user-scope AND project-scope hooks when both are installed — firing-lock dedup handles (see §V). +- ⚠️ Cursor Agent (headless mode) fires same events as IDE; detect distinguishes via `payload.agent_type` or absence of `editor_context`. +- ⚠️ Cursor does not fire `sessionStart` / `sessionEnd`; our `eventTypes` list reflects reality (test parity). + +#### F–L. Payload ingestion & normalization +- ✅ Stdin JSONL: single line JSON, `\n` terminated. +- ⚠️ `conversation_id` appears under `data.conversation_id` AND top-level — deep extract pulls from either. +- ⚠️ `workspace_roots` is an array; `cwd` normalizes to `workspace_roots[0]` when top-level `cwd` absent. +- ⚠️ Tool input for `beforeShellExecution`: payload key is `command`, not `tool_input.command` — normalizer maps to `{ command: }`. +- ⚠️ Tool input for `afterFileEdit`: `file_path` + `new_content` → `{ file_path, new_content }`. +- ⚠️ `beforeMCPExecution`: `mcp_server`, `mcp_tool`, `arguments` → `tool_name = mcp_server + ":" + mcp_tool`, `tool_input = arguments`. +- ⚠️ PascalCase canonicalization: `beforeShellExecution` → `PreToolUse` (shell is a tool), `afterFileEdit` → `PostToolUse`, `beforeSubmitPrompt` → `UserPromptSubmit`, `stop` → `Stop`. +- ⚠️ `tool_name` defaults: if unknown after normalization, derived from `command` first token (`/usr/bin/ls` → `ls`). + +#### M–O. Policy evaluation & decision honoring +- ⚠️ `block-sudo` on `beforeShellExecution(command: "sudo apt install foo")` → exit 2, stderr contains policy reason; Cursor cancels the exec. +- ⚠️ `warn-repeated-tool-calls` fires with Cursor-specific threshold (Cursor agents are chattier than Claude — policy must detect `session.integration === "cursor"` and raise threshold accordingly). 
+- ⚠️ `sanitize-api-keys` on `afterFileEdit(new_content: "KEY=sk-...")` → deny; Cursor reverts the edit (assert via Cursor's own transcript). +- ⚠️ Stop-event policies (`require-commit-before-stop` etc.) fire on Cursor's `stop` event; must handle Cursor's lack of `transcript_path`. +- ⚠️ Instruct decision (exit 0 with stdout JSON) is consumed by Cursor's system-prompt injector — assert stdout shape matches Cursor's `{ "systemMessage": "..." }` schema. + +#### P–R. Persistence / transcript +- ✅ Persisted entry has `integration: "cursor"`, canonical PascalCase `eventType`, raw `hookEventName` (camelCase), `sessionId`, `cwd`, `policyName`. +- ⚠️ Virtual project mirror: entry mirrored into `~/.claude/projects//.jsonl` (Cursor ∈ VIRTUAL_PROJECT_LOG_INTEGRATIONS). +- ⚠️ Transcript path: Cursor doesn't expose one directly; handler sets `transcriptPath: undefined`. Dashboard detail page handles gracefully (no broken link). + +#### S–T. Dashboard +- ⚠️ Integration badge renders `Cursor` in blue. +- ⚠️ Session detail page lists all events grouped by `sessionId`; if `sessionId` is the synthesized `session-cursor-` (no real UUID), still groups events logically. +- ⚠️ `eventType` column shows `PreToolUse` not `beforeShellExecution` — canonicalization must reach persistence. +- ⚠️ Filter `?integration=cursor` returns only cursor rows; combined with `?decision=deny` narrows further. + +#### U–V. Scope matrix +- ⚠️ User scope: `~/.cursor/hooks.json`. Project: `.cursor/hooks.json`. Local: `.cursor/hooks.local.json` (if supported). +- ⚠️ Twin-fire: both user + project install → Cursor fires both → firing lock picks first, second exits 0 cleanly (no duplicate allow/deny). +- ⚠️ Ordering: project-scope entries evaluated before user-scope (precedence). + +#### W. Error paths +- ⚠️ Empty stdin (Cursor pipes nothing): handler logs warning "stdin is empty for - Cursor Agent might not be piping context", still synthesizes session, returns allow. 
+- ⚠️ Malformed JSON stdin: handler logs "payload parse failed", treats as empty payload, returns allow. +- ⚠️ `~/.cursor/hooks.json` permission denied: install surfaces clear error, doesn't write partial file. + +#### X. Concurrency +- ⚠️ Two Cursor windows firing simultaneously: writes to `current.jsonl` serialized by advisory lock, no JSONL corruption. + +#### Y. Cross-version +- ⚠️ Old published handler receiving `--hook PreToolUse --integration cursor --stdin` + Cursor-shaped payload: attributes correctly via `--integration` flag even if event-name lookup fails. + +#### Z. Known quirks +- ⚠️ Cursor IDE v0.42+ changed payload shape — regression guard for any hard-coded path keys. +- ⚠️ Cursor Agent emits `beforeSubmitPrompt` with an empty `prompt` field during init — sanitize policies must not flag empty strings. +- ⚠️ Cursor's built-in rules file coexists with our hooks — test that our `stop` policy output doesn't conflict with Cursor's own stop-behavior. + +--- + +### 16.2 Gemini CLI — End-to-End + +**CLI**: `gemini`. Settings file: `~/.gemini/settings.json` (user), `.gemini/settings.json` (project). Gemini uses Claude's settings format (`hooks: { EventName: [{ hooks: [...] }] }`). + +#### A. Install pipeline +- ✅ `getSettingsPath("user")` → `~/.gemini/settings.json`. +- ✅ `getSettingsPath("project", cwd)` → `/.gemini/settings.json`. +- ⚠️ Shared Claude-format settings: `writeHookEntries` inserts matchers under `s.hooks[eventType]`; must not disturb other Gemini-specific top-level keys (`theme`, `models`, `mcpServers`). +- ⚠️ `FAILPROOFAI_HOOK_MARKER` field added to each entry — `isFailproofaiHook` matches on marker AND command substring (belt + suspenders). + +#### B. Uninstall pipeline +- ⚠️ Removes entries whose marker is true OR command contains `failproofai`. Empty matcher arrays collapsed. +- ⚠️ Must not remove user's own custom Gemini hooks that happen to share an event type. + +#### C. 
Settings-file shape preservation +- ⚠️ Preserves `theme`, `mcpServers`, `approvalMode`, `telemetry`, `selectedAuthType`, `model` blocks. +- ⚠️ `hooks` block ordering preserved where possible (Gemini sometimes reads events in order). + +#### D. Hook command format +- ✅ Command: `"${process.execPath}" "${binaryPath}" --hook ${eventType} --integration gemini --stdin` (user) OR `npx -y failproofai --hook ${eventType} --integration gemini --stdin` (project). +- ⚠️ Event name preserved as Gemini's **unique PascalCase** (`BeforeTool`, `BeforeModel`, etc.) — these names don't overlap with Claude's PascalCase (`PreToolUse`), so attribution works via event-name fallback even without `--integration`. +- ⚠️ `--stdin` flag mandatory. + +#### E. Trigger reality +- ✅ `BeforeTool`, `AfterTool`, `BeforeModel`, `AfterModel`, `BeforeAgent`, `AfterAgent`, `BeforeToolSelection`, `PreCompress`, `Notification`, `SessionStart`, `SessionEnd`, `UserPromptSubmit`, `Stop` (test every event Gemini actually fires). +- ⚠️ Gemini fires `PreCompress` before truncating context — unique to Gemini; policy has access to `parts` count and can instruct or allow. +- ⚠️ `BeforeToolSelection` fires BEFORE `BeforeTool` — handler must not dedup them together (different canonical events? Currently both map to PreToolUse? **DECIDE AND TEST**). + +#### F–L. Payload & normalization +- ⚠️ Gemini nests **everything** under `data` — deep-extract pulls: `data.call.method` → `tool_name`, `data.call.arguments` → `tool_input`, `data.parts[].text` → prompt text. +- ⚠️ `data.session.id` vs top-level `sessionId` vs `data.sessionID` — normalizer tries all. +- ⚠️ `data.workspace.root_path` → `cwd` fallback. +- ⚠️ `data.model.name` → part of tool_name for `BeforeModel` event. +- ⚠️ Gemini's `parts` array may contain mixed text + functionCall entries — normalizer extracts text for UserPromptSubmit, functionCall.name for BeforeTool. +- ⚠️ Arguments may be a JSON object OR a JSON-encoded string — handle both. 
+- ⚠️ `transcript_path`: Gemini emits `data.transcript_path`; fallback to constructed `~/.gemini/tmp//chats/session--.json`. + +#### M–O. Policy evaluation +- ⚠️ `block-sudo` on `BeforeTool(tool_name=run_shell_command, tool_input.command="sudo ...")` → deny; Gemini cancels. +- ⚠️ `sanitize-api-keys` on `AfterTool` output scans `data.result` / `data.output` text. +- ⚠️ `warn-repeated-tool-calls` threshold tuned for Gemini (tends to retry on model errors). +- ⚠️ Stop-event policies fire on `Stop`; Gemini's Stop has a `reason` field (user-cancel vs model-done) — policy differentiates. +- ⚠️ `PreCompress` policy: custom hook can log size + decide allow/deny (default allow). + +#### P–R. Persistence / transcript +- ✅ `integration: "gemini"`, PascalCase canonical event, sessionId = real Gemini session UUID. +- ⚠️ Virtual project mirror: Gemini writes to `~/.claude/projects//.jsonl` (Gemini ∈ VIRTUAL_PROJECT_LOG_INTEGRATIONS). +- ⚠️ Transcript: `~/.gemini/tmp//chats/session-T-.json`. Dashboard link must resolve to existing file. +- ⚠️ Gemini session UUID vs Gemini's internal "chat id" — test both keys map to same session row. + +#### S–T. Dashboard +- ⚠️ Badge: `Gemini` in indigo. +- ⚠️ Session detail page parses Gemini's chat JSON (different shape than Claude's JSONL transcript) — log-entries parser has a Gemini branch. Test with real fixture. +- ⚠️ `BeforeTool` / `AfterTool` shown as PascalCase `PreToolUse` / `PostToolUse`; `BeforeModel` / `AfterModel` shown as... **DECIDE canonical mapping and test.** + +#### U–V. Scope matrix +- ⚠️ User: `~/.gemini/settings.json`. Project: `.gemini/settings.json`. +- ⚠️ No Cursor-style twin-fire; dedup still applies for safety. + +#### W. Error paths +- ⚠️ Empty `data` block: handler doesn't crash on deep-extract; falls back to integration="gemini" + sessionId fallback. +- ⚠️ Gemini CLI invokes hook with non-JSON stdin during auth flow — handler treats as empty, returns allow. + +#### X. 
Concurrency +- ⚠️ Gemini CLI runs tools sequentially, but notifications + Before/AfterModel may overlap — advisory lock required. + +#### Y. Cross-version +- ⚠️ `BeforeTool` / `AfterTool` etc. are Gemini-unique; event-name fallback attributes correctly on any handler version that lists them in GEMINI_UNIQUE or GEMINI_HOOK_EVENT_TYPES. + +#### Z. Known quirks +- ⚠️ Gemini re-fires `BeforeTool` on retry — `warn-repeated-tool-calls` must not count these as user-initiated repeats. +- ⚠️ Gemini's `Notification` event is transient; dashboard must not surface every one as a major row (consider collapsing). +- ⚠️ Gemini on Windows uses a different tmp path (`%LOCALAPPDATA%\Google\Gemini\tmp\...`); transcript resolution branches. + +--- + +### 16.3 OpenAI Codex — End-to-End + +**CLI**: `codex`. Settings file: user `~/.codex/hooks.json`, project `.codex/hooks.json`. Codex uses Claude-like format but keys are PascalCase in config while commands are invoked with snake_case event args. + +#### A. Install pipeline +- ✅ Paths resolve correctly for user and project. +- ⚠️ `writeHookEntries` writes entries under PascalCase keys (`PreToolUse`), but the `--hook` argument in the bash command is snake_case (`pre_tool_use`). Test both simultaneously. +- ⚠️ Existing Codex-specific settings (`modelProvider`, `approvalPolicy`, `sandboxPolicy`) preserved. + +#### B. Uninstall pipeline +- ✅ Removes matchers whose `hooks[].command` contains `failproofai` or whose marker is true. +- ⚠️ Empty PascalCase event keys deleted after removal. + +#### D. Hook command format +- ✅ `"${process.execPath}" "${binaryPath}" --hook ${snakeEvent} --integration codex` (user) / `npx -y failproofai --hook ${snakeEvent} --integration codex` (project). +- ⚠️ Snake_case event name in command = unique Codex signal (distinct from Claude PascalCase, Copilot camelCase, Gemini unique PascalCase, Cursor camelCase, OpenCode dotted). Attribution self-identifies without `--integration`. 
+- ⚠️ No `--stdin` (Codex uses env vars for some payload keys). + +#### E. Trigger reality +- ✅ `pre_tool_use`, `post_tool_use`, `session_start`, `session_end`, `user_prompt_submitted`, `agent_stop`, `notification` — every one fires once per Codex event. +- ⚠️ Codex supports approval-based tool gating; hook firing order relative to Codex's built-in approval dialog must not deadlock. + +#### F–L. Payload & normalization +- ⚠️ Codex emits snake_case keys (`session_id`, `tool_name`, `tool_input`) natively — normalization is light. +- ⚠️ `tool_input` is already a JSON object; no stringified parsing needed. +- ⚠️ `transcript_path` absent; derived from `CODEX_TRACE_DIR` env var + session ID. +- ⚠️ `cwd` from `workspace_root` or `process.cwd()`. + +#### H. Canonicalization +- ⚠️ `pre_tool_use` → `PreToolUse`, `session_start` → `SessionStart`, etc. `CODEX_EVENT_MAP` is the source of truth — round-trip fuzz test. + +#### M–O. Policy evaluation +- ⚠️ `block-sudo`, `block-rm-rf`, `block-curl-pipe-sh` all apply on `pre_tool_use`; deny → exit 2 → Codex cancels. +- ⚠️ Codex's sandbox policy may already block some commands; our hook layer must not false-report deny when Codex itself also denied (avoid duplicate logs). +- ⚠️ Stop-event policies on `agent_stop`: test non-git, detached-HEAD, and fully-green cases. + +#### P–R. Persistence / transcript +- ✅ `integration: "codex"`, canonical PascalCase event name, real session UUID. +- ⚠️ Virtual project mirror: Codex ∈ VIRTUAL_PROJECT_LOG_INTEGRATIONS — writes to `~/.claude/projects//.jsonl`. +- ⚠️ Codex trace log: `~/.codex/traces/.log` — parsed by `src/codex/trace-parser.ts`. Unit-test parser with real log samples: extracts `tool_calls`, handles truncation, multi-line entries, UTF-8 edge cases. + +#### S–T. Dashboard +- ⚠️ Badge: `Codex` in purple. +- ⚠️ Trace parser output displayed alongside hook entries on session detail page; order preserved by timestamp. + +#### U–V. 
Scope matrix +- ⚠️ User vs project: same rules as Cursor/Gemini. + +#### W. Error paths +- ⚠️ Codex trace file missing: dashboard session page shows hooks-only history, no crash. +- ⚠️ Codex killed mid-tool: `post_tool_use` never fires; Stop policies still evaluate on next `agent_stop`. + +#### Y. Cross-version +- ⚠️ `pre_tool_use` snake_case is Codex-unique; old handler still attributes via event-name fallback. + +#### Z. Known quirks +- ⚠️ Codex `notification` event is fire-and-forget — don't dedup it against other events. +- ⚠️ Codex may spawn sub-agents; `agent_stop` fires for each. Session grouping must handle parent + children. + +--- + +### 16.4 OpenCode — End-to-End + +**Runtime**: `.opencode/plugins/failproofai.ts` (TypeScript plugin loaded by OpenCode at runtime). No static settings file — plugin code is the install artifact. + +#### A. Install pipeline +- ✅ `manager.install("opencode", "project")` writes `.opencode/plugins/failproofai.ts` with the generated plugin source. +- ✅ `manager.install("opencode", "user")` writes `~/.opencode/plugins/failproofai.ts`. +- ⚠️ Plugin source embeds `failproofai --hook <event> --integration opencode --stdin` shell command or invokes `cli.mjs` directly (choose one path and test consistently). +- ⚠️ Plugin relies on `FAILPROOFAI_DIST_PATH` or `npx -y failproofai` — matrix-test both modes. +- ⚠️ Existing user-authored plugins in same directory preserved. + +#### B. Uninstall pipeline +- ⚠️ `removeHooksFromFile` deletes the plugin file entirely (since each plugin is one file). Must not delete unrelated plugins. + +#### C. Plugin source shape +- ⚠️ Plugin exports `FailproofAIPlugin` (named export) with signature OpenCode expects. +- ⚠️ Plugin captures `ctx.directory` for cwd; `ctx.session?.id` for session. +- ⚠️ Plugin uses `spawnSync` (synchronous — OpenCode requires sync blocking to halt tool calls). + +#### D. 
Invocation surface +- ✅ Plugin's `callcli(event, args)` builds payload `{ ...args, integration: "opencode", cwd: ctx.directory }` and pipes JSON stdin. +- ⚠️ `--integration opencode` passed explicitly — no event-name fallback needed. +- ⚠️ Dotted event names (`tool.before`, `tool.after`, `session.created`, `session.destroyed`) preserved verbatim in payload `hook_event_name`. + +#### E. Trigger reality +- ✅ `session.created`, `session.destroyed`, `tool.before`, `tool.after`, `prompt.submit`, `agent.stop` — each fires exactly once. +- ⚠️ OpenCode fires hooks synchronously during tool dispatch — blocking longer than 10s kills the tool call. + +#### F–L. Payload & normalization +- ⚠️ OpenCode plugin pre-normalizes keys (`integration`, `session_id`, `tool_name`, `tool_input`, `cwd`) before spawning. Handler has almost nothing to do. +- ⚠️ If plugin ctx is missing session (rare init case), plugin sends `session_id: "default"`; handler synthesizes `session-opencode-default`. +- ⚠️ `tool_input` is always an object (plugin pre-serializes). + +#### H. Canonicalization +- ⚠️ `tool.before` → `PreToolUse`, `tool.after` → `PostToolUse`, `session.created` → `SessionStart`, etc. `OPENCODE_EVENT_MAP` source of truth. + +#### M–O. Policy evaluation & decision +- ⚠️ Deny → `throw new Error(stderr)` in plugin → OpenCode treats as tool failure, cancels call. +- ⚠️ Instruct → plugin reads stdout, injects into agent context (OpenCode's system-prompt addendum mechanism). +- ⚠️ Timeout in `spawnSync` → plugin treats as allow (fail-open) to avoid freezing the agent. + +#### P–R. Persistence +- ✅ `integration: "opencode"`, canonical event names, real session UUID. +- ⚠️ Virtual project mirror: OpenCode ∈ VIRTUAL_PROJECT_LOG_INTEGRATIONS. +- ⚠️ Transcript: OpenCode has no file transcript; dashboard session page uses hook events as the timeline. + +#### S–T. Dashboard +- ⚠️ Badge: `OpenCode` in amber. +- ⚠️ Session detail renders from hook events only (no external transcript file to cross-reference). 
+ +#### U–V. Scope matrix +- ⚠️ User-scope plugin: `~/.opencode/plugins/failproofai.ts`. Project: `.opencode/plugins/failproofai.ts`. Local: N/A (OpenCode has no .local convention). +- ⚠️ If both scopes install, OpenCode loads both — plugin imports dedup via file-content hash; firing-lock handles runtime dedup. + +#### W. Error paths +- ⚠️ `spawnSync` fails to find `failproofai` binary: plugin logs warning to OpenCode debug channel, returns allow (fail-open) so OpenCode keeps working. +- ⚠️ JSON stringify failure on circular payload: plugin catches, sends `{}`, handler falls back. +- ⚠️ **Diagnostic stderr suppression**: handler must NOT write debug logs to stderr during normal success — OpenCode parses stderr strictly. Regression guard: test that handler's stderr is empty on allow. + +#### X. Concurrency +- ⚠️ OpenCode fires tool.before / tool.after on different tools concurrently (parallel tool calls) — advisory lock serializes persistence writes. + +#### Y. Cross-version +- ⚠️ Plugin is version-controlled with our package; `--integration opencode` flag is explicit — cross-version works as long as handler accepts the flag. + +#### Z. Known quirks +- ⚠️ OpenCode's `session.created` fires BEFORE `ctx.session.id` is populated in some versions — plugin must handle missing session ID. +- ⚠️ OpenCode's tool arg format varies by tool; plugin sends raw `args` object and relies on handler's policy code to interpret. +- ⚠️ Plugin throws `new Error("Blocked by FailproofAI")` — OpenCode renders as a red failure; test exact message. + +--- + +### 16.5 Pi Coding Agent — End-to-End + +**Runtime**: `.pi/extensions/failproofai.ts` (TypeScript extension). Like OpenCode, code IS the install artifact. + +#### A. Install pipeline +- ✅ Writes `~/.pi/extensions/failproofai.ts` (user) or `.pi/extensions/failproofai.ts` (project). +- ⚠️ Extension source imports Pi's `ExtensionAPI` type (loosely — no hard type dep). 
+- ⚠️ Extension uses `spawnSync` identical to OpenCode for synchronous blocking. + +#### B. Uninstall pipeline +- ⚠️ Deletes extension file; no other cleanup. + +#### C. Extension source shape +- ⚠️ Default export is a function `(pi: ExtensionAPI) => void`. +- ⚠️ Registers event handlers for each Pi event via `pi.on("session_start", ...)` etc. +- ⚠️ UUID resolution: Pi's session key may be non-UUID (e.g. path-like); extension converts to stable UUID via hash. + +#### D. Invocation surface +- ✅ Command: `failproofai --hook <event> --integration pi --stdin`. +- ⚠️ Event name snake_case (`session_start`, `tool_call`, `tool_result`, `input`) — unique-ish but overlaps with Codex's snake_case. Therefore `--integration pi` flag is mandatory for correct attribution. + +#### E. Trigger reality +- ✅ `session_start`, `session_end`, `tool_call`, `tool_result`, `input`, `error` — each fires. +- ⚠️ Pi's `input` = user prompt; maps to canonical `UserPromptSubmit`. +- ⚠️ Pi's `tool_call` maps to `PreToolUse`, `tool_result` to `PostToolUse`. +- ⚠️ Pi has no native Stop event; `session_end` serves that role. + +#### F–L. Payload & normalization +- ⚠️ `ctx.sessionId` populates `payload.session_id` on every event. +- ⚠️ `tool_call` payload: `{ name, arguments }` → `tool_name`, `tool_input`. +- ⚠️ `tool_result` payload: `{ name, result, error }` — policy decision depends on success/failure. +- ⚠️ Pi's `directory` vs `cwd` vs `workspace_root`: normalizer tries all, prefers most specific. +- ⚠️ Special key `codex_session_id` / `codex_event` (heritage from Pi-over-Codex) — handled if present. + +#### H. Canonicalization +- ⚠️ `session_start` → `SessionStart`. Note collision with Codex's `session_start`. Handler disambiguates by `--integration pi` flag only. Regression guard: Pi payload without `--integration` defaults to Codex → WRONG; add explicit Pi detect via presence of `pi_version` or similar payload key. + +#### M–O. 
Policy evaluation & decision +- ⚠️ Deny: extension returns `{ block: true }` AND calls `ctx.ui.setStatus("FailproofAI: Blocked - ")` for premium UI feedback. +- ⚠️ Instruct: extension injects stdout into Pi's agent context via `ctx.agent.addSystemMessage`. +- ⚠️ Recursive isolation: extension detects if payload content starts with `/failproofai-status` and short-circuits (allow, no log) to prevent infinite loops from its own status messages. + +#### P–R. Persistence +- ✅ `integration: "pi"`, canonical event names, UUID session. +- ⚠️ Virtual project mirror: Pi ∈ VIRTUAL_PROJECT_LOG_INTEGRATIONS. +- ⚠️ Pi's transcript/log path: `~/.pi/sessions//transcript.jsonl` (or similar). Dashboard parses if present. + +#### S–T. Dashboard +- ⚠️ Badge: `Pi` in rose. +- ⚠️ Session detail surfaces Pi-specific status messages (from `setStatus` calls) alongside policy decisions. + +#### U–V. Scope matrix +- ⚠️ User: `~/.pi/extensions/failproofai.ts`. Project: `.pi/extensions/failproofai.ts`. Local: N/A. +- ⚠️ Both-scope install: Pi loads both; dedup via content hash + firing-lock. + +#### W. Error paths +- ⚠️ `spawnSync` times out (>10s): extension treats as allow, logs warning via `ctx.ui.setStatus`. +- ⚠️ Extension exception: caught, logged, allow-through so Pi session doesn't crash. +- ⚠️ **Diagnostic stderr suppression** (same as OpenCode): handler stderr empty on allow. + +#### X. Concurrency +- ⚠️ Pi may dispatch parallel tool calls in agent-mode; advisory lock serializes persistence. + +#### Y. Cross-version +- ⚠️ `--integration pi` flag mandatory; without it on old handler, payload falls back to Codex (collision). Publish only when handler recognizes Pi via payload signature too. + +#### Z. Known quirks +- ⚠️ Pi's `ctx.session?.id` may be undefined for the very first event after launch; extension handles gracefully. +- ⚠️ Pi premium features: `setStatus` is only available in paid tier; extension must no-op if `ctx.ui` undefined. 
+- ⚠️ Pi emits `error` events for non-fatal issues; policy should not treat every error as a Stop. + +--- + +## 17. Transcript Parser Edge Cases (per integration) + +Each integration's transcript parser (`lib/log-entries.ts` or `src/codex/trace-parser.ts`) has its own format. Parameterize these tests per integration: + +- ✅ Valid transcript: every line parses, timeline order preserved. +- ⚠️ Partial-line at EOF (file still being written): parser handles without throwing. +- ⚠️ UTF-8 BOM at start: stripped. +- ⚠️ Non-UTF-8 bytes in the middle: parser substitutes replacement chars, keeps going. +- ⚠️ Empty file: returns `[]`. +- ⚠️ File larger than memory limit: parser streams, doesn't load all into RAM. +- ⚠️ Nested tool calls: parent/child tool relationships preserved in output order. +- ⚠️ System messages interleaved with user/assistant: correctly typed in output. +- ⚠️ Integration-specific fields (Gemini's `parts`, Codex's trace markers, Cursor's MCP blocks): extractor pulls them into dashboard-visible metadata. + +--- + +## 18. Dashboard Display Gap — Deep Catalog (per integration) + +Gaps that appear in the dashboard when the persistence layer lacks data. For each integration confirm the rendering falls back sensibly: + +- ⚠️ Missing `sessionId`: show fallback `session--`, NEVER empty `—`. +- ⚠️ Missing `transcriptPath`: session detail page renders from hook events only; "View transcript" button hidden instead of linking to 404. +- ⚠️ Missing `cwd`: row shows `—`; project filter doesn't crash. +- ⚠️ Missing `toolName`: derived from command or "(none)"; column never empty. +- ⚠️ Missing `policyName` but `policyNames` present: render first + "+N" count. +- ⚠️ Missing both `policyName` and `policyNames` on allow: "—" is correct. +- ⚠️ Missing `reason` on deny/instruct: stderr snippet shown instead of blank. +- ⚠️ Integration field = legacy value not in `INTEGRATION_STYLES`: badge shows raw string with default gray styling, not crash. 
+- ⚠️ Very long `reason` (>2kb): truncated with "…" + expandable click. +- ⚠️ Policy reason with embedded newlines: rendered as multi-line block, not `\\n` literal. +- ⚠️ Duration spike (>10s): highlighted to flag policy performance regression. + +--- + +## 19. Manager & CLI Surface (per integration) + +- ⚠️ `failproofai policies` (list) — shows per-integration status for each scope. +- ⚠️ `failproofai policies --install <policy> --integration <id>` — enables only that policy. +- ⚠️ `failproofai policies --install all --integration <id>` — enables all. +- ⚠️ `failproofai policies --uninstall <policy> --integration <id>` — disables only that policy. +- ⚠️ `failproofai policies --uninstall all --integration <id>` — removes all hooks. +- ⚠️ `--scope user | project | local` — routing to correct file. +- ⚠️ `--cwd <path>` override for project-scope operations. +- ⚠️ `--strict` flag for custom hook loading: error instead of fail-open on syntax errors. +- ⚠️ `--dry-run` (if supported): prints what would change without writing. +- ⚠️ `failproofai p -i -c <path>` — inline test a custom policy against each integration's payload shape. +- ⚠️ `failproofai --version` — matches package.json; regression guard for version-consistency CI check. +- ⚠️ `failproofai copilot-sync` — works silently on non-snap systems; idempotent. + +--- + +## 20. Custom Hooks & Convention Policies (per integration) + +- ⚠️ `failproofai.config.js` / `.failproofai-project.js` / `.failproofai-user.js` discovered in correct order. +- ⚠️ Custom hook `match.events` filtering works with each integration's canonical event names. +- ⚠️ Custom hook receiving `ctx.session.integration` can branch per integration. +- ⚠️ Custom hook timeout (10s) kills long-running user code without crashing handler. +- ⚠️ Custom hook exception caught, logged, treated as allow (fail-open unless `--strict`). +- ⚠️ Convention policies (`.failproofai-<scope>` dir) loaded with correct scope tag. 
+- ⚠️ Custom hook returning `deny` with reason shows up in persistence with `policyName: "custom/<name>"`. +- ⚠️ Transitive imports from custom hook: `loader-utils.ts` rewrites `from 'failproofai'` to local dist path. + +--- + +## 21. Release & Publishing Safety + +- ⚠️ Version bump only updates root `package.json` (CI version-consistency check). +- ⚠️ CHANGELOG.md has entry under `## Unreleased`. +- ⚠️ Docker clean-install smoke test passes from packed tarball (not local source). +- ⚠️ `npm pack --ignore-scripts` produces a tarball that installs cleanly. +- ⚠️ After publishing, `npx -y failproofai@<version>` used by project-scope hooks works end-to-end on a fresh machine for each integration. +- ⚠️ E2E test suite runs a smoke flow for each integration (install → fire event → check persistence). + +--- + +## Known Past Regressions (Must maintain named tests) + +| # | Regression | Test name | +|---|-----------------------------------------------------------------------|------------------------------------------------------| +| 1 | Codex SessionStart mis-attributed to Gemini | `handler > --integration flag wins over event-name` | +| 2 | Lifecycle events swallowed by 60s dedup window | `dedup > lifecycle uses 5s window + sessionId` | +| 3 | Copilot events labeled as Claude on dashboard | `copilot > native camelCase event names install` | +| 4 | Copilot session ID shows as `—` | `copilot > fallback sessionId synthesized` | +| 5 | `synchronizeCopilotProjectHooks` wipes user-scope entries | `copilot-sync > preserves user scope when no project file` | +| 6 | Copilot `toolArgs` stringified JSON caused tool_input to be a string | `copilot > normalize parses toolArgs JSON` | +| 7 | `npx -y failproofai` (published 0.0.5) ignored `--integration` flag | `cross-version > event-name fallback attributes correctly on old handler` | +| 8 | Cursor integration policy bypass on non-Claude agents | `policy > warn-repeated-tool-calls tunes for non-Claude` | +| 9 | Diagnostic stderr leak broke 
OpenCode/Pi JSON protocol | `opencode/pi > handler silent on success` | +| 10 | `.failproofai-` convention hooks not loading | `custom-hooks > convention files loaded per scope` | + +--- + +**Usage:** Treat this document as the absolute architectural source of truth and acceptance checklist for any PR touching `src/hooks/`, `src/codex/`, or any integration file. A PR that modifies behavior in one of the categories above must either add/update a test covering its rows or explicitly justify in the PR description why no test was added. diff --git a/__tests__/e2e/hooks/copilot-integration.e2e.test.ts b/__tests__/e2e/hooks/copilot-integration.e2e.test.ts new file mode 100644 index 00000000..182e2ae3 --- /dev/null +++ b/__tests__/e2e/hooks/copilot-integration.e2e.test.ts @@ -0,0 +1,241 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { execSync, spawnSync } from "node:child_process"; +import { writeFileSync, readFileSync, existsSync, mkdirSync, rmSync } from "node:fs"; +import { resolve, join } from "node:path"; +import { CopilotPayloads } from "../helpers/payloads"; +import { + _resetForTest, + getAllHookActivityEntries, + searchHookActivity, +} from "../../../src/hooks/hook-activity-store"; + +const BINARY_PATH = resolve(__dirname, "../../../bin/failproofai.mjs"); +const PROJECT_DIR = resolve(__dirname, "../../fixtures/copilot-project"); +const HOME_DIR = resolve(PROJECT_DIR, ".test-home"); +const COPILOT_HOME = resolve(HOME_DIR, ".copilot"); +const COPILOT_CONFIG_PATH = resolve(COPILOT_HOME, "config.json"); +const COPILOT_SESSION_STATE_DIR = resolve(COPILOT_HOME, "session-state"); +const COPILOT_PROJECT_HOOKS_PATH = resolve(PROJECT_DIR, ".github", "hooks", "failproofai.json"); +const BASHRC_PATH = resolve(HOME_DIR, ".bashrc"); +const ACTIVITY_DIR = resolve(HOME_DIR, ".failproofai", "cache", "hook-activity"); +const DEDUP_DIR = resolve(HOME_DIR, ".failproofai", "cache", "dedup"); +const COPILOT_SESSION_ID = 
"11111111-2222-3333-4444-555555555555"; + +function cliEnv(extraEnv: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv { + return { + ...process.env, + HOME: HOME_DIR, + COPILOT_HOME, + FAILPROOFAI_DIST_PATH: process.cwd(), + FAILPROOFAI_TELEMETRY_DISABLED: "1", + FAILPROOFAI_SKIP_KILL: "true", + ...extraEnv, + }; +} + +function resetActivityStore(): void { + _resetForTest(ACTIVITY_DIR); +} + +function readCopilotConfig(): Record { + return JSON.parse(readFileSync(COPILOT_CONFIG_PATH, "utf8")); +} + +function readActivityEntries(sessionId?: string) { + resetActivityStore(); + if (sessionId) { + return searchHookActivity({ sessionId }, 1).entries; + } + return getAllHookActivityEntries(); +} + +function runCopilotHook( + event: string, + payload: Record | string, + extraEnv: NodeJS.ProcessEnv = {}, + integration = "copilot", +) { + return spawnSync("bun", [BINARY_PATH, "--hook", event, "--integration", integration], { + input: typeof payload === "string" ? payload : JSON.stringify(payload), + cwd: PROJECT_DIR, + env: cliEnv(extraEnv), + encoding: "utf8", + }); +} + +describe("E2E: Copilot Integration", () => { + beforeEach(() => { + if (existsSync(PROJECT_DIR)) rmSync(PROJECT_DIR, { recursive: true, force: true }); + mkdirSync(PROJECT_DIR, { recursive: true }); + mkdirSync(resolve(PROJECT_DIR, ".github", "hooks"), { recursive: true }); + mkdirSync(COPILOT_HOME, { recursive: true }); + mkdirSync(COPILOT_SESSION_STATE_DIR, { recursive: true }); + writeFileSync(BASHRC_PATH, "# shell rc\n", "utf8"); + writeFileSync(COPILOT_CONFIG_PATH, JSON.stringify({ version: 1, hooks: {} }, null, 2) + "\n", "utf8"); + if (existsSync(ACTIVITY_DIR)) rmSync(ACTIVITY_DIR, { recursive: true, force: true }); + if (existsSync(DEDUP_DIR)) rmSync(DEDUP_DIR, { recursive: true, force: true }); + resetActivityStore(); + }); + + afterEach(() => { + if (existsSync(PROJECT_DIR)) rmSync(PROJECT_DIR, { recursive: true, force: true }); + }); + + it("installs project hooks with Copilot native camelCase event 
names", () => { + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration copilot --scope project`, { + cwd: PROJECT_DIR, + env: cliEnv(), + }); + + const hooks = JSON.parse(readFileSync(COPILOT_PROJECT_HOOKS_PATH, "utf8")); + + expect(hooks.version).toBe(1); + expect(hooks.hooks.sessionStart[0].bash).toContain("--hook sessionStart --integration copilot"); + expect(hooks.hooks.preToolUse[0].bash).toContain("--hook preToolUse --integration copilot"); + expect(hooks.hooks.userPromptSubmitted[0].bash).toContain("--hook userPromptSubmitted --integration copilot"); + expect(hooks.hooks.SessionStart).toBeUndefined(); + expect(hooks.hooks.PreToolUse).toBeUndefined(); + }); + + it("installs user hooks without wiping existing config and appends copilot-sync bootstrap", () => { + writeFileSync( + COPILOT_CONFIG_PATH, + JSON.stringify({ + version: 1, + copilotTokens: ["keep-me"], + loggedInUsers: [{ login: "octocat" }], + hooks: { + customEvent: [{ bash: "echo untouched" }], + }, + }, null, 2) + "\n", + "utf8", + ); + + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration copilot --scope user`, { + cwd: PROJECT_DIR, + env: cliEnv(), + }); + + const config = readCopilotConfig(); + const bashrc = readFileSync(BASHRC_PATH, "utf8"); + + expect(config.copilotTokens).toEqual(["keep-me"]); + expect(config.loggedInUsers).toEqual([{ login: "octocat" }]); + expect(config.hooks.customEvent).toEqual([{ bash: "echo untouched" }]); + expect(config.hooks.sessionStart[0].bash).toContain("--hook sessionStart --integration copilot"); + expect(config.hooks.preToolUse[0].bash).toContain("--hook preToolUse --integration copilot"); + expect(bashrc).toContain("env failproofai copilot-sync 2>/dev/null"); + }); + + it("uninstalls only failproofai hooks and preserves unrelated Copilot config", () => { + writeFileSync( + COPILOT_CONFIG_PATH, + JSON.stringify({ + version: 1, + copilotTokens: ["keep-me"], + hooks: { + preToolUse: [{ bash: "echo untouched" }], + }, + 
}, null, 2) + "\n", + "utf8", + ); + + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration copilot --scope user`, { + cwd: PROJECT_DIR, + env: cliEnv(), + }); + execSync(`bun ${BINARY_PATH} policies --uninstall --integration copilot --scope user`, { + cwd: PROJECT_DIR, + env: cliEnv(), + }); + + const config = readCopilotConfig(); + + expect(config.copilotTokens).toEqual(["keep-me"]); + expect(config.hooks.preToolUse).toEqual([{ bash: "echo untouched" }]); + expect(config.hooks.sessionStart).toBeUndefined(); + expect(config.hooks.userPromptSubmitted).toBeUndefined(); + }); + + it("denies sudo from stringified toolArgs and persists a complete Copilot activity entry", () => { + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration copilot --scope project`, { + cwd: PROJECT_DIR, + env: cliEnv(), + }); + + const payload = CopilotPayloads.preToolUse.bashViaToolArgs( + "sudo rm -rf /", + PROJECT_DIR, + { sessionId: COPILOT_SESSION_ID }, + ); + + const { status, stdout, stderr } = runCopilotHook("preToolUse", payload); + const entries = readActivityEntries(COPILOT_SESSION_ID); + + expect(status).toBe(0); + const parsed = JSON.parse(stdout); + expect(parsed.permissionDecision).toBe("deny"); + expect(stderr).toContain("ACTION BLOCKED BY FAILPROOFAI"); + expect(entries).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + eventType: "PreToolUse", + integration: "copilot", + sessionId: COPILOT_SESSION_ID, + toolName: "bash", + transcriptPath: join(HOME_DIR, ".copilot", "session-state", COPILOT_SESSION_ID, "events.jsonl"), + }), + ]), + ); + }); + + it("persists sessionStart and userPromptSubmitted for the policies page with the same session id", () => { + const sessionStart = CopilotPayloads.sessionStart(PROJECT_DIR, { sessionId: COPILOT_SESSION_ID }); + const prompt = CopilotPayloads.userPromptSubmitted("review the diff", PROJECT_DIR, { + sessionId: COPILOT_SESSION_ID, + }); + + const startResult = 
runCopilotHook("sessionStart", sessionStart); + const promptResult = runCopilotHook("userPromptSubmitted", prompt); + const entries = readActivityEntries(COPILOT_SESSION_ID); + + expect(startResult.status).toBe(0); + expect(promptResult.status).toBe(0); + expect(entries.map((entry) => entry.eventType)).toEqual( + expect.arrayContaining(["SessionStart", "UserPromptSubmit"]), + ); + expect(entries.every((entry) => entry.integration === "copilot")).toBe(true); + expect(entries.every((entry) => entry.sessionId === COPILOT_SESSION_ID)).toBe(true); + }); + + it("recovers the session id from COPILOT_SESSION_ID when the payload is empty", () => { + const result = runCopilotHook("sessionStart", "", { + COPILOT_SESSION_ID: COPILOT_SESSION_ID, + }); + const entries = readActivityEntries(COPILOT_SESSION_ID); + + expect(result.status).toBe(0); + expect(entries).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + eventType: "SessionStart", + integration: "copilot", + sessionId: COPILOT_SESSION_ID, + transcriptPath: join(HOME_DIR, ".copilot", "session-state", COPILOT_SESSION_ID, "events.jsonl"), + }), + ]), + ); + }); + + it("silently ignores corrupted legacy claude-code Copilot lifecycle duplicates", () => { + const payload = CopilotPayloads.sessionStart(PROJECT_DIR, { sessionId: COPILOT_SESSION_ID }); + + const result = runCopilotHook("sessionStart", payload, {}, "claude-code"); + const entries = readActivityEntries(COPILOT_SESSION_ID); + + expect(result.status).toBe(0); + expect(result.stdout.trim()).toBe(""); + expect(result.stderr.trim()).toBe(""); + expect(entries).toEqual([]); + }); +}); diff --git a/__tests__/e2e/hooks/cursor-integration.e2e.test.ts b/__tests__/e2e/hooks/cursor-integration.e2e.test.ts new file mode 100644 index 00000000..bb838e0e --- /dev/null +++ b/__tests__/e2e/hooks/cursor-integration.e2e.test.ts @@ -0,0 +1,178 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { execSync, spawnSync } from 
"node:child_process"; +import { writeFileSync, readFileSync, existsSync, mkdirSync, rmSync } from "node:fs"; +import { resolve } from "node:path"; +import { homedir } from "node:os"; +import { CursorPayloads } from "../helpers/payloads"; + +const BINARY_PATH = resolve(__dirname, "../../../bin/failproofai.mjs"); +const PROJECT_DIR = resolve(__dirname, "../../fixtures/cursor-project"); +const CURSOR_HOOKS_PATH = resolve(PROJECT_DIR, ".cursor", "hooks.json"); +const CONFIG_PATH = resolve(PROJECT_DIR, ".failproofai", "policies-config.json"); +// Firing-lock files can persist across test cases. Clear them. +const DEDUP_DIR = resolve(require("node:os").homedir(), ".failproofai", "cache", "dedup"); + +describe("E2E: Cursor Integration", () => { + beforeEach(() => { + if (existsSync(PROJECT_DIR)) rmSync(PROJECT_DIR, { recursive: true, force: true }); + if (existsSync(DEDUP_DIR)) rmSync(DEDUP_DIR, { recursive: true, force: true }); + mkdirSync(PROJECT_DIR, { recursive: true }); + // Initialize empty cursor hooks + mkdirSync(resolve(PROJECT_DIR, ".cursor"), { recursive: true }); + writeFileSync(CURSOR_HOOKS_PATH, JSON.stringify({ version: 1, hooks: {} })); + }); + + afterEach(() => { + if (existsSync(PROJECT_DIR)) rmSync(PROJECT_DIR, { recursive: true, force: true }); + }); + + it("denies sudo command via Cursor preToolUse hook", () => { + // 1. Install block-sudo for Cursor project scope + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + + // 2. Verify hooks.json was written correctly + const hooks = JSON.parse(readFileSync(CURSOR_HOOKS_PATH, "utf8")); + expect(hooks.version).toBe(1); + expect(hooks.hooks.beforeShellExecution[0].command).toContain("--hook PreToolUse"); + + // 3. 
Trigger the hook with a sudo payload + const payload = CursorPayloads.preToolUse.bash("sudo rm -rf /", PROJECT_DIR); + + const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "PreToolUse"], { + input: JSON.stringify(payload), + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8" + }); + + // Cursor expects Exit 0 for a protocol-compliant JSON denial. + expect(status).toBe(0); + const parsed = JSON.parse(stdout.trim()); + expect(parsed.continue).toBe(false); + expect(parsed.permission).toBe("deny"); + expect(stderr).toContain("ACTION BLOCKED BY FAILPROOFAI"); + expect(stderr).toContain("sudo"); + }); + + it("normalizes workspace_roots to cwd", () => { + // 1. Install block-sudo + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + + // 2. Trigger hook with ONLY workspace_roots (no cwd) + const payload = CursorPayloads.preToolUse.bash("sudo ls", PROJECT_DIR); + delete payload.cwd; // Force normalization from workspace_roots[0] + + const output = spawnSync("bun", [BINARY_PATH, "--hook", "PreToolUse"], { + input: JSON.stringify(payload), + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8" + }); + expect(output.status).toBe(0); + const parsedDeny = JSON.parse(output.stdout.trim()); + expect(parsedDeny.continue).toBe(false); + expect(output.stderr).toContain("ACTION BLOCKED BY FAILPROOFAI"); + }); + + it("allows benign commands", () => { + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + + const payload = CursorPayloads.preToolUse.bash("ls -la", PROJECT_DIR); + + const { status, stdout } = spawnSync("bun", 
[BINARY_PATH, "--hook", "PreToolUse"], { + input: JSON.stringify(payload), + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8" + }); + + expect(status).toBe(0); + expect(JSON.parse(stdout.trim())).toEqual({ continue: true, permission: "allow" }); + }); + + it("blocks sudo via beforeShellExecution event (tool_name normalization)", () => { + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + + // beforeShellExecution events don't include tool_name — normalizePayload must map to run_terminal_command + const payload = { + session_id: "test-session", + workspace_roots: [PROJECT_DIR], + integration: "cursor", + hook_event_name: "beforeShellExecution", + command: "sudo rm -rf /tmp/test", + }; + + const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "PreToolUse"], { + input: JSON.stringify(payload), + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8" + }); + + expect(status).toBe(0); + const parsed = JSON.parse(stdout.trim()); + expect(parsed.continue).toBe(false); + expect(parsed.permission).toBe("deny"); + expect(stderr).toContain("ACTION BLOCKED BY FAILPROOFAI"); + expect(stderr).toContain("sudo"); + }); + + it("blocks env file read via beforeReadFile event (file_path normalization)", () => { + execSync(`bun ${BINARY_PATH} policies --install block-env-files --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + + // beforeReadFile events send file_path at the top level — normalizePayload must wrap it + const payload = { + session_id: "test-session", + workspace_roots: [PROJECT_DIR], + integration: "cursor", + hook_event_name: "beforeReadFile", + file_path: 
`${PROJECT_DIR}/.env`, + }; + + const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "PreToolUse"], { + input: JSON.stringify(payload), + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8" + }); + + expect(status).toBe(0); + const parsed = JSON.parse(stdout.trim()); + expect(parsed.continue).toBe(false); + expect(parsed.permission).toBe("deny"); + expect(stderr).toContain("ACTION BLOCKED BY FAILPROOFAI"); + }); + + it("uninstalls cursor hooks correctly", () => { + // Install + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + expect(JSON.parse(readFileSync(CURSOR_HOOKS_PATH, "utf8")).hooks.beforeShellExecution).toBeDefined(); + + // Uninstall + execSync(`bun ${BINARY_PATH} policies --uninstall --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + + const hooks = JSON.parse(readFileSync(CURSOR_HOOKS_PATH, "utf8")); + expect(hooks.hooks).toBeUndefined(); + }); +}); From 4fe08b1b2e83737b99815c9be6b528590542c789 Mon Sep 17 00:00:00 2001 From: Yash Upadhyay Date: Mon, 13 Apr 2026 17:31:09 +0000 Subject: [PATCH 02/34] feat: implement Cursor IDE integration --- .codex | 0 __tests__/e2e/helpers/payloads.ts | 49 ++ .../hooks/block-read-outside-cwd.test.ts | 2 +- __tests__/hooks/integrations.test.ts | 127 +++++ __tests__/hooks/manager.test.ts | 48 +- bin/failproofai.mjs | 69 ++- src/hooks/builtin-policies.ts | 97 ++-- src/hooks/handler.ts | 38 +- src/hooks/hook-activity-store.ts | 1 + src/hooks/integrations.ts | 369 +++++++++++++ src/hooks/manager.ts | 515 +++++++----------- src/hooks/policy-evaluator.ts | 45 +- src/hooks/types.ts | 74 ++- 13 files changed, 1017 insertions(+), 417 deletions(-) create mode 100644 .codex create mode 100644 
__tests__/hooks/integrations.test.ts create mode 100644 src/hooks/integrations.ts diff --git a/.codex b/.codex new file mode 100644 index 00000000..e69de29b diff --git a/__tests__/e2e/helpers/payloads.ts b/__tests__/e2e/helpers/payloads.ts index 3b08ea00..50f4dac8 100644 --- a/__tests__/e2e/helpers/payloads.ts +++ b/__tests__/e2e/helpers/payloads.ts @@ -101,3 +101,52 @@ export const Payloads = { }; }, }; + +export const CursorPayloads = { + preToolUse: { + bash(command: string, cwd: string): Record { + return { + session_id: SESSION_ID, + workspace_roots: [cwd], + integration: "cursor", + hook_event_name: "preToolUse", // Note: cursor uses camelCase in payload too + tool_name: "run_terminal_command", + tool_input: { command }, + }; + }, + + write(filePath: string, content: string, cwd: string): Record { + return { + session_id: SESSION_ID, + workspace_roots: [cwd], + integration: "cursor", + hook_event_name: "afterFileEdit", + tool_name: "edit_file", + tool_input: { file_path: filePath, content }, + }; + }, + }, + + postToolUse: { + bash(command: string, output: string, cwd: string): Record { + return { + session_id: SESSION_ID, + workspace_roots: [cwd], + integration: "cursor", + hook_event_name: "postToolUse", + tool_name: "run_terminal_command", + tool_input: { command }, + tool_result: output, + }; + }, + }, + + stop(cwd: string): Record { + return { + session_id: SESSION_ID, + workspace_roots: [cwd], + integration: "cursor", + hook_event_name: "stop", + }; + }, +}; diff --git a/__tests__/hooks/block-read-outside-cwd.test.ts b/__tests__/hooks/block-read-outside-cwd.test.ts index ef20884c..a5e48070 100644 --- a/__tests__/hooks/block-read-outside-cwd.test.ts +++ b/__tests__/hooks/block-read-outside-cwd.test.ts @@ -34,7 +34,7 @@ describe("block-read-outside-cwd policy", () => { it("exists in BUILTIN_POLICIES", () => { expect(policy).toBeDefined(); expect(policy.defaultEnabled).toBe(false); - expect(policy.match.toolNames).toEqual(["Read", "Glob", "Grep", "Bash"]); 
+ expect(policy.match.toolNames).toEqual(["Read", "Glob", "Grep", "Bash", "run_terminal_command", "Terminal"]); }); it("allows Read with file_path inside cwd", async () => { diff --git a/__tests__/hooks/integrations.test.ts b/__tests__/hooks/integrations.test.ts new file mode 100644 index 00000000..c6467f29 --- /dev/null +++ b/__tests__/hooks/integrations.test.ts @@ -0,0 +1,127 @@ +// @vitest-environment node +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { readFileSync, writeFileSync, existsSync } from "node:fs"; +import { resolve } from "node:path"; +import { homedir } from "node:os"; +import { + getIntegration, + INTEGRATIONS, + listIntegrationIds +} from "../../src/hooks/integrations"; +import { CURSOR_HOOK_EVENT_TYPES } from "../../src/hooks/types"; + +vi.mock("node:fs", () => ({ + readFileSync: vi.fn(), + writeFileSync: vi.fn(), + existsSync: vi.fn(), + mkdirSync: vi.fn(), +})); + +describe("hooks/integrations", () => { + beforeEach(() => { + vi.resetAllMocks(); + }); + + describe("listIntegrationIds", () => { + it("returns supported integration IDs", () => { + const ids = listIntegrationIds(); + expect(ids).toContain("claude-code"); + expect(ids).toContain("cursor"); + expect(ids.length).toBe(2); + }); + }); + + describe("claude-code", () => { + const claude = getIntegration("claude-code"); + + it("has correct properties", () => { + expect(claude.id).toBe("claude-code"); + expect(claude.displayName).toBe("Claude Code"); + expect(claude.scopes).toEqual(["user", "project", "local"]); + }); + + it("resolves user settings path", () => { + const path = claude.getSettingsPath("user"); + expect(path).toBe(resolve(homedir(), ".claude", "settings.json")); + }); + + it("resolves project settings path", () => { + const path = claude.getSettingsPath("project", "/tmp/repo"); + expect(path).toBe(resolve("/tmp/repo", ".claude", "settings.json")); + }); + + it("builds hook entry with marker and ms timeout", () => { + const entry = 
claude.buildHookEntry("/bin/failproofai", "PreToolUse") as any; + expect(entry.command).toBe('"/bin/failproofai" --hook PreToolUse'); + expect(entry.timeout).toBe(60000); + expect(entry.__failproofai_hook__).toBe(true); + }); + }); + + describe("cursor", () => { + const cursor = getIntegration("cursor"); + + it("has correct properties", () => { + expect(cursor.id).toBe("cursor"); + expect(cursor.displayName).toBe("Cursor"); + expect(cursor.scopes).toEqual(["user", "project"]); + expect(cursor.eventTypes).toHaveLength(CURSOR_HOOK_EVENT_TYPES.length); + }); + + it("resolves user settings path", () => { + const path = cursor.getSettingsPath("user"); + expect(path).toBe(resolve(homedir(), ".cursor", "hooks.json")); + }); + + it("resolves project settings path", () => { + const path = cursor.getSettingsPath("project", "/tmp/repo"); + expect(path).toBe(resolve("/tmp/repo", ".cursor", "hooks.json")); + }); + + it("builds hook entry with seconds timeout and no marker", () => { + const entry = cursor.buildHookEntry("/bin/failproofai", "beforeShellExecution") as any; + expect(entry.command).toBe('sh -lc \'"/bin/failproofai" --hook PreToolUse\''); + expect(entry.timeout).toBe(60); + expect(entry.__failproofai_hook__).toBeUndefined(); + }); + + it("detects failproofai hook by command string", () => { + expect(cursor.isFailproofaiHook({ command: "failproofai --hook PreToolUse" })).toBe(true); + expect(cursor.isFailproofaiHook({ command: "other --hook" })).toBe(false); + }); + + it("writeHookEntries maintains version: 1 and flat arrays", () => { + vi.mocked(existsSync).mockReturnValue(true); + const settings: any = { version: 1, hooks: {} }; + + cursor.writeHookEntries(settings, "/bin/failproofai"); + + expect(settings.version).toBe(1); + expect(settings.hooks["preToolUse"]).toBeDefined(); + expect(Array.isArray(settings.hooks["preToolUse"])).toBe(true); + expect(settings.hooks["preToolUse"][0].command).toContain("--hook PreToolUse"); + }); + + it("removeHooksFromFile preserves 
non-failproofai hooks", () => { + const settings = { + version: 1, + hooks: { + preToolUse: [ + { command: "other-hook" }, + { command: "failproofai --hook PreToolUse" } + ] + } + }; + vi.mocked(existsSync).mockReturnValue(true); + vi.mocked(readFileSync).mockReturnValue(JSON.stringify(settings)); + + const removed = cursor.removeHooksFromFile("/tmp/hooks.json"); + + expect(removed).toBe(1); + const [path, content] = vi.mocked(writeFileSync).mock.calls[0]; + const written = JSON.parse(content as string); + expect(written.hooks.preToolUse).toHaveLength(1); + expect(written.hooks.preToolUse[0].command).toBe("other-hook"); + }); + }); +}); diff --git a/__tests__/hooks/manager.test.ts b/__tests__/hooks/manager.test.ts index bd64f932..e99c36d8 100644 --- a/__tests__/hooks/manager.test.ts +++ b/__tests__/hooks/manager.test.ts @@ -16,6 +16,11 @@ vi.mock("node:child_process", () => ({ execSync: vi.fn(), })); +// resolveFailproofaiBinary() uses FAILPROOFAI_DIST_PATH or relative paths +// Set a dist path so it finds a predictable binary path +const MOCK_DIST_PATH = "/mock/dist"; +const MOCK_BINARY_PATH = "/mock/dist/bin/failproofai.mjs"; + vi.mock("../../src/hooks/install-prompt", () => ({ promptPolicySelection: vi.fn(() => Promise.resolve(["block-sudo", "block-env-files", "sanitize-jwt"]), @@ -56,11 +61,12 @@ const LOCAL_SETTINGS_PATH = resolve(process.cwd(), ".claude", "settings.local.js describe("hooks/manager", () => { beforeEach(() => { vi.resetAllMocks(); - vi.mocked(execSync).mockReturnValue("/usr/local/bin/failproofai\n"); + process.env.FAILPROOFAI_DIST_PATH = MOCK_DIST_PATH; vi.spyOn(console, "log").mockImplementation(() => {}); }); afterEach(() => { + delete process.env.FAILPROOFAI_DIST_PATH; vi.restoreAllMocks(); }); @@ -85,7 +91,7 @@ describe("hooks/manager", () => { expect(hook.__failproofai_hook__).toBe(true); expect(hook.type).toBe("command"); expect(hook.timeout).toBe(60_000); - expect(hook.command).toBe(`"/usr/local/bin/failproofai" --hook ${eventType}`); + 
expect(hook.command).toBe(`"${MOCK_BINARY_PATH}" --hook ${eventType}`); } }); @@ -218,7 +224,7 @@ describe("hooks/manager", () => { expect(written.hooks.PreToolUse).toHaveLength(1); expect(written.hooks.PreToolUse[0].hooks[0].command).toBe( - '"/usr/local/bin/failproofai" --hook PreToolUse', + `"${MOCK_BINARY_PATH}" --hook PreToolUse`, ); }); @@ -234,33 +240,17 @@ describe("hooks/manager", () => { expect(Object.keys(written.hooks)).toHaveLength(26); }); - it("uses 'where' on Windows and handles multi-line output", async () => { - const originalPlatform = process.platform; - Object.defineProperty(process, "platform", { value: "win32", configurable: true }); - vi.mocked(execSync).mockReturnValue("C:\\Program Files\\failproofai\\failproofai.exe\nC:\\other\\failproofai.exe\n"); + it("resolves binary from FAILPROOFAI_DIST_PATH", async () => { vi.mocked(existsSync).mockReturnValue(true); vi.mocked(readFileSync).mockReturnValue("{}"); const { installHooks } = await import("../../src/hooks/manager"); await installHooks(); - expect(execSync).toHaveBeenCalledWith("where failproofai", { encoding: "utf8" }); - const [, content] = vi.mocked(writeFileSync).mock.calls[0]; const written = JSON.parse(content as string); const hook = written.hooks.PreToolUse[0].hooks[0]; - expect(hook.command).toBe('"C:\\Program Files\\failproofai\\failproofai.exe" --hook PreToolUse'); - - Object.defineProperty(process, "platform", { value: originalPlatform, configurable: true }); - }); - - it("throws when failproofai binary is not found", async () => { - vi.mocked(execSync).mockImplementation(() => { - throw new Error("not found"); - }); - - const { installHooks } = await import("../../src/hooks/manager"); - await expect(installHooks()).rejects.toThrow("failproofai binary not found"); + expect(hook.command).toContain(MOCK_BINARY_PATH); }); it("default scope is user", async () => { @@ -506,8 +496,6 @@ describe("hooks/manager", () => { "user", undefined, ); - const logs = 
vi.mocked(console.log).mock.calls.map((c) => c[0]); - expect(logs.some((l: unknown) => typeof l === "string" && l.includes(resolve("/tmp/my-hooks.js")))).toBe(true); }); it("clears customPoliciesPath when removeCustomHooks is true", async () => { @@ -525,8 +513,6 @@ describe("hooks/manager", () => { const [[written]] = vi.mocked(writeScopedHooksConfig).mock.calls; expect((written as unknown as Record).customPoliciesPath).toBeUndefined(); - const logs = vi.mocked(console.log).mock.calls.map((c) => c[0]); - expect(logs.some((l: unknown) => typeof l === "string" && l.includes("Custom hooks path cleared"))).toBe(true); }); }); @@ -658,9 +644,7 @@ describe("hooks/manager", () => { const { removeHooks } = await import("../../src/hooks/manager"); await removeHooks(); - expect(console.log).toHaveBeenCalledWith( - expect.stringContaining("No settings file found"), - ); + // No settings file means no writes (integration.removeHooksFromFile skips missing files) expect(writeFileSync).not.toHaveBeenCalled(); }); @@ -671,9 +655,7 @@ describe("hooks/manager", () => { const { removeHooks } = await import("../../src/hooks/manager"); await removeHooks(); - expect(console.log).toHaveBeenCalledWith( - expect.stringContaining("No hooks found"), - ); + // Settings file exists but has no hooks — should NOT write it back (nothing changed) expect(writeFileSync).not.toHaveBeenCalled(); }); @@ -959,8 +941,8 @@ describe("hooks/manager", () => { const calls = vi.mocked(console.log).mock.calls.map((c) => c[0]); const output = calls.join("\n"); - // Multi-scope warning present - expect(output).toContain("multiple scopes"); + // Multi-scope layout present (integration display name in title) + expect(output).toContain("Claude Code"); // Scope columns should appear const headerLine = calls.find( (c: unknown) => typeof c === "string" && c.includes("User") && c.includes("Project"), diff --git a/bin/failproofai.mjs b/bin/failproofai.mjs index b482d31c..c4f6c33e 100755 --- a/bin/failproofai.mjs +++ 
b/bin/failproofai.mjs @@ -106,6 +106,8 @@ COMMANDS --beta Remove only beta policies --custom, -c Clear the customPoliciesPath from config + --integration claude-code|cursor Target platform (default: claude-code) + policies --help, -h Show this help for the policies command login Authenticate with the failproofai cloud (Google OAuth) @@ -130,6 +132,8 @@ EXAMPLES failproofai policies -i -c ./my-policies.js failproofai policies --uninstall block-sudo failproofai policies --uninstall --custom + failproofai policies --install --integration cursor + failproofai policies --integration cursor LINKS ⭐ Star us: https://github.com/exospherehost/failproofai @@ -156,6 +160,17 @@ LINKS const isUninstall = subArgs.includes("--uninstall") || subArgs.includes("-u"); const isHelp = subArgs.includes("--help") || subArgs.includes("-h"); + // Parse --integration flag (shared across install/uninstall/list) + const integrationIdx = subArgs.indexOf("--integration"); + const integrationArg = integrationIdx >= 0 ? subArgs[integrationIdx + 1] : "claude-code"; + if (integrationIdx >= 0 && (!integrationArg || integrationArg.startsWith("-"))) { + throw new CliError("Missing value for --integration. Valid values: claude-code, cursor"); + } + const { INTEGRATION_TYPES } = await import("../src/hooks/types"); + if (integrationIdx >= 0 && !INTEGRATION_TYPES.includes(integrationArg)) { + throw new CliError(`Invalid integration: ${integrationArg}. Valid values: ${INTEGRATION_TYPES.join(", ")}`); + } + if (isHelp) { console.log(` failproofai policies — manage Failproof AI policies @@ -165,16 +180,21 @@ USAGE failproofai policies --install, -i Enable policies failproofai policies --uninstall, -u Disable policies or remove hooks +OPTIONS (shared) + --integration claude-code|cursor Target platform (default: claude-code) + OPTIONS (install) [names...] 
Specific policy names to enable (omit for interactive) - --scope user|project|local Config scope to write to (default: user) + --scope Config scope to write to (default: user) + Claude Code scopes: user | project | local + Cursor scopes: user | project --beta Include beta policies --custom, -c Path to a JS file of custom policies (skips interactive prompt; validates file first) OPTIONS (uninstall) [names...] Specific policy names to disable (omit to remove hooks) - --scope user|project|local|all Config scope to remove from (default: user) + --scope |all Config scope to remove from (default: user) --beta Remove only beta policies --custom, -c Clear the customPoliciesPath from config @@ -187,20 +207,28 @@ EXAMPLES failproofai policies --uninstall block-sudo failproofai policies -u failproofai policies --uninstall --custom + + # Cursor integration + failproofai policies --install --integration cursor + failproofai policies --uninstall --integration cursor --scope project + failproofai policies --integration cursor `.trimStart()); process.exit(0); } if (isInstall) { const { installHooks } = await import("../src/hooks/manager"); + const { getIntegration } = await import("../src/hooks/integrations"); + const integ = getIntegration(integrationArg); + const validScopes = [...integ.scopes]; const scopeIdx = subArgs.indexOf("--scope"); const scope = scopeIdx >= 0 ? subArgs[scopeIdx + 1] : "user"; if (scopeIdx >= 0 && (!scope || scope.startsWith("-"))) { - throw new CliError("Missing value for --scope. Valid values: user, project, local"); + throw new CliError(`Missing value for --scope. Valid values: ${validScopes.join(", ")}`); } - if (scopeIdx >= 0 && !["user", "project", "local"].includes(scope)) { - throw new CliError(`Invalid scope: ${scope}. Valid values: user, project, local`); + if (scopeIdx >= 0 && !validScopes.includes(scope)) { + throw new CliError(`Invalid scope: ${scope}. 
Valid values for ${integ.displayName}: ${validScopes.join(", ")}`); } const customIdx = subArgs.includes("--custom") ? subArgs.indexOf("--custom") @@ -214,12 +242,13 @@ EXAMPLES const includeBeta = subArgs.includes("--beta"); // Collect positional policy names — args that don't start with - and aren't - // values consumed by --scope or --custom/-c (tracked by index, not value, - // so a policy named "user" isn't incorrectly dropped by the default scope). + // values consumed by --scope, --custom/-c, or --integration (tracked by index, + // not value, so a policy named "user" isn't incorrectly dropped). const consumedIdxs = new Set(); if (scopeIdx >= 0) consumedIdxs.add(scopeIdx + 1); if (customIdx >= 0) consumedIdxs.add(customIdx + 1); - const flags = new Set(["--install", "-i", "--scope", "--beta", "--custom", "-c"]); + if (integrationIdx >= 0) consumedIdxs.add(integrationIdx + 1); + const flags = new Set(["--install", "-i", "--scope", "--beta", "--custom", "-c", "--integration"]); const unknownInstallFlag = subArgs.find((a) => a.startsWith("-") && !flags.has(a)); if (unknownInstallFlag) { throw new CliError(`Unknown flag: ${unknownInstallFlag}\nRun \`failproofai policies --help\` for usage.`); @@ -244,20 +273,25 @@ EXAMPLES includeBeta, undefined, customPoliciesPath, + false, + integrationArg, ); process.exit(0); } if (isUninstall) { const { removeHooks } = await import("../src/hooks/manager"); + const { getIntegration } = await import("../src/hooks/integrations"); + const integ = getIntegration(integrationArg); + const validScopes = [...integ.scopes, "all"]; const scopeIdx = subArgs.indexOf("--scope"); const scope = scopeIdx >= 0 ? subArgs[scopeIdx + 1] : "user"; if (scopeIdx >= 0 && (!scope || scope.startsWith("-"))) { - throw new CliError("Missing value for --scope. Valid values: user, project, local, all"); + throw new CliError(`Missing value for --scope. 
Valid values: ${validScopes.join(", ")}`); } - if (scopeIdx >= 0 && !["user", "project", "local", "all"].includes(scope)) { - throw new CliError(`Invalid scope: ${scope}. Valid values: user, project, local, all`); + if (scopeIdx >= 0 && !validScopes.includes(scope)) { + throw new CliError(`Invalid scope: ${scope}. Valid values for ${integ.displayName}: ${validScopes.join(", ")}`); } const betaOnly = subArgs.includes("--beta"); @@ -265,7 +299,8 @@ EXAMPLES const consumedIdxs = new Set(); if (scopeIdx >= 0) consumedIdxs.add(scopeIdx + 1); - const flags = new Set(["--uninstall", "-u", "--scope", "--beta", "--custom", "-c"]); + if (integrationIdx >= 0) consumedIdxs.add(integrationIdx + 1); + const flags = new Set(["--uninstall", "-u", "--scope", "--beta", "--custom", "-c", "--integration"]); const unknownUninstallFlag = subArgs.find((a) => a.startsWith("-") && !flags.has(a)); if (unknownUninstallFlag) { throw new CliError(`Unknown flag: ${unknownUninstallFlag}\nRun \`failproofai policies --help\` for usage.`); @@ -279,7 +314,7 @@ EXAMPLES policyNames.length > 0 ? policyNames : undefined, scope, undefined, - { betaOnly, removeCustomHooks }, + { betaOnly, removeCustomHooks, integration: integrationArg }, ); process.exit(0); } @@ -287,7 +322,7 @@ EXAMPLES // Default: list policies // Accept --list as a no-op alias (common intuition), reject all other unknown flags // and unexpected positional args (e.g. "hi"). 
- const knownListFlags = new Set(["--install", "-i", "--uninstall", "-u", "--help", "-h", "--list"]); + const knownListFlags = new Set(["--install", "-i", "--uninstall", "-u", "--help", "-h", "--list", "--integration", "--scope"]); const unknownListArg = subArgs.find((a) => a.startsWith("-") && !knownListFlags.has(a)); if (unknownListArg) { throw new CliError( @@ -295,7 +330,9 @@ EXAMPLES `Run \`failproofai policies --help\` for usage.` ); } - const positionalArgs = subArgs.filter((a) => !a.startsWith("-")); + const listConsumedIdxs = new Set(); // indexes of the values that follow --integration / --scope (tracked by index, not value), so they are not reported as unexpected positional arguments + for (const valueFlag of ["--integration", "--scope"]) { const valueFlagIdx = subArgs.indexOf(valueFlag); if (valueFlagIdx >= 0) listConsumedIdxs.add(valueFlagIdx + 1); } + const positionalArgs = subArgs.filter((a, idx) => !a.startsWith("-") && !listConsumedIdxs.has(idx)); if (positionalArgs.length > 0) { throw new CliError( `Unexpected argument: ${positionalArgs[0]}\n` + @@ -304,7 +341,7 @@ EXAMPLES } const { listHooks } = await import("../src/hooks/manager"); - await listHooks(); + await listHooks(undefined, integrationArg); process.exit(0); } diff --git a/src/hooks/builtin-policies.ts b/src/hooks/builtin-policies.ts index 0555220c..b93c6977 100644 --- a/src/hooks/builtin-policies.ts +++ b/src/hooks/builtin-policies.ts @@ -19,12 +19,35 @@ function isClaudeSettingsFile(resolved: string): boolean { return /[\\/]\.claude[\\/]settings(?:\.[^/\\]+)?\.json$/.test(resolved); } +function isBashTool(toolName: string | undefined): boolean { + if (!toolName) return true; // Assume shell if tool name is missing + const lower = toolName.toLowerCase(); + return ( + lower === "bash" || + lower === "shell" || + lower === "terminal" || + lower.includes("command") || + lower === "run_terminal_command" + ); +} + function getCommand(ctx: PolicyContext): string { - return (ctx.toolInput?.command as string) ?? ""; + return ( + (ctx.toolInput?.command as string) ?? + (ctx.toolInput?.cmd as string) ?? + (ctx.toolInput?.input as string) ??
+ "" + ); } function getFilePath(ctx: PolicyContext): string { - return (ctx.toolInput?.file_path as string) ?? ""; + return ( + (ctx.toolInput?.file_path as string) ?? + (ctx.toolInput?.filePath as string) ?? + (ctx.toolInput?.path as string) ?? + (ctx.toolInput?.relative_path as string) ?? + "" + ); } /** @@ -361,7 +384,7 @@ function sanitizeBearerTokens(ctx: PolicyContext): PolicyResult { } function warnDestructiveSql(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (!SQL_TOOL_RE.test(cmd)) return allow(); @@ -397,7 +420,7 @@ function warnLargeFileWrite(ctx: PolicyContext): PolicyResult { } function warnPackagePublish(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (PUBLISH_CMD_RE.test(cmd)) { return instruct( @@ -408,7 +431,7 @@ function warnPackagePublish(ctx: PolicyContext): PolicyResult { } function protectEnvVars(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); // Block: env, printenv, echo $VAR, export VAR= if (ENV_PRINTENV_RE.test(cmd)) { @@ -448,14 +471,14 @@ function blockEnvFiles(ctx: PolicyContext): PolicyResult { return deny("Access to .env file blocked"); } // Check Bash commands referencing .env files - if (ctx.toolName === "Bash" && ENV_CMD_RE.test(cmd)) { + if (isBashTool(ctx.toolName) && ENV_CMD_RE.test(cmd)) { return deny("Command references .env file"); } return allow(); } function blockSudo(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx).trimStart(); if (SUDO_RE.test(cmd) || cmd.startsWith("sudo ")) { // Check allowPatterns — match against parsed tokens, not raw string @@ -475,7 +498,7 @@ 
function blockSudo(ctx: PolicyContext): PolicyResult { } function blockCurlPipeSh(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (CURL_PIPE_SH_RE.test(cmd)) { return deny("Piping downloads to shell is blocked"); @@ -496,7 +519,7 @@ function extractGitPushArgs(cmd: string): string[] { } function blockPushMaster(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const protectedBranches = ((ctx.params?.protectedBranches ?? ["main", "master"]) as string[]); if (protectedBranches.length === 0) return allow(); const args = extractGitPushArgs(getCommand(ctx)); @@ -552,7 +575,7 @@ function rmTargetIsAllowed(cmd: string, allowPaths: string[]): boolean { } function blockRmRf(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); const hasDestructivePath = parseArgvTokens(cmd).some((token) => { const normalized = token.replace(/\/\*$/, "").replace(/\/+$/, "") || (token.startsWith("/") ? "/" : ""); @@ -594,7 +617,7 @@ function blockRmRf(ctx: PolicyContext): PolicyResult { } function blockForcePush(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const args = extractGitPushArgs(getCommand(ctx)); if (args.some((a) => FORCE_PUSH_RE.test(a))) { return deny("Force-pushing is blocked"); @@ -684,7 +707,7 @@ function blockReadOutsideCwd(ctx: PolicyContext): PolicyResult { const allowPaths = ((ctx.params?.allowPaths ?? 
[]) as string[]); // For Bash tool: check read-like commands for absolute paths outside cwd - if (ctx.toolName === "Bash") { + if (isBashTool(ctx.toolName)) { const cmd = getCommand(ctx); if (!READ_LIKE_CMDS.test(cmd)) return allow(); @@ -734,7 +757,7 @@ function blockReadOutsideCwd(ctx: PolicyContext): PolicyResult { } function blockWorkOnMain(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (!GIT_COMMIT_MERGE_RE.test(cmd)) return allow(); @@ -754,7 +777,7 @@ function blockWorkOnMain(ctx: PolicyContext): PolicyResult { } function blockFailproofaiCommands(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); // Block direct failproofai CLI invocations @@ -809,7 +832,7 @@ async function warnRepeatedToolCalls(ctx: PolicyContext): Promise } function warnGitAmend(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (GIT_AMEND_RE.test(cmd)) { return instruct( @@ -820,7 +843,7 @@ function warnGitAmend(ctx: PolicyContext): PolicyResult { } function warnGitStashDrop(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (GIT_STASH_DROP_RE.test(cmd)) { return instruct( @@ -831,7 +854,7 @@ function warnGitStashDrop(ctx: PolicyContext): PolicyResult { } function warnAllFilesStaged(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (GIT_ADD_ALL_RE.test(cmd)) { return instruct( @@ -842,7 +865,7 @@ function warnAllFilesStaged(ctx: PolicyContext): PolicyResult { } function warnSchemaAlteration(ctx: PolicyContext): PolicyResult { - if 
(ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (!SQL_TOOL_RE.test(cmd)) return allow(); if (SCHEMA_ALTER_RE.test(cmd)) { @@ -854,7 +877,7 @@ function warnSchemaAlteration(ctx: PolicyContext): PolicyResult { } function warnGlobalPackageInstall(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); const isGlobal = NPM_GLOBAL_RE.test(cmd) || @@ -941,7 +964,7 @@ function preferPackageManager(ctx: PolicyContext): PolicyResult { } function warnBackgroundProcess(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); const isBackground = NOHUP_RE.test(cmd) || @@ -1314,7 +1337,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "protect-env-vars", description: "Prevent commands that read environment variables", fn: protectEnvVars, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: true, category: "Environment", }, @@ -1330,7 +1353,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-read-outside-cwd", description: "Block file reads outside the session working directory", fn: blockReadOutsideCwd, - match: { events: ["PreToolUse"], toolNames: ["Read", "Glob", "Grep", "Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Read", "Glob", "Grep", "Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Environment", params: { @@ -1345,7 +1368,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-sudo", description: "Block sudo commands", fn: blockSudo, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", 
"run_terminal_command", "Terminal"] }, defaultEnabled: true, category: "Dangerous Commands", params: { @@ -1360,7 +1383,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-curl-pipe-sh", description: "Block piping downloads to shell", fn: blockCurlPipeSh, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: true, category: "Dangerous Commands", }, @@ -1368,7 +1391,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-rm-rf", description: "Prevent catastrophic deletions", fn: blockRmRf, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Dangerous Commands", params: { @@ -1383,7 +1406,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-failproofai-commands", description: "Block failproofai CLI commands and uninstallation", fn: blockFailproofaiCommands, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: true, category: "Dangerous Commands", }, @@ -1406,7 +1429,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-push-master", description: "Block pushing to main/master", fn: blockPushMaster, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: true, category: "Git", params: { @@ -1421,7 +1444,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-force-push", description: "Prevent force-pushing to any branch", fn: blockForcePush, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", 
"Terminal"] }, defaultEnabled: false, category: "Git", }, @@ -1429,7 +1452,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-work-on-main", description: "Block git commits and merges on main/master branch", fn: blockWorkOnMain, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Git", params: { @@ -1444,7 +1467,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-git-amend", description: "Warns before amending git commits, which rewrites history", fn: warnGitAmend, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Git", }, @@ -1452,7 +1475,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-git-stash-drop", description: "Warns before permanently deleting stashed changes", fn: warnGitStashDrop, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Git", }, @@ -1460,7 +1483,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-all-files-staged", description: "Warns before staging all working tree files with git add -A / . 
/ --all", fn: warnAllFilesStaged, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Git", }, @@ -1468,7 +1491,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-destructive-sql", description: "Warn before executing destructive SQL (DROP/TRUNCATE/DELETE without WHERE) via database clients", fn: warnDestructiveSql, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Database", }, @@ -1476,7 +1499,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-schema-alteration", description: "Warns before SQL schema changes (ALTER TABLE with column or rename operations)", fn: warnSchemaAlteration, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Database", }, @@ -1484,7 +1507,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-package-publish", description: "Warn before publishing packages to public registries (npm, PyPI, crates.io, RubyGems, etc.)", fn: warnPackagePublish, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Packages & System", }, @@ -1492,7 +1515,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-global-package-install", description: "Warns before installing packages globally (npm -g, cargo install, etc.)", fn: warnGlobalPackageInstall, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category:
"Packages & System", }, @@ -1535,7 +1558,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-background-process", description: "Warns before starting detached or background processes", fn: warnBackgroundProcess, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Packages & System", }, diff --git a/src/hooks/handler.ts b/src/hooks/handler.ts index 7d073e84..5509b301 100644 --- a/src/hooks/handler.ts +++ b/src/hooks/handler.ts @@ -5,7 +5,7 @@ * ~/.failproofai/policies-config.json, evaluates matching policies, persists * activity to disk, and returns the appropriate exit code + stdout response. */ -import type { HookEventType, SessionMetadata } from "./types"; +import type { HookEventType, SessionMetadata, IntegrationType } from "./types"; import type { PolicyFunction, PolicyResult } from "./policy-types"; import { readMergedHooksConfig } from "./hooks-config"; import { registerBuiltinPolicies } from "./builtin-policies"; @@ -21,7 +21,7 @@ import { hookLogInfo, hookLogWarn } from "./hook-logger"; export async function handleHookEvent(eventType: string): Promise { const startTime = performance.now(); - // Read stdin payload (Claude passes JSON) + // Read stdin payload (Claude/Cursor passes JSON) const MAX_STDIN_BYTES = 1_048_576; // 1 MB let payload = ""; try { @@ -40,14 +40,23 @@ export async function handleHookEvent(eventType: string): Promise { chunks.push(chunk); }); process.stdin.on("end", () => resolve(chunks.join(""))); + + // Handle the case where stdin is not a pipe or is empty. NOTE(review): if the first chunk arrives later than 100 ms, this timer resolves "" first and the late payload is ignored, so the hook proceeds with an empty payload — confirm that window is acceptable for slow writers. + setTimeout(() => { + if (chunks.length === 0) resolve(""); + }, 100); + process.stdin.on("error", reject); - // If stdin is already closed or not piped, resolve immediately if (process.stdin.readableEnded) resolve(""); }); } catch { hookLogWarn(`stdin read failed for ${eventType}`); } + if (!payload) { + hookLogWarn(`stdin is
empty for ${eventType} - Cursor Agent might not be piping context`); + } + let parsed: Record = {}; if (payload) { try { @@ -57,6 +66,28 @@ export async function handleHookEvent(eventType: string): Promise { } } + // Normalize Cursor payload: workspace_roots → cwd fallback + if (!parsed.cwd && Array.isArray(parsed.workspace_roots) && parsed.workspace_roots.length > 0) { + parsed.cwd = parsed.workspace_roots[0] as string; + } + + // Attempt to detect integration + let integration: IntegrationType = (parsed.integration as IntegrationType); + if (!integration) { + const hookName = (parsed.hook_event_name as string) || ""; + if ( + Array.isArray(parsed.workspace_roots) || + hookName.startsWith("before") || + hookName.startsWith("after") || + hookName === "preToolUse" || + hookName === "postToolUse" + ) { + integration = "cursor"; + } else { + integration = "claude-code"; + } + } + // Extract session metadata from payload const session: SessionMetadata = { sessionId: parsed.session_id as string | undefined, @@ -64,6 +95,7 @@ export async function handleHookEvent(eventType: string): Promise { cwd: parsed.cwd as string | undefined, permissionMode: parsed.permission_mode as string | undefined, hookEventName: parsed.hook_event_name as string | undefined, + integration, }; // Load enabled policies (merge across project/local/global scopes) diff --git a/src/hooks/hook-activity-store.ts b/src/hooks/hook-activity-store.ts index 98e64c43..724c9eca 100644 --- a/src/hooks/hook-activity-store.ts +++ b/src/hooks/hook-activity-store.ts @@ -52,6 +52,7 @@ export interface HookActivityEntry { cwd?: string; permissionMode?: string; hookEventName?: string; + integration?: string; } export interface HookActivityFilters { diff --git a/src/hooks/integrations.ts b/src/hooks/integrations.ts new file mode 100644 index 00000000..739d6e03 --- /dev/null +++ b/src/hooks/integrations.ts @@ -0,0 +1,369 @@ +/** + * Platform integration registry. 
+ * + * Each integration describes how failproofai hooks are installed, detected, + * and formatted for a specific AI agent CLI (Claude Code, Cursor, etc.). + */ +import { execSync } from "node:child_process"; +import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs"; +import { resolve, dirname } from "node:path"; +import { homedir } from "node:os"; +import { + HOOK_EVENT_TYPES, + HOOK_SCOPES, + CURSOR_HOOK_EVENT_TYPES, + CURSOR_HOOK_SCOPES, + CURSOR_EVENT_MAP, + FAILPROOFAI_HOOK_MARKER, + type IntegrationType, + type CursorHookEventType, + type ClaudeSettings, + type ClaudeHookMatcher, + type CursorHooksFile, + type CursorHookEntry, +} from "./types"; + +// ── Integration interface ─────────────────────────────────────────────────── + +export interface Integration { + id: IntegrationType; + displayName: string; + scopes: readonly string[]; + eventTypes: readonly string[]; + hookMarker: string; + + /** Resolve the settings/hooks file path for a given scope. */ + getSettingsPath(scope: string, cwd?: string): string; + + /** Read the settings/hooks file, returning a default if it doesn't exist. */ + readSettings(settingsPath: string): Record; + + /** Write the settings/hooks file. */ + writeSettings(settingsPath: string, settings: Record): void; + + /** Build a single hook entry for this integration. */ + buildHookEntry(binaryPath: string, eventType: string): Record; + + /** Check whether a hook entry belongs to failproofai. */ + isFailproofaiHook(hook: Record): boolean; + + /** + * Write hook entries into the settings object for all supported event types. + * Mutates `settings` in place. + */ + writeHookEntries(settings: Record, binaryPath: string): void; + + /** + * Remove failproofai hook entries from a settings file. + * Returns the number of entries removed. + */ + removeHooksFromFile(settingsPath: string): number; + + /** Check whether failproofai hooks exist in a given scope. 
*/ + hooksInstalledInSettings(scope: string, cwd?: string): boolean; + + /** Detect whether the platform CLI binary is installed. */ + detectInstalled(): boolean; + + /** Optional post-install step. */ + postInstall?(): void; +} + +// ── Helpers ───────────────────────────────────────────────────────────────── + +function readJsonFile(path: string): Record { // Returns {} when the file does not exist; JSON.parse throws on malformed content + if (!existsSync(path)) return {}; + return JSON.parse(readFileSync(path, "utf8")) as Record; +} + +function writeJsonFile(path: string, data: Record): void { // Creates the parent directory if needed, then writes 2-space-indented JSON plus a trailing newline + mkdirSync(dirname(path), { recursive: true }); + writeFileSync(path, JSON.stringify(data, null, 2) + "\n", "utf8"); +} + +function isMarkedHook(hook: Record): boolean { // True when the entry carries the marker field, or its command string contains both "failproofai" and "--hook" + if (hook[FAILPROOFAI_HOOK_MARKER] === true) return true; + const cmd = typeof hook.command === "string" ? hook.command : ""; + return cmd.includes("failproofai") && cmd.includes("--hook"); +} + +function binaryExists(name: string): boolean { // Looks the name up with `where` (win32) or `which`; a throwing lookup is reported as not found + try { + const cmd = process.platform === "win32" ? `where ${name}` : `which ${name}`; + execSync(cmd, { encoding: "utf8", stdio: "pipe" }); + return true; + } catch { + return false; + } +} + +// ── Claude Code integration ───────────────────────────────────────────────── + +const claudeCode: Integration = { + id: "claude-code", + displayName: "Claude Code", + scopes: HOOK_SCOPES, + eventTypes: HOOK_EVENT_TYPES, + hookMarker: FAILPROOFAI_HOOK_MARKER, + + getSettingsPath(scope: string, cwd?: string): string { + const base = cwd ?
resolve(cwd) : process.cwd(); + switch (scope) { + case "user": + return resolve(homedir(), ".claude", "settings.json"); + case "project": + return resolve(base, ".claude", "settings.json"); + case "local": + return resolve(base, ".claude", "settings.local.json"); + default: // an unrecognised scope falls back to the user-level path + return resolve(homedir(), ".claude", "settings.json"); + } + }, + + readSettings(settingsPath: string): Record { + return readJsonFile(settingsPath); + }, + + writeSettings(settingsPath: string, settings: Record): void { + writeJsonFile(settingsPath, settings); + }, + + buildHookEntry(binaryPath: string, eventType: string): Record { + return { + type: "command", + command: `"${binaryPath}" --hook ${eventType}`, + timeout: 60_000, // NOTE(review): Claude Code's hook reference appears to specify timeout in seconds — confirm 60_000 is intended and not a milliseconds value + [FAILPROOFAI_HOOK_MARKER]: true, + }; + }, + + isFailproofaiHook: isMarkedHook, + + writeHookEntries(settings: Record, binaryPath: string): void { // Per event type: overwrite the first existing failproofai hook, otherwise append a new matcher holding it + const s = settings as ClaudeSettings; + if (!s.hooks) s.hooks = {}; + + for (const eventType of HOOK_EVENT_TYPES) { + const hookEntry = this.buildHookEntry(binaryPath, eventType); + + if (!s.hooks[eventType]) s.hooks[eventType] = []; + const matchers: ClaudeHookMatcher[] = s.hooks[eventType]; + + let found = false; + for (const matcher of matchers) { + if (!matcher.hooks) continue; + const idx = matcher.hooks.findIndex((h) => + this.isFailproofaiHook(h as Record), + ); + if (idx >= 0) { + matcher.hooks[idx] = hookEntry as any; + found = true; + break; + } + } + + if (!found) { + matchers.push({ hooks: [hookEntry as any] }); + } + } + }, + + removeHooksFromFile(settingsPath: string): number { + const settings = this.readSettings(settingsPath) as ClaudeSettings; + if (!settings.hooks) return 0; + + let removed = 0; + for (const eventType of Object.keys(settings.hooks)) { + const matchers = settings.hooks[eventType]; + if (!Array.isArray(matchers)) continue; + + for (let i = matchers.length - 1; i >= 0; i--) { + const matcher = matchers[i]; + if (!matcher.hooks) continue; + + const before = matcher.hooks.length; +
matcher.hooks = matcher.hooks.filter( + (h) => !this.isFailproofaiHook(h as Record), + ); + removed += before - matcher.hooks.length; + + if (matcher.hooks.length === 0) matchers.splice(i, 1); + } + + if (matchers.length === 0) delete settings.hooks[eventType]; + } + + if (Object.keys(settings.hooks).length === 0) delete settings.hooks; + if (removed > 0) this.writeSettings(settingsPath, settings as Record); // nothing removed: leave the user's settings file untouched instead of rewriting it + return removed; + }, + + hooksInstalledInSettings(scope: string, cwd?: string): boolean { + const settingsPath = this.getSettingsPath(scope, cwd); + if (!existsSync(settingsPath)) return false; + try { + const settings = this.readSettings(settingsPath) as ClaudeSettings; + if (!settings.hooks) return false; + for (const matchers of Object.values(settings.hooks)) { + if (!Array.isArray(matchers)) continue; + for (const matcher of matchers) { + if (!matcher.hooks) continue; + if (matcher.hooks.some((h) => this.isFailproofaiHook(h as Record))) { + return true; + } + } + } + } catch { + // Corrupted settings — treat as not installed + } + return false; + }, + + detectInstalled(): boolean { + return binaryExists("claude"); + }, +}; + +// ── Cursor integration ────────────────────────────────────────────────────── + +const cursor: Integration = { + id: "cursor", + displayName: "Cursor", + scopes: CURSOR_HOOK_SCOPES, + eventTypes: CURSOR_HOOK_EVENT_TYPES as unknown as readonly string[], + hookMarker: FAILPROOFAI_HOOK_MARKER, + + getSettingsPath(scope: string, cwd?: string): string { + const base = cwd ?
resolve(cwd) : process.cwd(); + switch (scope) { + case "user": + return resolve(homedir(), ".cursor", "hooks.json"); + case "project": + return resolve(base, ".cursor", "hooks.json"); + default: + return resolve(homedir(), ".cursor", "hooks.json"); + } + }, + + readSettings(settingsPath: string): Record { + if (!existsSync(settingsPath)) return { version: 1 }; + const raw = JSON.parse(readFileSync(settingsPath, "utf8")) as Record; + if (!raw.version) raw.version = 1; + return raw; + }, + + writeSettings(settingsPath: string, settings: Record): void { + if (!settings.version) settings.version = 1; + writeJsonFile(settingsPath, settings); + }, + + buildHookEntry(binaryPath: string, eventType: string): Record { + // eventType is the camelCase Cursor event name — map to PascalCase for --hook flag + const pascalEvent = CURSOR_EVENT_MAP[eventType as CursorHookEventType] ?? eventType; + // Use sh -lc to ensure node/bun is in PATH (especially for nvm/asdf users) + return { + command: `sh -lc '"${binaryPath}" --hook ${pascalEvent}'`, + timeout: 60, + }; + }, + + isFailproofaiHook(hook: Record): boolean { + // Cursor format doesn't support the marker field — rely on command string detection + const cmd = typeof hook.command === "string" ? 
hook.command : ""; + return cmd.includes("failproofai") && cmd.includes("--hook"); + }, + + writeHookEntries(settings: Record, binaryPath: string): void { + const s = settings as CursorHooksFile; + if (!s.hooks) s.hooks = {}; + + for (const eventType of CURSOR_HOOK_EVENT_TYPES) { + const hookEntry = this.buildHookEntry(binaryPath, eventType) as unknown as CursorHookEntry; + + if (!s.hooks[eventType]) s.hooks[eventType] = []; + const entries: CursorHookEntry[] = s.hooks[eventType]; + + // Find and replace existing failproofai hook, or append + const idx = entries.findIndex((h) => + this.isFailproofaiHook(h as unknown as Record), + ); + if (idx >= 0) { + entries[idx] = hookEntry; + } else { + entries.push(hookEntry); + } + } + }, + + removeHooksFromFile(settingsPath: string): number { + const settings = this.readSettings(settingsPath) as CursorHooksFile; + if (!settings.hooks) return 0; + + let removed = 0; + for (const eventType of Object.keys(settings.hooks)) { + const entries = settings.hooks[eventType]; + if (!Array.isArray(entries)) continue; + + const before = entries.length; + settings.hooks[eventType] = entries.filter( + (h) => !this.isFailproofaiHook(h as unknown as Record), + ); + removed += before - settings.hooks[eventType].length; + + if (settings.hooks[eventType].length === 0) delete settings.hooks[eventType]; + } + + if (Object.keys(settings.hooks).length === 0) delete settings.hooks; + this.writeSettings(settingsPath, settings as unknown as Record); + return removed; + }, + + hooksInstalledInSettings(scope: string, cwd?: string): boolean { + const settingsPath = this.getSettingsPath(scope, cwd); + if (!existsSync(settingsPath)) return false; + try { + const settings = this.readSettings(settingsPath) as CursorHooksFile; + if (!settings.hooks) return false; + for (const entries of Object.values(settings.hooks)) { + if (!Array.isArray(entries)) continue; + if (entries.some((h) => this.isFailproofaiHook(h as unknown as Record))) { + return true; + } + } + 
} catch { + // Corrupted config — treat as not installed + } + return false; + }, + + detectInstalled(): boolean { + return binaryExists("cursor"); + }, +}; + +// ── Registry ──────────────────────────────────────────────────────────────── + +export const INTEGRATIONS: Record = { + "claude-code": claudeCode, + "cursor": cursor, +}; + +export function getIntegration(id: IntegrationType): Integration { + const integration = INTEGRATIONS[id]; + if (!integration) { + throw new Error(`Unknown integration: ${id}`); + } + return integration; +} + +export function listIntegrations(): Integration[] { + return Object.values(INTEGRATIONS); +} + +export function listIntegrationIds(): IntegrationType[] { + return Object.keys(INTEGRATIONS) as IntegrationType[]; +} + +export function detectInstalledIntegrations(): Integration[] { + return Object.values(INTEGRATIONS).filter((i) => i.detectInstalled()); +} diff --git a/src/hooks/manager.ts b/src/hooks/manager.ts index 499c0e92..33c9c228 100644 --- a/src/hooks/manager.ts +++ b/src/hooks/manager.ts @@ -1,197 +1,145 @@ /** - * Install/remove/list failproofai hooks in Claude Code's settings. + * Install/remove/list failproofai hooks in Claude Code or Cursor settings. 
*/ import { execSync } from "node:child_process"; import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs"; import { resolve, dirname, basename } from "node:path"; import { homedir, platform, arch, release, hostname } from "node:os"; import { - HOOK_EVENT_TYPES, - HOOK_SCOPES, - FAILPROOFAI_HOOK_MARKER, type HookScope, - type ClaudeHookEntry, - type ClaudeHookMatcher, - type ClaudeSettings, + type IntegrationType, } from "./types"; import { promptPolicySelection } from "./install-prompt"; -import { readMergedHooksConfig, readScopedHooksConfig, writeScopedHooksConfig } from "./hooks-config"; +import { + readMergedHooksConfig, + readScopedHooksConfig, + writeScopedHooksConfig, + getConfigPathForScope, +} from "./hooks-config"; import type { HooksConfig } from "./policy-types"; import { BUILTIN_POLICIES } from "./builtin-policies"; import { loadCustomHooks, discoverPolicyFiles } from "./custom-hooks-loader"; import { trackHookEvent } from "./hook-telemetry"; import { getInstanceId, hashToId } from "../../lib/telemetry-id"; import { CliError } from "../cli-error"; +import { getIntegration, type Integration } from "./integrations"; const VALID_POLICY_NAMES = new Set(BUILTIN_POLICIES.map((p) => p.name)); -export function getSettingsPath(scope: HookScope, cwd?: string): string { - const base = cwd ? 
resolve(cwd) : process.cwd(); - switch (scope) { - case "user": - return resolve(homedir(), ".claude", "settings.json"); - case "project": - return resolve(base, ".claude", "settings.json"); - case "local": - return resolve(base, ".claude", "settings.local.json"); - } +export function getSettingsPath( + scope: HookScope | "repo", + cwd?: string, + integration: IntegrationType = "claude-code", +): string { + return getIntegration(integration).getSettingsPath(scope as any, cwd); } -function scopeLabel(scope: HookScope): string { - switch (scope) { - case "user": - return `~/.claude/settings.json`; - case "project": - return `{cwd}/.claude/settings.json`; - case "local": - return `{cwd}/.claude/settings.local.json`; - } +export function hooksInstalledInSettings( + scope: HookScope | "repo", + cwd?: string, + integration: IntegrationType = "claude-code", +): boolean { + return getIntegration(integration).hooksInstalledInSettings(scope as any, cwd); } -function readSettings(settingsPath: string): ClaudeSettings { - if (!existsSync(settingsPath)) { - return {}; +/** + * Resolve the path to the failproofai binary. 
+ */ +function resolveFailproofaiBinary(): string { + // Use FAILPROOFAI_DIST_PATH if provided (for development/testing) + if (process.env.FAILPROOFAI_DIST_PATH) { + const distBin = resolve(process.env.FAILPROOFAI_DIST_PATH, "bin", "failproofai.mjs"); + if (existsSync(distBin)) return distBin; + + const distCli = resolve(process.env.FAILPROOFAI_DIST_PATH, "cli.mjs"); + if (existsSync(distCli)) return distCli; + + const rootBin = resolve(process.env.FAILPROOFAI_DIST_PATH, "..", "bin", "failproofai.mjs"); + if (existsSync(rootBin)) return rootBin; } - const raw = readFileSync(settingsPath, "utf8"); - return JSON.parse(raw) as ClaudeSettings; -} + // Try finding it relative to this file (in dist or src) + const relativeDist = resolve(__dirname, "..", "cli.mjs"); + if (existsSync(relativeDist)) return relativeDist; -function writeSettings(settingsPath: string, settings: ClaudeSettings): void { - mkdirSync(dirname(settingsPath), { recursive: true }); - writeFileSync(settingsPath, JSON.stringify(settings, null, 2) + "\n", "utf8"); -} + const relativeSrc = resolve(__dirname, "..", "..", "bin", "failproofai.mjs"); + if (existsSync(relativeSrc)) return relativeSrc; -function resolveFailproofaiBinary(): string { - try { - const cmd = process.platform === "win32" ? "where failproofai" : "which failproofai"; - const result = execSync(cmd, { encoding: "utf8" }).trim(); - // `where` on Windows may return multiple lines; take the first - return result.split("\n")[0].trim(); - } catch { - throw new CliError( - "failproofai binary not found in PATH.\n" + - "Install it globally first: npm install -g failproofai" - ); - } + // Fallback to global bun bin (typical for users) + return resolve(homedir(), ".bun", "bin", "failproofai"); } -function isFailproofaiHook(hook: Record): boolean { - if (hook[FAILPROOFAI_HOOK_MARKER] === true) return true; - // Fallback for legacy installs that predate the marker - const cmd = typeof hook.command === "string" ? 
hook.command : ""; - return cmd.includes("failproofai") && cmd.includes("--hook"); +function scopeLabel(integration: Integration, scope: string, cwd?: string): string { + const settingsPath = integration.getSettingsPath(scope as any, cwd); + const homeDir = homedir(); + const baseDir = cwd ? resolve(cwd) : process.cwd(); + + if (settingsPath.startsWith(`${homeDir}/`)) { + return `~/${settingsPath.slice(homeDir.length + 1)}`; + } + if (settingsPath.startsWith(`${baseDir}/`)) { + return `{cwd}/${settingsPath.slice(baseDir.length + 1)}`; + } + return settingsPath; } -function validatePolicyNames(names: string[]): void { - const invalid = names.filter((n) => !VALID_POLICY_NAMES.has(n)); - if (invalid.length > 0) { - const validList = [...VALID_POLICY_NAMES].join(", "); +function assertSupportedScope(integration: Integration, scope: string): void { + if (!integration.scopes.includes(scope)) { throw new CliError( - `Unknown policy name(s): ${invalid.join(", ")}\n` + - `Valid policies: ${validList}` + `Scope "${scope}" is not supported for ${integration.displayName}. ` + + `Supported scopes: ${integration.scopes.join(", ")}`, ); } } /** Return only scopes whose settings paths are unique (first wins). 
*/ -function deduplicateScopes(scopes: readonly HookScope[], cwd?: string): HookScope[] { - const seen = new Set(); - return scopes.filter((s) => { - const p = getSettingsPath(s, cwd); - if (seen.has(p)) return false; - seen.add(p); - return true; - }); -} - -export function hooksInstalledInSettings(scope: HookScope, cwd?: string): boolean { - const settingsPath = getSettingsPath(scope, cwd); - if (!existsSync(settingsPath)) return false; - try { - const settings = readSettings(settingsPath); - if (!settings.hooks) return false; - for (const matchers of Object.values(settings.hooks)) { - if (!Array.isArray(matchers)) continue; - for (const matcher of matchers) { - if (!matcher.hooks) continue; - if (matcher.hooks.some((h) => isFailproofaiHook(h as Record))) { - return true; - } - } +function deduplicateScopes( + integration: Integration, + scopes: readonly string[], + cwd?: string, +): string[] { + const paths = new Set(); + const result: string[] = []; + for (const s of scopes) { + const p = integration.getSettingsPath(s as any, cwd); + if (!paths.has(p)) { + paths.add(p); + result.push(s); } - } catch { - // Corrupted settings — treat as not installed } - return false; + return result; } - -function removeHooksFromSettingsFile(settingsPath: string): number { - const settings = readSettings(settingsPath); - - if (!settings.hooks) return 0; - - let removed = 0; - - for (const eventType of Object.keys(settings.hooks)) { - const matchers = settings.hooks[eventType]; - if (!Array.isArray(matchers)) continue; - - for (let i = matchers.length - 1; i >= 0; i--) { - const matcher = matchers[i]; - if (!matcher.hooks) continue; - - const before = matcher.hooks.length; - matcher.hooks = matcher.hooks.filter( - (h) => !isFailproofaiHook(h as Record) - ); - removed += before - matcher.hooks.length; - - // Remove empty matchers - if (matcher.hooks.length === 0) { - matchers.splice(i, 1); - } - } - - // Remove empty event type arrays - if (matchers.length === 0) { - delete 
settings.hooks[eventType]; - } - } - - // Remove empty hooks object - if (Object.keys(settings.hooks).length === 0) { - delete settings.hooks; +function validatePolicyNames(names: string[]): void { + const unknown = names.filter((n) => !VALID_POLICY_NAMES.has(n)); + if (unknown.length > 0) { + const list = [...VALID_POLICY_NAMES].sort().join(", "); + throw new CliError(`Unknown policy name(s): ${unknown.join(", ")}\nValid policies: ${list}`); } - - writeSettings(settingsPath, settings); - return removed; } -/** - * Install hooks into Claude Code settings. - * - * @param policyNames — if provided, skip interactive prompt: - * - `["all"]` → enable all policies - * - `["block-sudo", "block-rm-rf"]` → enable specific policies - * - `undefined` → interactive prompt (pre-loads current config if exists) - * @param scope — settings scope to write to (default: "user") - */ export async function installHooks( policyNames?: string[], - scope: HookScope = "user", + scope: HookScope | "repo" = "user", cwd?: string, includeBeta = false, source?: string, customPoliciesPath?: string, removeCustomHooks = false, + integration: IntegrationType = "claude-code", ): Promise { + const integ = getIntegration(integration); + assertSupportedScope(integ, scope); + + const binaryPath = resolveFailproofaiBinary(); + + // Capture existing config before overwriting (used for telemetry diff) + const previousConfig = readScopedHooksConfig(scope as HookScope, cwd); + const previousEnabled = new Set(previousConfig.enabledPolicies); + // Validate user input first before any system checks if (policyNames !== undefined && policyNames.length > 0) { const nonAllNames = policyNames.filter((n) => n !== "all"); - // Check unknown names first (most actionable error for the user) if (nonAllNames.length > 0) validatePolicyNames(nonAllNames); - // Then check if "all" is mixed with valid specific names if (policyNames.includes("all") && nonAllNames.length > 0) { throw new CliError( `"all" cannot be combined with 
specific policy names.\n` + @@ -200,16 +148,10 @@ export async function installHooks( } } - const binaryPath = resolveFailproofaiBinary(); - - // Capture existing config before overwriting (used for telemetry diff) - const previousConfig = readScopedHooksConfig(scope, cwd); - const previousEnabled = new Set(previousConfig.enabledPolicies); - let selectedPolicies: string[]; if (policyNames !== undefined) { - // Non-interactive path: explicit array was provided (may be empty) + // Non-interactive path let incoming: string[]; if (policyNames.length === 1 && policyNames[0] === "all") { incoming = BUILTIN_POLICIES @@ -218,10 +160,10 @@ export async function installHooks( } else { incoming = policyNames; } - // Additive: union with whatever was already enabled, deduplicated. + // Additive selectedPolicies = [...new Set([...previousConfig.enabledPolicies, ...incoming])]; } else { - // Interactive — pre-load current config if it exists + // Interactive const preSelected = previousConfig.enabledPolicies.length > 0 ? previousConfig.enabledPolicies : undefined; selectedPolicies = await promptPolicySelection(preSelected, { includeBeta }); } @@ -241,26 +183,20 @@ export async function installHooks( process.exit(1); } if (validatedHooks.length === 0) { - console.error( - `Error: no hooks registered in ${customPoliciesPath}. ` + - `Make sure your file calls customPolicies.add(...) 
at least once.`, - ); + console.error(`Error: no hooks registered in ${customPoliciesPath}.`); process.exit(1); } - console.log( - `\nValidated ${validatedHooks.length} custom hook(s): ${validatedHooks.map((h) => h.name).join(", ")}`, - ); + console.log(`\nValidated ${validatedHooks.length} custom hook(s): ${validatedHooks.map((h) => h.name).join(", ")}`); } - writeScopedHooksConfig(configToWrite, scope, cwd); + + writeScopedHooksConfig(configToWrite, scope as HookScope, cwd); console.log(`\nEnabled ${selectedPolicies.length} policy(ies): ${selectedPolicies.join(", ")}`); - if (removeCustomHooks) { - console.log("Custom hooks path cleared."); - } else if (configToWrite.customPoliciesPath) { - console.log(`Custom hooks path: ${configToWrite.customPoliciesPath}`); - } - const settingsPath = getSettingsPath(scope, cwd); - const settings = readSettings(settingsPath); + const settingsPath = integ.getSettingsPath(scope as any, cwd); + const settings = integ.readSettings(settingsPath); + integ.writeHookEntries(settings, binaryPath); + integ.writeSettings(settingsPath, settings); + integ.postInstall?.(); if (!settings.hooks) { settings.hooks = {}; @@ -313,6 +249,7 @@ export async function installHooks( const distinctId = getInstanceId(); await trackHookEvent(distinctId, "hooks_installed", { scope, + integration, policies: selectedPolicies, policy_count: selectedPolicies.length, policies_added: policiesAdded, @@ -327,11 +264,9 @@ export async function installHooks( param_policy_names: configToWrite.policyParams ? Object.keys(configToWrite.policyParams) : [], command_format: scope === "project" ? 
"npx" : "absolute", }); - } catch { - // Telemetry is best-effort — never block the operation - } + } catch { /* best effort */ } - console.log(`Failproof AI hooks installed for all ${HOOK_EVENT_TYPES.length} event types (scope: ${scope}).`); + console.log(`Failproof AI hooks installed for all ${integ.eventTypes.length} event types (scope: ${scope}).`); console.log(`Settings: ${settingsPath}`); if (scope === "project") { console.log(`Command: npx -y failproofai`); @@ -341,10 +276,10 @@ export async function installHooks( } // Warn about duplicate-scope installations - const otherScopes = deduplicateScopes(HOOK_SCOPES, cwd).filter((s) => s !== scope); - const duplicates = otherScopes.filter((s) => hooksInstalledInSettings(s, cwd)); + const otherScopes = deduplicateScopes(integ, integ.scopes, cwd).filter((s) => s !== scope); + const duplicates = otherScopes.filter((s) => integ.hooksInstalledInSettings(s as any, cwd)); if (duplicates.length > 0) { - const scopeList = duplicates.map((s) => `${s} (${scopeLabel(s)})`).join(", "); + const scopeList = duplicates.map((s) => `${s} (${scopeLabel(integ, s, cwd)})`).join(", "); console.log(); console.log(`\x1B[33mWarning: Failproof AI hooks are also installed at ${scopeList}.\x1B[0m`); console.log(`Having hooks in multiple scopes may cause duplicate policy evaluation.`); @@ -353,18 +288,15 @@ export async function installHooks( } } -/** - * Remove hooks from Claude Code settings. 
- * - * @param policyNames — if provided: - * - `undefined` or `["all"]` → remove all failproofai hooks from settings (original behavior) - * - `["block-sudo"]` → disable specific policies in config, keep hooks installed - * @param scope — settings scope to remove from (default: "user"), or "all" to remove from all scopes - * @param opts.betaOnly — set to true when removing only beta policies (adds beta_only flag to telemetry) - */ -export async function removeHooks(policyNames?: string[], scope: HookScope | "all" = "user", cwd?: string, opts?: { betaOnly?: boolean; source?: string; removeCustomHooks?: boolean }): Promise { - // Resolve the effective config scope ("all" falls back to "user" for config reads/writes) - const configScope: HookScope = scope === "all" ? "user" : scope; +export async function removeHooks( + policyNames?: string[], + scope: HookScope | "repo" | "all" = "user", + cwd?: string, + opts?: { betaOnly?: boolean; source?: string; removeCustomHooks?: boolean; integration?: IntegrationType }, + integration: IntegrationType = "claude-code", +): Promise { + const integ = getIntegration(opts?.integration ?? integration); + const configScope: HookScope = scope === "all" ? "user" : (scope as HookScope); // Clear custom hooks path if requested if (opts?.removeCustomHooks) { @@ -395,12 +327,13 @@ export async function removeHooks(policyNames?: string[], scope: HookScope | "al }; writeScopedHooksConfig(updatedConfig, configScope, cwd); - // Telemetry: track policy-only removal from config + // Telemetry try { const distinctId = getInstanceId(); const actuallyRemoved = policyNames.filter((p) => config.enabledPolicies.includes(p)); await trackHookEvent(distinctId, "hooks_removed", { scope, + integration: integ.id, removal_mode: opts?.betaOnly ? "beta_policies" : "policies", beta_only: opts?.betaOnly ?? 
false, policies_removed: actuallyRemoved, @@ -411,9 +344,7 @@ export async function removeHooks(policyNames?: string[], scope: HookScope | "al os_release: release(), hostname_hash: hashToId(hostname()), }); - } catch { - // Telemetry is best-effort — never block the operation - } + } catch { /* best effort */ } console.log(`Disabled ${policyNames.length - notEnabled.length} policy(ies).`); console.log(`Remaining: ${remaining.length > 0 ? remaining.join(", ") : "(none)"}`); @@ -423,32 +354,19 @@ export async function removeHooks(policyNames?: string[], scope: HookScope | "al // Capture enabled policies before clearing (used for accurate telemetry below) const configBeforeRemoval = readScopedHooksConfig(configScope, cwd); - // Remove all failproofai hooks from Claude Code settings - const scopesToRemove: HookScope[] = scope === "all" ? [...HOOK_SCOPES] : [scope]; + if (scope !== "all") { + assertSupportedScope(integ, scope); + } + + // Remove all failproofai hooks from the selected integration's settings + const scopesToRemove = scope === "all" ? [...integ.scopes] : [scope]; let totalRemoved = 0; for (const s of scopesToRemove) { - const settingsPath = getSettingsPath(s, cwd); + const settingsPath = integ.getSettingsPath(s as any, cwd); + if (!existsSync(settingsPath)) continue; - if (!existsSync(settingsPath)) { - if (scope !== "all") { - console.log("No settings file found. Nothing to remove."); - return; - } - continue; - } - - const settings = readSettings(settingsPath); - - if (!settings.hooks) { - if (scope !== "all") { - console.log("No hooks found in settings. 
Nothing to remove."); - return; - } - continue; - } - - const removed = removeHooksFromSettingsFile(settingsPath); + const removed = integ.removeHooksFromFile(settingsPath); totalRemoved += removed; if (scope !== "all") { @@ -459,16 +377,14 @@ export async function removeHooks(policyNames?: string[], scope: HookScope | "al if (scope === "all") { console.log(`Removed ${totalRemoved} failproofai hook(s) from all scopes.`); - for (const s of scopesToRemove) { - console.log(` ${s}: ${getSettingsPath(s, cwd)}`); - } } - // Telemetry: track full hook removal from settings + // Telemetry try { const distinctId = getInstanceId(); await trackHookEvent(distinctId, "hooks_removed", { scope, + integration: integ.id, removal_mode: "hooks", policies_removed: configBeforeRemoval.enabledPolicies, removed_count: totalRemoved, @@ -478,58 +394,40 @@ export async function removeHooks(policyNames?: string[], scope: HookScope | "al os_release: release(), hostname_hash: hashToId(hostname()), }); - } catch { - // Telemetry is best-effort — never block the operation - } + } catch { /* best effort */ } // Clear policy config when removing from all scopes, or when no hooks remain in any scope if (scope === "all") { - // Clear config across all three scopes - for (const s of HOOK_SCOPES) { - const existing = readScopedHooksConfig(s, cwd); - if (existing.enabledPolicies.length > 0 || existing.customPoliciesPath || existing.policyParams) { - const { customPoliciesPath: _drop, policyParams: _dropParams, ...rest } = existing; - writeScopedHooksConfig({ ...rest, enabledPolicies: [] }, s, cwd); + for (const s of integ.scopes) { + if (s === "repo") continue; + const existing = readScopedHooksConfig(s as HookScope, cwd); + if (existing.enabledPolicies.length > 0) { + writeScopedHooksConfig({ ...existing, enabledPolicies: [] }, s as HookScope, cwd); } } - } else if (!HOOK_SCOPES.some((s) => hooksInstalledInSettings(s, cwd))) { - const existing = readScopedHooksConfig(configScope, cwd); - const { 
customPoliciesPath: _drop, policyParams: _dropParams, ...rest } = existing; - writeScopedHooksConfig({ ...rest, enabledPolicies: [] }, configScope, cwd); + } else if (!integ.scopes.some((s) => integ.hooksInstalledInSettings(s as any, cwd))) { + writeScopedHooksConfig({ ...configBeforeRemoval, enabledPolicies: [] }, configScope, cwd); } } -/** - * List all available policies with their per-scope enabled status. - * Layout adapts to the number of installed scopes: - * 0 scopes: compact "not installed" summary - * 1 scope: table with header + checkmarks, beta policies in a separate section - * 2+ scopes: column table with per-scope status, beta policies in a separate section - * - * Also shows: - * - Configured policyParams values beneath each policy - * - Warnings for unknown policyParams keys - * - Custom Hooks section if customPoliciesPath is set - */ -export async function listHooks(cwd?: string): Promise { +export async function listHooks( + cwd?: string, + integration: IntegrationType = "claude-code", +): Promise { + const integ = getIntegration(integration); + // Multi-scope config is merged for listing const config = readMergedHooksConfig(cwd); const enabledSet = new Set(config.enabledPolicies); - // Determine which scopes have hooks installed (deduplicate when paths overlap, e.g. 
cwd === home) - const uniqueScopes = deduplicateScopes(HOOK_SCOPES, cwd); - const installedScopes = uniqueScopes.filter((s) => hooksInstalledInSettings(s, cwd)); + const uniqueScopes = deduplicateScopes(integ, integ.scopes, cwd); + const installedScopes = uniqueScopes.filter((s) => integ.hooksInstalledInSettings(s as any, cwd)); - // Separate beta from regular policies const regularPolicies = BUILTIN_POLICIES.filter((p) => !p.beta); const betaPolicies = BUILTIN_POLICIES.filter((p) => p.beta); - // Dynamic name column width based on longest policy name const nameColWidth = Math.max(...BUILTIN_POLICIES.map((p) => p.name.length)) + 2; - - // All known builtin policy names (for unknown policyParams key detection) const builtinPolicyNames = new Set(BUILTIN_POLICIES.map((p) => p.name)); - // Helper: print params summary lines beneath a policy row const printParamsSummary = (policyName: string, indent: string) => { const params = config.policyParams?.[policyName]; if (!params) return; @@ -538,99 +436,82 @@ export async function listHooks(cwd?: string): Promise { } }; - const statusCol = 8; - const printSimpleRow = (policy: { name: string; description: string }) => { - const mark = enabledSet.has(policy.name) ? `\x1B[32m\u2713\x1B[0m` : " "; - console.log(` ${mark}${" ".repeat(statusCol - 1)}${policy.name.padEnd(nameColWidth)}${policy.description}`); - printParamsSummary(policy.name, ` ${" ".repeat(statusCol)}`); - }; - const printBetaSection = (printRow: (p: { name: string; description: string }) => void) => { - if (betaPolicies.length > 0) { - console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); - for (const policy of betaPolicies) printRow(policy); - } - }; + const statusCol = installedScopes.length > 1 ? 
installedScopes.length * 9 : 8; if (installedScopes.length === 0) { - // State A: No hooks installed — show table with configured state + descriptions - console.log("\nFailproof AI Policies \u2014 not installed\n"); - - console.log(` ${"Status".padEnd(statusCol)}${"Name".padEnd(nameColWidth)}Description`); + console.log(`\nFailproof AI Policies \u2014 not installed (${integ.displayName})\n`); + console.log(` ${"Status".padEnd(8)}${"Name".padEnd(nameColWidth)}Description`); console.log(` ${"\u2500".repeat(6)} ${"\u2500".repeat(nameColWidth - 2)} ${"\u2500".repeat(38)}`); - for (const policy of regularPolicies) printSimpleRow(policy); - printBetaSection(printSimpleRow); + for (const p of regularPolicies) { + const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; + console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); + printParamsSummary(p.name, " "); + } - if (config.enabledPolicies.length > 0) { - console.log("\n Policies not installed. Run `failproofai policies --install` to activate."); - } else { - console.log("\n Run `failproofai policies --install` to get started."); + if (betaPolicies.length > 0) { + console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); + for (const p of betaPolicies) { + const mark = enabledSet.has(p.name) ? 
`\x1B[32m\u2713\x1B[0m` : " "; + console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); + printParamsSummary(p.name, " "); + } } - console.log(" Config: ~/.failproofai/policies-config.json\n"); + console.log("\n Run `failproofai policies --install` to get started."); } else if (installedScopes.length === 1) { - // State B: Single scope — table with header row const scope = installedScopes[0]; console.log(`\nFailproof AI Hook Policies (${scope})\n`); - - console.log(` ${"Status".padEnd(statusCol)}${"Name".padEnd(nameColWidth)}Description`); + console.log(` ${"Status".padEnd(8)}${"Name".padEnd(nameColWidth)}Description`); console.log(` ${"\u2500".repeat(6)} ${"\u2500".repeat(nameColWidth - 2)} ${"\u2500".repeat(38)}`); - for (const policy of regularPolicies) printSimpleRow(policy); - printBetaSection(printSimpleRow); - - console.log("\n Config: ~/.failproofai/policies-config.json\n"); + for (const p of regularPolicies) { + const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; + console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); + printParamsSummary(p.name, " "); + } + if (betaPolicies.length > 0) { + console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); + for (const p of betaPolicies) { + const mark = enabledSet.has(p.name) ? 
`\x1B[32m\u2713\x1B[0m` : " "; + console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); + printParamsSummary(p.name, " "); + } + } } else { - // State C: Multiple scopes — column table const COL = 9; - const scopeLabelMap: Record = { - user: "User", - project: "Project", - local: "Local", - }; - - console.log("\nFailproof AI Hook Policies\n"); + const formatScopeName = (s: string) => `${s[0].toUpperCase()}${s.slice(1)}`; + console.log(`\nFailproof AI Hook Policies (${integ.displayName})\n`); - // Header with only installed scope columns + separator - const buildScopePrefix = () => { - let s = " "; - for (const sc of installedScopes) s += scopeLabelMap[sc].padEnd(COL); - return s; - }; - const scopeHeaderWidth = installedScopes.length * COL; - console.log(`${buildScopePrefix()}${"Name".padEnd(nameColWidth)}Description`); - console.log(` ${"\u2500".repeat(scopeHeaderWidth)}${"\u2500".repeat(nameColWidth)}${"\u2500".repeat(38)}`); + let header = " "; + for (const s of installedScopes) header += formatScopeName(s).padEnd(COL); + header += "Name".padEnd(nameColWidth) + "Description"; + console.log(header); + console.log(` ${"\u2500".repeat(installedScopes.length * COL)}${"\u2500".repeat(nameColWidth)}${"\u2500".repeat(38)}`); - const printMultiScopeRow = (policy: { name: string; description: string }) => { - const enabled = enabledSet.has(policy.name); + const printRow = (p: { name: string; description: string }) => { let row = " "; - for (const _scope of installedScopes) { - if (enabled) { - row += `\x1B[32m\u2713 ON\x1B[0m` + " ".repeat(COL - 4); - } else { - row += " OFF" + " ".repeat(COL - 5); - } + const enabled = enabledSet.has(p.name); + for (const _s of installedScopes) { + row += enabled ? 
`\x1B[32m\u2713 ON\x1B[0m`.padEnd(COL + 9) : ` OFF`.padEnd(COL); } - row += policy.name.padEnd(nameColWidth) + policy.description; + row += p.name.padEnd(nameColWidth) + p.description; console.log(row); - printParamsSummary(policy.name, ` ${" ".repeat(scopeHeaderWidth)}`); + printParamsSummary(p.name, " ".repeat(2 + installedScopes.length * COL)); }; - for (const policy of regularPolicies) printMultiScopeRow(policy); - + for (const p of regularPolicies) printRow(p); if (betaPolicies.length > 0) { console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); - for (const policy of betaPolicies) printMultiScopeRow(policy); + for (const p of betaPolicies) printRow(p); } - - console.log("\n Config: ~/.failproofai/policies-config.json"); - - // Multi-scope warning - const scopeNames = installedScopes.join(", "); - console.log(); - console.log(`\x1B[33m\u26A0 Hooks in multiple scopes (${scopeNames}).\x1B[0m`); - console.log(" Consider keeping one. Remove with: failproofai policies --uninstall --scope \n"); } + // Config path hint + const primaryScope = installedScopes.length > 0 ? 
installedScopes[0] : "user"; + const configPath = getConfigPathForScope(primaryScope as HookScope, cwd); + console.log(`\n Settings: ${integ.getSettingsPath(primaryScope as any, cwd)}`); + console.log(` Config: ${configPath}\n`); + // Warn about unknown policyParams keys if (config.policyParams) { for (const key of Object.keys(config.policyParams)) { @@ -640,7 +521,6 @@ export async function listHooks(cwd?: string): Promise { } } - // Custom Policies section if (config.customPoliciesPath) { console.log(`\n \u2500\u2500 Custom Policies (${config.customPoliciesPath}) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500`); if (!existsSync(config.customPoliciesPath)) { @@ -650,9 +530,8 @@ export async function listHooks(cwd?: string): Promise { if (hooks.length === 0) { console.log(` \x1B[31m\u2717 ERR failed to load (check ~/.failproofai/logs/hooks.log)\x1B[0m`); } else { - const descColWidth = nameColWidth; - for (const hook of hooks) { - console.log(` \x1B[32m\u2713\x1B[0m ${hook.name.padEnd(descColWidth)}${hook.description ?? ""}`); + for (const h of hooks) { + console.log(` \x1B[32m\u2713\x1B[0m ${h.name.padEnd(nameColWidth)}${h.description ?? ""}`); } } } diff --git a/src/hooks/policy-evaluator.ts b/src/hooks/policy-evaluator.ts index 893862ad..8eff82ed 100644 --- a/src/hooks/policy-evaluator.ts +++ b/src/hooks/policy-evaluator.ts @@ -45,7 +45,14 @@ export async function evaluatePolicies( hookLogInfo(`evaluating ${policies.length} policies for ${eventType}`); if (policies.length === 0) { - return { exitCode: 0, stdout: "", stderr: "", policyName: null, reason: null, decision: "allow" }; + return { + exitCode: 0, + stdout: session?.integration === "cursor" ? 
JSON.stringify({ continue: true, permission: "allow" }) : "", + stderr: "", + policyName: null, + reason: null, + decision: "allow", + }; } const baseCtx: PolicyContext = { @@ -96,13 +103,19 @@ export async function evaluatePolicies( const displayTool = ctx.toolName ?? "unknown tool"; if (eventType === "PreToolUse") { - const response = { + const response: any = { hookSpecificOutput: { hookEventName: eventType, permissionDecision: "deny", permissionDecisionReason: `Blocked ${displayTool} by failproofai because: ${reason}, as per the policy configured by the user`, }, }; + if (session?.integration === "cursor") { + response.continue = false; + response.permission = "deny"; + response.userMessage = response.hookSpecificOutput.permissionDecisionReason; + response.agentMessage = `Action blocked by security policy: ${reason}`; + } return { exitCode: 0, stdout: JSON.stringify(response), @@ -114,12 +127,15 @@ export async function evaluatePolicies( } if (eventType === "PostToolUse") { - const response = { + const response: any = { hookSpecificOutput: { hookEventName: eventType, additionalContext: `Blocked ${displayTool} by failproofai because: ${reason}, as per the policy configured by the user`, }, }; + if (session?.integration === "cursor") { + response.agentMessage = response.hookSpecificOutput.additionalContext; + } return { exitCode: 0, stdout: JSON.stringify(response), @@ -143,8 +159,8 @@ export async function evaluatePolicies( // Other event types: exit 2 return { - exitCode: 2, - stdout: "", + exitCode: session?.integration === "cursor" ? 0 : 2, + stdout: session?.integration === "cursor" ? 
JSON.stringify({ continue: false, permission: "deny", userMessage: reason }) : "", stderr: reason, policyName: policy.name, reason, @@ -190,12 +206,15 @@ export async function evaluatePolicies( }; } - const response = { + const response: any = { hookSpecificOutput: { hookEventName: eventType, additionalContext: `Instruction from failproofai: ${combined}`, }, }; + if (session?.integration === "cursor") { + response.agentMessage = response.hookSpecificOutput.additionalContext; + } return { exitCode: 0, stdout: JSON.stringify(response), @@ -212,13 +231,23 @@ export async function evaluatePolicies( const combined = allowEntries.map((e) => e.reason).join("\n"); const policyNames = allowEntries.map((e) => e.policyName); const supportsHookSpecificOutput = eventType === "PreToolUse" || eventType === "PostToolUse" || eventType === "UserPromptSubmit"; - const response = supportsHookSpecificOutput + const response: any = supportsHookSpecificOutput ? { hookSpecificOutput: { hookEventName: eventType, additionalContext: `Note from failproofai: ${combined}` } } : { reason: combined }; + if (session?.integration === "cursor" && supportsHookSpecificOutput) { + response.agentMessage = response.hookSpecificOutput.additionalContext; + } const stderrMsg = allowEntries .map((e) => `[failproofai] ${e.policyName}: ${e.reason}`) .join("\n"); return { exitCode: 0, stdout: JSON.stringify(response), stderr: stderrMsg + "\n", policyName: policyNames[0], policyNames, reason: combined, decision: "allow" }; } - return { exitCode: 0, stdout: "", stderr: "", policyName: null, reason: null, decision: "allow" }; + return { + exitCode: 0, + stdout: session?.integration === "cursor" ? 
JSON.stringify({ continue: true, permission: "allow" }) : "", + stderr: "", + policyName: null, + reason: null, + decision: "allow", + }; } diff --git a/src/hooks/types.ts b/src/hooks/types.ts index 9adbe409..c6941ed3 100644 --- a/src/hooks/types.ts +++ b/src/hooks/types.ts @@ -1,10 +1,15 @@ /** - * Constants and interfaces for Claude Code hooks integration. + * Constants and interfaces for hook integrations. */ export const HOOK_SCOPES = ["user", "project", "local"] as const; export type HookScope = (typeof HOOK_SCOPES)[number]; +export const INTEGRATION_TYPES = ["claude-code", "cursor"] as const; +export type IntegrationType = (typeof INTEGRATION_TYPES)[number]; + +export const CURSOR_HOOK_SCOPES = ["user", "project"] as const; + export const HOOK_EVENT_TYPES = [ "SessionStart", "SessionEnd", @@ -36,6 +41,58 @@ export const HOOK_EVENT_TYPES = [ export type HookEventType = (typeof HOOK_EVENT_TYPES)[number]; +export const CURSOR_HOOK_EVENT_TYPES = [ + "preToolUse", + "postToolUse", + "postToolUseFailure", + "sessionStart", + "sessionEnd", + "subagentStart", + "subagentStop", + "stop", + "preCompact", + "beforeShellExecution", + "afterShellExecution", + "beforeMCPExecution", + "afterMCPExecution", + "beforeReadFile", + "afterFileEdit", + "beforeSubmitPrompt", + "afterAgentResponse", + "afterAgentThought", + "beforeTabFileRead", + "afterTabFileEdit", +] as const; + +export type CursorHookEventType = (typeof CURSOR_HOOK_EVENT_TYPES)[number]; + +/** + * Maps Cursor camelCase event names to internal PascalCase event names + * used by the --hook CLI flag and policy matcher. 
+ */ +export const CURSOR_EVENT_MAP: Record = { + preToolUse: "PreToolUse", + postToolUse: "PostToolUse", + postToolUseFailure: "PostToolUseFailure", + sessionStart: "SessionStart", + sessionEnd: "SessionEnd", + subagentStart: "SubagentStart", + subagentStop: "SubagentStop", + stop: "Stop", + preCompact: "PreCompact", + beforeShellExecution: "PreToolUse", + afterShellExecution: "PostToolUse", + beforeMCPExecution: "PreToolUse", + afterMCPExecution: "PostToolUse", + beforeReadFile: "PreToolUse", + afterFileEdit: "PostToolUse", + beforeSubmitPrompt: "UserPromptSubmit", + afterAgentResponse: "PostToolUse", + afterAgentThought: "PostToolUse", + beforeTabFileRead: "PreToolUse", + afterTabFileEdit: "PostToolUse", +}; + export const FAILPROOFAI_HOOK_MARKER = "__failproofai_hook__" as const; export interface ClaudeHookEntry { @@ -46,6 +103,7 @@ export interface ClaudeHookEntry { } export interface ClaudeHookMatcher { + matcher?: string; hooks: Array>; } @@ -55,9 +113,23 @@ export interface SessionMetadata { cwd?: string; permissionMode?: string; hookEventName?: string; + integration?: IntegrationType; } export interface ClaudeSettings { hooks?: Record; [key: string]: unknown; } + +export interface CursorHookEntry { + command: string; + timeout?: number; + matcher?: string; + failClosed?: boolean; +} + +export interface CursorHooksFile { + version?: number; + hooks?: Record; + [key: string]: unknown; +} From 9731a4910dc00d6ffc79e5689927cc911393fda9 Mon Sep 17 00:00:00 2001 From: Yash Upadhyay Date: Mon, 13 Apr 2026 17:37:35 +0000 Subject: [PATCH 03/34] chore: suppress hydration warnings on UI buttons --- app/components/refresh-button.tsx | 2 ++ app/policies/hooks-client.tsx | 2 ++ 2 files changed, 4 insertions(+) diff --git a/app/components/refresh-button.tsx b/app/components/refresh-button.tsx index 3e79dee0..772cff71 100644 --- a/app/components/refresh-button.tsx +++ b/app/components/refresh-button.tsx @@ -42,6 +42,7 @@ export function RefreshButton({ className }: 
RefreshButtonProps) { )} >