From 2c9b5c2c3e3c4f4ffde406d7e3ed077ab2bb5465 Mon Sep 17 00:00:00 2001 From: Yash Upadhyay Date: Tue, 21 Apr 2026 11:31:02 +0000 Subject: [PATCH 01/47] restore: add back integration test files and documentation --- INTEGRATION_EXECUTION_GUIDE.md | 885 ++++++++++++++++++ INTEGRATION_PLAN.md | 267 ++++++ __tests__/INTEGRATION_TEST_CASES.md | 869 +++++++++++++++++ .../e2e/hooks/copilot-integration.e2e.test.ts | 241 +++++ .../e2e/hooks/cursor-integration.e2e.test.ts | 178 ++++ 5 files changed, 2440 insertions(+) create mode 100644 INTEGRATION_EXECUTION_GUIDE.md create mode 100644 INTEGRATION_PLAN.md create mode 100644 __tests__/INTEGRATION_TEST_CASES.md create mode 100644 __tests__/e2e/hooks/copilot-integration.e2e.test.ts create mode 100644 __tests__/e2e/hooks/cursor-integration.e2e.test.ts diff --git a/INTEGRATION_EXECUTION_GUIDE.md b/INTEGRATION_EXECUTION_GUIDE.md new file mode 100644 index 00000000..32ba3039 --- /dev/null +++ b/INTEGRATION_EXECUTION_GUIDE.md @@ -0,0 +1,885 @@ +# Integration Execution Guide + +> Snapshot for branch `feat/cursor-integration`. +> +> This guide is grounded in the current source and test surface in this branch, not in wishful architecture. +> +> Refresh this file whenever integration source, integration tests, or integration fixtures materially change. +> +> Primary truth sources for this document: +> `src/hooks/types.ts`, `src/hooks/integrations.ts`, `src/hooks/handler.ts`, +> `__tests__/hooks/integrations.test.ts`, `__tests__/hooks/handler.test.ts`, +> `__tests__/e2e/helpers/payloads.ts`, +> `__tests__/e2e/hooks/*.test.ts`, +> `__tests__/INTEGRATION_TEST_CASES.md`. + +--- + +## 1. Purpose And How To Use This Guide + +**Simple View** + +This file is the main playbook for non-Claude integrations in this repo: +Cursor, Gemini, GitHub Copilot, Codex, OpenCode, and Pi. + +Use it when you need to answer four questions: + +1. What already exists in this branch? +2. What is still missing? +3. Which regressions matter most? +4. 
What tests should be written next? + +If you are new to the repo, read this file top to bottom once. +If you are doing implementation work, jump in this order: + +1. Current Branch Truth Snapshot +2. Cross-Integration Pending Matrix +3. The playbook for your integration +4. How To Convert A Pending Row Into Tests + +**Expert View** + +This file is not a replacement for `__tests__/INTEGRATION_TEST_CASES.md`. + +Use this guide for: + +- current branch truth +- prioritization +- gap tracking +- next-test planning +- regression awareness + +Use `__tests__/INTEGRATION_TEST_CASES.md` for: + +- the deeper edge-case contract +- long-form assertions +- named regression references +- exhaustive future test ideas + +If this guide and the source code disagree, trust the source code first and update this guide second. + +### Important Words In Simple Language + +| Term | Meaning | +|---|---| +| Integration | One external agent or tool that failproofai connects to, such as Cursor or Copilot | +| Native event name | The exact event name used by that tool, such as `preToolUse` or `pre_tool_use` | +| Canonical event name | The shared internal failproofai event name, such as `PreToolUse` | +| Payload | The JSON data sent into the hook handler | +| Normalize | Convert different payload styles into one common internal shape | +| Regression test | A test for a bug that already happened once and must not return | +| Gap table | A truth table that says what is implemented, what is tested, and what is still missing | + +### Where To Look In This Repo + +| File Or Area | Why It Matters | +|---|---| +| `src/hooks/types.ts` | integration ids, native event lists, native-to-canonical maps | +| `src/hooks/integrations.ts` | install, uninstall, detect, normalize, command generation, helper logic | +| `src/hooks/handler.ts` | stdin parsing, attribution, session extraction, persistence, transcript logic | +| `src/hooks/manager.ts` | CLI install and uninstall flow | +| 
`__tests__/hooks/integrations.test.ts` | unit tests for per-integration object behavior | +| `__tests__/hooks/handler.test.ts` | unit tests for runtime and handler behavior | +| `__tests__/e2e/helpers/payloads.ts` | reusable event payload builders | +| `__tests__/e2e/hooks/*.test.ts` | true end-to-end integration flows | +| `__tests__/INTEGRATION_TEST_CASES.md` | deeper contract and regression checklist | +| `AGENTS.md` | repo rules for testing, Docker smoke tests, CI, and branch hygiene | + +--- + +## 2. What “Done” Means For Any Integration + +**Simple View** + +An integration is not done just because one event appears in the dashboard. + +An integration is done only when all of these are true: + +- install works +- uninstall works +- reinstall does not duplicate hooks +- native events fire correctly +- event names map to the right canonical names +- payload data is normalized correctly +- session id is extracted correctly +- dashboard shows the right integration and the right session +- policies still allow, deny, or instruct correctly +- important regressions have named tests +- the relevant tests pass + +**Expert View** + +Minimum done-bar for any integration: + +- source registration exists in `types.ts` and `integrations.ts` +- install and uninstall behavior is covered +- integration identity is reliable with and without `--integration` +- session id fallback is safe +- persistence fields are correct +- one broken branch does not silently relabel data as another integration +- regression-prone behavior has dedicated tests, not only incidental coverage + +Things that are not enough: + +- “events show in dashboard” +- “manual testing looked okay once” +- “unit section exists” +- “one e2e test passes” + +Done means stable, attributable, test-backed behavior. + +### One Big Rule + +Do one integration fully before moving to the next one. + +Bad pattern: + +- Gemini half done +- Cursor half done +- Copilot half done +- Codex half done + +Good pattern: + +1. 
Pick one integration +2. Make it stable +3. Add regression tests +4. Only then move to the next one + +This is the fastest safe way to work on this repo. + +--- + +## 3. Current Branch Truth Snapshot + +**Simple View** + +This branch already has real implementations for all 6 non-Claude integrations. +The problem is not “nothing exists.” +The problem is that the code is ahead of the tests. + +Today: + +- Cursor, Gemini, and Copilot have the strongest test surface +- Codex and OpenCode have unit coverage but no dedicated e2e file +- Pi has source code, but the weakest visible test surface +- handler-level integration coverage is shallow across the branch + +**Expert View** + +Status meanings used below: + +- `Yes`: a dedicated surface exists +- `Shallow`: only light or indirect coverage exists +- `No`: no dedicated surface was found + +| Integration | Source Implemented | Unit Coverage | Handler Coverage | E2E Coverage | Payload Fixtures | Highest-Risk Pending Area | +|---|---|---|---|---|---|---| +| Cursor | Yes | Yes | Shallow | Yes | Yes | Twin-fire dedup, cwd/workspace attribution, MCP deep cases | +| Gemini | Yes | Yes | Shallow | Yes | Yes | Deep extraction, attribution without flag, transcript and dashboard deep cases | +| Copilot | Yes | Yes | Shallow | Yes | Yes | `toolArgs` parsing, sync/snap branches, session-id and dashboard regressions, silence guard | +| Codex | Yes | Yes | Shallow | No dedicated file | No dedicated section | Snake_case identity, handler mapping, trace-related behavior | +| OpenCode | Yes | Yes | Shallow | No dedicated file | No dedicated section | Plugin blocking, stderr silence, session persistence | +| Pi | Yes | No dedicated section found | Shallow | No dedicated file | No dedicated section | Extension session handling, UI feedback, recursive isolation | + +Global handler note: +`__tests__/hooks/handler.test.ts` currently does not provide deep, integration-specific coverage across these six integrations. 
+Treat handler behavior as an active gap area unless a dedicated assertion is clearly present. + +Current file-evidence summary: + +- Dedicated e2e files exist today for `cursor`, `gemini`, and `copilot` +- Dedicated payload helper sections exist today for `CursorPayloads`, `GeminiPayloads`, and `CopilotPayloads` +- Dedicated `integrations.test.ts` sections exist today for `cursor`, `gemini`, `copilot`, `codex`, and `opencode` +- No dedicated `integrations.test.ts` section was found for `pi` + +Confirmed missing surfaces discussed in planning: + +- Codex has source and unit coverage, but still has no dedicated e2e lane +- OpenCode has source and unit coverage, but still has no dedicated e2e lane +- Pi is the weakest current surface: no dedicated unit section, no dedicated e2e lane, no dedicated payload helpers +- Codex, OpenCode, and Pi still do not have dedicated payload-helper sections in `__tests__/e2e/helpers/payloads.ts` +- integration-specific handler coverage is shallow across the whole branch +- persistence, dashboard-field, transcript-path, and virtual-mirror assertions are still weak or missing across several integrations +- cross-version compatibility remains mostly unproven +- scope interactions and dedup behavior still need stronger regression coverage + +--- + +## 4. Cross-Integration Pending Matrix + +**Simple View** + +Most of the missing work is not in “writing integration source from zero.” +Most of the missing work is in proving the source with the right tests. + +Use this matrix when you want to decide where to work next by layer instead of by integration. 
+ +**Expert View** + +Status meanings: + +- `Partially Covered`: dedicated tests exist, but deep checklist coverage is still missing +- `Weakly Covered`: some direct or incidental coverage exists, but the layer is not reliable yet +- `Largely Missing`: the layer has little or no dependable test surface + +| Layer | Current Status | What Matters | What Still Needs Tests | +|---|---|---|---| +| Install / uninstall | Partially covered | Hooks must install in the right file, preserve user config, stay idempotent, and uninstall cleanly | Stronger idempotence, byte-preservation, no-project-file, and cross-scope safety checks across all integrations | +| Command format and binary resolution | Partially covered | Native event names and command shape are part of integration identity | More assertions for native event casing, `FAILPROOFAI_DIST_PATH`, quoting, platform path behavior, and older-handler compatibility | +| Event firing reality | Partially covered | A supported event list is useless if real native events do not reach the handler correctly | More event-by-event coverage, especially for Codex, OpenCode, and Pi, plus empty-stdin and block-path behavior | +| Canonical event mapping | Partially covered | Native event names must map to the same internal event language used by policies and the dashboard | More regression tests for Copilot camelCase, Codex snake_case, Gemini PascalCase, and unknown-event fallback | +| Payload normalization | Weakly covered | Policy logic depends on normalized tool name, tool input, cwd, and session fields | More deep-shape, malformed-value, nested-data, null-handling, and stringified-JSON tests | +| Detection and attribution | Weakly covered | The handler must know which integration a payload belongs to, even when signals conflict | More explicit precedence tests for `--integration`, `payload.integration`, unique event names, and negative detect samples | +| Session ID extraction | Weakly covered | Wrong or blank session ids break 
grouping, persistence, and dashboard navigation | More empty-stdin, env fallback, nested session-field, and same-session-across-events coverage | +| Policy evaluation | Partially covered | After normalization, allow, deny, and instruct must still behave correctly per integration protocol | More protocol-specific decision-format tests, non-git stop behavior, and normalized command parsing coverage | +| Deduplication | Weakly covered | Two hooks must not double-log the same event, but real distinct events must still be recorded | More lifecycle-window, cross-scope, same-command, and integration-in-fingerprint coverage | +| Persistence / dashboard fields | Largely missing | The stored record is what the dashboard actually renders | More checks for integration label, session id, raw hook name, canonical event name, stats, and decision fields | +| Transcript and virtual mirror behavior | Largely missing | Non-Claude sessions must still connect to transcript paths and mirrored project views | More tests for transcript derivation, mirror paths, and dashboard session-detail expectations | +| Scope interactions | Weakly covered | User, project, and local installs change real runtime behavior and duplication risk | More multi-scope install, precedence, dev-dist, and dedup interaction tests | +| Cross-version compatibility | Largely missing | Project-scope installs and older published handlers must still attribute events correctly | More tests around native event self-identification, `npx -y failproofai`, and old-handler fallback behavior | + +Interpretation: + +- The branch is strongest in install basics, basic mapping, and core policy plumbing +- The branch is weakest in handler attribution depth, session behavior, persistence, transcripts, mirrors, and cross-version safety + +--- + +## 5. Per-Integration Playbooks + +### Cursor + +**Simple View** + +Cursor is an IDE-style integration. 
+Its main challenge is that hooks can fire from more than one scope, and the payload often describes workspace roots instead of the exact working directory you care about. + +This means Cursor work is less about “does it run?” and more about “does it attribute the event correctly and avoid duplicate behavior?” + +**Expert View** + +**What makes this integration different** + +- Cursor-native hook formats and event names +- IDE-style behavior with user and project hooks both capable of firing +- `--stdin` is part of the command contract +- workspace roots are often the first cwd signal +- MCP events must map correctly to canonical tool events + +**What already exists in this branch** + +- source implementation exists +- dedicated `integrations.test.ts` section exists +- dedicated `cursor-integration.e2e.test.ts` exists +- dedicated `CursorPayloads` helper exists +- handler coverage for Cursor-specific attribution and dedup behavior is still shallow + +**Known regression risks** + +- twin-fire dedup across user and project scope +- cwd lifted from `workspace_roots[0]` +- more specific subfolder cwd overriding workspace root when tool input contains it +- MCP event mapping into `PreToolUse` and `PostToolUse` +- non-Claude policy behavior staying correct under Cursor protocol + +**Tests that must exist** + +- unit tests for detection, event mapping, settings-path shape, and command generation +- handler tests for attribution precedence, session fallback, dedup, and cwd override behavior +- e2e tests for real deny, allow, install, uninstall, and protocol-compliant decision handling +- payload fixtures that cover shell, file, MCP, and subfolder-cwd cases + +**What is still pending right now** + +- deeper handler attribution coverage +- stronger dedup regression tests +- more persistence and dashboard-field assertions +- mirror and transcript-related coverage +- more MCP deep-case coverage +- fuller event-reality coverage for Cursor-native events such as shell, file, and 
MCP paths +- stronger scope-interaction coverage for user plus project hook coexistence + +**Exact next work order** + +1. Extend fixtures only where current Cursor payloads are still too shallow +2. Fill unit gaps in `__tests__/hooks/integrations.test.ts` +3. Add Cursor-specific handler tests in `__tests__/hooks/handler.test.ts` +4. Add only the highest-value new e2e flows after the handler gaps are proven +5. Fix source only after a failing test shows the exact break + +### Gemini + +**Simple View** + +Gemini is the deep-data integration. +Its danger is not only event identity. +Its danger is that useful values can be buried in nested payload shapes. + +If Gemini work is done badly, policies still run, but they run on the wrong extracted data. + +**Expert View** + +**What makes this integration different** + +- Gemini-native PascalCase event names +- deeply nested payload shapes +- deep extraction from fields like `parts`, `arguments`, and `call.method` +- transcript paths derived into Gemini-specific chat storage + +**What already exists in this branch** + +- source implementation exists +- dedicated `integrations.test.ts` section exists +- dedicated `gemini-integration.e2e.test.ts` exists +- dedicated `GeminiPayloads` helper exists +- handler coverage for Gemini-specific attribution and session behavior is still shallow + +**Known regression risks** + +- deep extraction from nested data +- PascalCase identity guard when `--integration` is missing +- transcript-path resolution for dashboard use +- wrong or partial normalization of text, args, or tool name +- fallback to Claude identity when only Gemini-native event naming should decide + +**Tests that must exist** + +- unit tests for deep extraction, detection, event mapping, and settings path +- handler tests for attribution without explicit flag, session handling, and transcript derivation +- e2e tests for deny and allow flows plus richer native-event coverage +- payload fixtures that cover nested method 
calls, `parts`, `arguments`, and odd-shaped values + +**What is still pending right now** + +- richer deep fixtures +- more attribution-without-flag tests +- deeper persistence, transcript, and dashboard assertions +- stronger session-fallback coverage +- more complete event-by-event native-shape coverage +- more realistic nested payload coverage for `parts`, `arguments`, and `call.method` +- stronger transcript-path and mirror-path regression coverage + +**Exact next work order** + +1. Deep fixtures first +2. Unit normalization and detection tests second +3. Handler attribution, transcript, and session tests third +4. E2E additions last +5. Fix source only after a failing test proves the branch that broke + +### Copilot + +**Simple View** + +Copilot is the most branch-sensitive integration right now. +It has real code and real tests, but it also has the heaviest history of regressions. + +Its biggest dangers are: + +- being mislabeled as Claude +- losing the session id in the dashboard +- parsing `toolArgs` incorrectly +- sync and snap behavior quietly damaging the install + +**Expert View** + +**What makes this integration different** + +- camelCase native event names +- settings surface at `~/.copilot/config.json` +- sync engine that merges project hooks into the user config +- snap revision repair behavior +- stringified JSON normalization through fields like `toolArgs` + +**What already exists in this branch** + +- source implementation exists +- dedicated `integrations.test.ts` section exists +- dedicated `copilot-integration.e2e.test.ts` exists +- dedicated `CopilotPayloads` helper exists +- Copilot utility coverage exists for sync helpers +- handler coverage for deeper Copilot attribution, session fallback, persistence, and silence-guard behavior is still shallow + +**Known regression risks** + +- Copilot events labeled as Claude +- blank session id on the dashboard +- user-scope hooks wiped by `synchronizeCopilotProjectHooks` +- malformed or stringified 
`toolArgs` +- silence guard for legacy wrong-Claude hook firings +- snap revision hook-path behavior +- heuristic detection when explicit integration metadata is missing + +**Tests that must exist** + +- unit tests for sync helpers, event mapping, native command shape, `toolArgs` parsing, and detect logic +- handler tests for session fallback, persistence labeling, silence guard, env recovery, and transcript path derivation +- e2e tests for allow, deny, install, uninstall, sync safety, and regression-heavy payload shapes +- payload fixtures that cover good JSON, bad JSON, nested data, empty input, and env fallback cases + +**What is still pending right now** + +- malformed `toolArgs` handling tests +- env fallback and session synthesis tests +- stronger persistence assertions for integration label and session id +- snap and sync branch coverage +- deeper silence-guard and heuristic-detect coverage +- fuller event-reality coverage for all 8 Copilot native events +- stronger transcript-path derivation coverage for `~/.copilot/session-state/<session-id>/events.jsonl` +- better install-command regression coverage for camelCase native hook names + +**Exact next work order** + +1. Expand Copilot fixtures +2. Fill unit gaps in normalize, detect, and sync helpers +3. Add handler tests for session, attribution, silence guard, and persistence +4. Add targeted e2e regression flows +5. Fix source only after a failing test shows the specific break + +### Codex + +**Simple View** + +Codex is implemented in source, but its proof surface is incomplete. +The biggest current issue is not that Codex has no logic. +The biggest issue is that Codex does not yet have its own dedicated e2e lane or payload helper lane in this branch. 
+ +**Expert View** + +**What makes this integration different** + +- snake_case native events +- handler mapping from snake_case to canonical PascalCase +- legacy CLI compatibility concerns +- trace-related metadata and parsing expectations + +**What already exists in this branch** + +- source implementation exists +- dedicated `integrations.test.ts` section exists +- no dedicated `codex-integration.e2e.test.ts` was found +- no dedicated Codex payload helper section was found in `__tests__/e2e/helpers/payloads.ts` +- handler coverage for Codex identity and session behavior is still shallow + +**Known regression risks** + +- snake_case identity stability +- wrong attribution to another integration when the handler must decide from event naming +- cross-version fallback when older handlers ignore the integration flag +- trace-related behavior and metadata expectations +- past risk of lifecycle events being misattributed + +**Tests that must exist** + +- unit tests for Codex detection, mapping, settings path, and native command shape +- handler tests for attribution, session extraction, fallback, and persistence labeling +- dedicated `codex-integration.e2e.test.ts` +- dedicated Codex payload fixtures for pre-tool, post-tool, session, and stop-like events + +**What is still pending right now** + +- dedicated e2e surface is missing +- dedicated payload helper surface is missing +- deeper handler attribution and session coverage is missing +- persistence, transcript, and cross-version compatibility coverage is weak +- explicit event-reality coverage for `pre_tool_use`, `post_tool_use`, `session_start`, `session_end`, `user_prompt_submitted`, `agent_stop`, and `notification` +- stronger trace-related and old-handler fallback coverage is still missing + +**Exact next work order** + +1. Create Codex fixture shapes first +2. Expand unit coverage second +3. Add Codex-specific handler tests third +4. Add the first dedicated Codex e2e vertical slice last +5. 
Fix source only when the new failing test proves the gap + +### OpenCode + +**Simple View** + +OpenCode is plugin-shaped, not just config-shaped. +That means the integration is only healthy when the wrapper and the handler both behave correctly. + +Its biggest dangers are: + +- blocking must be honored immediately +- stderr noise can break the plugin protocol +- session state must remain stable across calls + +**Expert View** + +**What makes this integration different** + +- plugin-based wrapper flow +- synchronous CLI blocking behavior +- dotted native event names +- stderr/JSON protocol sensitivity +- session state may be held or forwarded by the plugin wrapper + +**What already exists in this branch** + +- source implementation exists +- dedicated `integrations.test.ts` section exists +- no dedicated `opencode-integration.e2e.test.ts` was found +- no dedicated OpenCode payload helper section was found +- handler coverage for OpenCode-specific silence, session, and persistence behavior is still shallow + +**Known regression risks** + +- wrapper must block correctly when the CLI denies +- stderr leakage can break OpenCode protocol handling +- session persistence across plugin calls +- dotted-event attribution and canonicalization +- session-created versus later tool events staying grouped together + +**Tests that must exist** + +- unit tests for detection, mapping, settings path, and wrapper-related assumptions +- handler tests for attribution, session grouping, silence on success, and persistence fields +- dedicated `opencode-integration.e2e.test.ts` +- dedicated OpenCode payload helpers for session start, tool before, tool after, and chat/message flows + +**What is still pending right now** + +- dedicated e2e surface is missing +- dedicated payload helper surface is missing +- stronger handler silence and session tests are missing +- persistence and dashboard-focused coverage is weak +- explicit event-reality coverage for `session.created`, `session.idle`, 
`tool.execute.before`, `tool.execute.after`, and `chat.message` is still missing +- stronger wrapper-blocking and stderr-cleanliness regression coverage is still missing + +**Exact next work order** + +1. Add plugin-style payload fixtures first +2. Expand unit behavior coverage second +3. Add handler silence, attribution, and session tests third +4. Add OpenCode e2e blocking and success flows last +5. Fix source only after the failing test identifies the broken branch + +### Pi + +**Simple View** + +Pi is the weakest-tested integration in this branch. +The source exists, but the supporting proof surfaces are thin. + +That makes Pi high risk even before you find a bug. + +**Expert View** + +**What makes this integration different** + +- extension-based wrapper +- session handoff from extension context +- IDE-style user feedback through status UI +- recursive isolation concerns +- inheritance-style keys such as `codex_session_id` + +**What already exists in this branch** + +- source implementation exists +- no dedicated Pi section was found in `__tests__/hooks/integrations.test.ts` +- no dedicated `pi-integration.e2e.test.ts` was found +- no dedicated Pi payload helper section was found +- handler coverage for Pi-specific session, attribution, and status behavior is still shallow + +**Known regression risks** + +- missing or unstable session ids +- deny flow not surfacing status UI feedback +- recursive self-trigger loops +- inherited metadata keys not being honored +- wrong attribution or grouping when the wrapper sends sparse payloads + +**Tests that must exist** + +- first dedicated Pi unit section in `__tests__/hooks/integrations.test.ts` +- handler tests for session extraction, attribution, recursive isolation, and persistence +- dedicated `pi-integration.e2e.test.ts` +- dedicated Pi payload helpers for session start, tool call, tool result, and UI-feedback deny cases + +**What is still pending right now** + +- dedicated unit surface is missing +- dedicated e2e 
surface is missing +- dedicated payload helper surface is missing +- deeper handler coverage is missing +- persistence and session-label behavior remains weakly proven +- explicit event-reality coverage for `session_start`, `tool_call`, `tool_result`, and `input` is still missing +- UI feedback behavior and recursive-isolation regressions still need dedicated proof +- inherited metadata handling such as `codex_session_id` and `codex_event` still needs direct tests + +**Exact next work order** + +1. Define Pi payload shapes first +2. Add the first dedicated Pi unit section second +3. Add Pi-specific handler tests third +4. Add the first Pi e2e flow last +5. Fix source only after a failing test makes the break concrete + +--- + +## 6. How To Convert A Pending Row Into Tests + +**Simple View** + +Do not fix a pending gap by jumping straight into source code. +First convert the gap into the smallest useful test shape. + +Use this order every time: + +1. fixture first +2. unit second +3. handler third +4. e2e last +5. 
fix code only after the failing test proves the bug + +**Expert View** + +### Map each kind of missing work to the right file + +| Missing Work Type | Put It Here | What It Should Prove | +|---|---|---| +| Integration object behavior | `__tests__/hooks/integrations.test.ts` | settings paths, event maps, detect logic, command shape, helper utilities | +| Handler and runtime behavior | `__tests__/hooks/handler.test.ts` | attribution precedence, session extraction, fallback logic, persistence fields, silence guard, transcript path | +| Payload builders | `__tests__/e2e/helpers/payloads.ts` | realistic native payload shapes for each integration | +| End-to-end integration flows | `__tests__/e2e/hooks/<integration>-integration.e2e.test.ts` | install, run, allow, deny, protocol contract, uninstall, high-value regressions | + +### Biggest missing surfaces right now + +- `codex-integration.e2e.test.ts` does not exist +- `opencode-integration.e2e.test.ts` does not exist +- `pi-integration.e2e.test.ts` does not exist +- Codex payload helpers do not have a dedicated section +- OpenCode payload helpers do not have a dedicated section +- Pi payload helpers do not have a dedicated section +- Pi does not have a dedicated integration unit section +- integration-specific handler coverage is still shallow across the branch +- persistence, dashboard, transcript, and virtual-mirror coverage remains weak or missing across several integrations + +### First missing files and first tests to add later + +| First Missing File Or Area | First High-Value Tests | +|---|---| +| `__tests__/e2e/helpers/payloads.ts` for Codex | snake_case native event payloads, session fallback payloads, stop/session lifecycle payloads | +| `__tests__/e2e/helpers/payloads.ts` for OpenCode | plugin-style session-created and tool-before/tool-after payloads, stderr-sensitive success payloads | +| `__tests__/e2e/helpers/payloads.ts` for Pi | extension session payloads, deny-with-status payloads, recursive-isolation payloads | +| 
`__tests__/e2e/hooks/codex-integration.e2e.test.ts` | install, one deny flow, one allow flow, old-handler/native-event attribution regression | +| `__tests__/e2e/hooks/opencode-integration.e2e.test.ts` | wrapper blocking, clean success path, silence-on-success regression | +| `__tests__/e2e/hooks/pi-integration.e2e.test.ts` | session propagation, deny feedback, recursive isolation | +| `__tests__/hooks/integrations.test.ts` for Pi | detect logic, settings path, event mapping, payload normalization entry points | +| `__tests__/hooks/handler.test.ts` across all | attribution precedence, session fallback, persistence fields, transcript-path derivation, silence-guard regressions | + +### Practical conversion examples + +If the pending row is “wrong integration label on dashboard”: + +- add or expand handler tests first +- assert persisted `integration` +- then add e2e only if the bug depends on full CLI flow + +If the pending row is “payload shape is weird”: + +- add fixtures first +- add unit normalization tests second +- add handler tests third + +If the pending row is “install or uninstall broke user config”: + +- add unit tests around helper behavior +- add e2e only if file-on-disk flow matters + +If the pending row is “agent did not stop on deny”: + +- add e2e because full protocol behavior matters +- add unit or handler tests only for the branches that explain why it broke + +### How To Build A Gap Table + +Before implementation, write a small truth table for the integration you are touching. 
+ +Use columns like: + +| Check | Status | +|---|---| +| Install works | yes / no | +| Uninstall works | yes / no | +| Events fire | yes / no | +| Session id correct | yes / no | +| Dashboard integration correct | yes / no | +| Policies work | yes / no | +| Unit tests exist | yes / no | +| Handler tests exist | yes / no | +| E2E tests exist | yes / no | + +If you want a richer version, use: + +| Case | Source Exists | Unit Test Exists | Handler Test Exists | E2E Test Exists | Status | +|---|---|---|---|---|---| +| Copilot `toolArgs` JSON parse | yes | yes / no | yes / no | yes / no | green / yellow / red | + +Simple status meanings: + +- `green`: implemented and tested well +- `yellow`: implemented, but weakly tested +- `red`: missing or still risky + +### What A Good Test Looks Like + +Use one test name for one behavior. + +Good examples: + +- `maps errorOccurred to Stop` +- `preserves user scope when no project file` +- `uses COPILOT_SESSION_ID when payload is empty` + +Bad tests usually have these problems: + +- too many unrelated assertions +- too much fixture setup repeated inline +- failure message does not explain the bug + +The best test order in this repo is still: + +1. payload fixture +2. unit test +3. handler test +4. e2e + +### Useful Commands + +These are the most useful commands when doing the real implementation work later: + +```bash +git branch --show-current +bunx vitest run __tests__/hooks/integrations.test.ts +bunx vitest run __tests__/hooks/handler.test.ts +bunx vitest run --config vitest.config.e2e.mts __tests__/e2e/hooks/copilot-integration.e2e.test.ts +bun run test:run +bun run test:e2e +bun run lint +bunx tsc --noEmit +``` + +After non-trivial changes in `src/hooks/` or `package.json`, also run the Docker smoke test from `AGENTS.md`. 
+ +Before pushing, follow the repo rules in `AGENTS.md`: + +```bash +git fetch origin && git log --oneline origin/main ^HEAD +gh pr list --head "$(git branch --show-current)" +gh run list --limit 3 +``` + +### Reusable Pattern For Future Integrations + +For future integrations, reuse this same build order: + +1. add or confirm native event definitions in `src/hooks/types.ts` +2. add or confirm integration behavior in `src/hooks/integrations.ts` +3. confirm handler attribution and session flow in `src/hooks/handler.ts` +4. add payload builders +5. add unit tests +6. add handler tests +7. add the first dedicated e2e file + +That pattern is safer than shipping “mostly working” source without proof. + +--- + +## 7. Named Regression Index + +**Simple View** + +These are not abstract risks. +These are the kinds of bugs that can confuse users, hide events, or make a working integration look broken. + +Every row below should stay tied to a named test or named test area. + +**Expert View** + +| Regression | Affected Integration(s) | Test Name / Test Area | User-Visible Symptom | +|---|---|---|---| +| Copilot events labeled as Claude | Copilot | `copilot > native camelCase event names install` and handler attribution coverage | Dashboard shows Copilot activity as Claude activity | +| Copilot session id blank on dashboard | Copilot | `copilot > fallback sessionId synthesized` and handler session extraction coverage | Session page shows blank or dash-style session id | +| Copilot sync wiping user-scope hooks | Copilot | `copilot-sync > preserves user scope when no project file` | Copilot hooks disappear after install, sync, or terminal startup | +| Copilot `toolArgs` string handling | Copilot | `copilot > normalize parses toolArgs JSON` and malformed-string variants | Policies see raw strings, wrong commands, or crash-prone input | +| Codex / Copilot / Gemini attribution mistakes | Codex, Copilot, Gemini | handler attribution precedence coverage | Events land under the wrong 
integration and dashboard/policies look inconsistent | +| Codex SessionStart mis-attributed to Gemini | Codex, Gemini | `handler > --integration flag wins over event-name` | Session activity shows under the wrong agent family | +| Old handler fallback with `npx -y failproofai` | Codex, Copilot, older published handler paths | `cross-version > event-name fallback attributes correctly on old handler` | Project-scope installs behave differently on older published versions | +| Lifecycle dedup swallowing real events | All, especially session lifecycle flows | `dedup > lifecycle uses 5s window + sessionId` | Real session start or stop events disappear from logs | +| Cursor non-Claude policy bypass behavior | Cursor | `policy > warn-repeated-tool-calls tunes for non-Claude` and related policy/evaluator coverage | Policy feels active in Claude but not in Cursor | +| OpenCode / Pi stderr protocol leakage | OpenCode, Pi | `opencode/pi > handler silent on success` | Wrapper or client protocol breaks because unexpected stderr appears | +| Convention hooks not loading | Convention policy system across integrations | `custom-hooks > convention files loaded per scope` | Policy files exist, but nothing runs and the user thinks hooks are broken | + +Interpretation: + +- if a row here has no obvious dedicated test, that is active debt +- if a regression returns, update both the test surface and this index + +--- + +## 8. Closeout Checklist Before Saying “Done” + +**Simple View** + +Do not stop when the code “looks fine.” +Stop when the branch truth has improved and the proof matches the claim. 
+ +**Expert View** + +- [ ] I used this file as a snapshot of the current branch, not as a fantasy roadmap +- [ ] I checked `__tests__/INTEGRATION_TEST_CASES.md` for deeper contract details +- [ ] I know which integration I am working on +- [ ] I know which layer is actually failing: install, mapping, normalization, attribution, session, persistence, or e2e protocol +- [ ] I created or updated payload fixtures before writing high-level tests +- [ ] I added or planned the right unit coverage in `__tests__/hooks/integrations.test.ts` +- [ ] I added or planned the right handler coverage in `__tests__/hooks/handler.test.ts` +- [ ] I added or planned the right e2e coverage in `__tests__/e2e/hooks/<integration>-integration.e2e.test.ts` +- [ ] I did not count shallow incidental coverage as proof +- [ ] I did not fix source code before a failing test made the break specific +- [ ] I checked whether the change affects dashboard fields, transcripts, mirrors, or dedup behavior +- [ ] I updated the guide again if the branch truth changed materially + +Final reminder: +this guide is strongest when it stays honest. +If the branch still has a gap, write the gap down clearly instead of hiding it behind “mostly working.” + +### Common Mistakes To Avoid + +- trusting the dashboard alone +- calling something “done” because one event appeared once +- skipping handler tests because unit tests already pass +- writing source changes before a failing test proves the branch that broke +- working on many integrations at the same time +- assuming shallow coverage is the same as strong coverage + +### If You Only Have 2 Days + +Do not try to finish six integrations badly. + +Use the time like this: + +1. Pick the riskiest single integration +2. Build the gap table +3. Add fixtures +4. Fill unit and handler gaps +5. Add or expand one real e2e lane +6. Fix only the bugs the tests expose + +That creates one reliable template instead of many unstable partial wins. 
+ +### Final Instruction For The Next Person + +If you are unsure what to do next, do this exact sequence: + +1. Open this guide +2. Find your integration in Section 5 +3. Read its “What is still pending right now” block +4. Turn the first pending item into a fixture, unit test, handler test, or e2e test +5. Run the smallest useful test first +6. Fix source only after the failing test proves the bug + +That is the safest path through this codebase. diff --git a/INTEGRATION_PLAN.md b/INTEGRATION_PLAN.md new file mode 100644 index 00000000..76c8115d --- /dev/null +++ b/INTEGRATION_PLAN.md @@ -0,0 +1,267 @@ +# Improved Integration Plan: Gemini CLI + GitHub Copilot + +> [!NOTE] +> **V2 IMPROVEMENTS**: This plan introduces a modular architecture where each integration (Claude, Cursor, Gemini, Copilot) owns its own detection and normalization logic. This fixes 6 existing bugs and adds deep regression guards for Claude/Cursor. + +--- + +## Known Bugs in the Previous Plan (Read First) + +| Bug | Severity | Root Cause | Fixed In Phase | +|---|---|---|---| +| **1. Detection Collision** | **CRITICAL** | Gemini's `SessionStart` overlaps with Claude Code. | Phase 3 (Modular Detection) | +| **2. Raw Event Logging** | **MEDIUM** | Logs used raw `--hook` arg instead of mapped canonical names. | Phase 3 (Canonical Mapping) | +| **3. Log Formatting** | **LOW** | Disconnect between console output and actual evaluation name. | Phase 3 (Unified Logging) | +| **4. Copilot Deny Branch** | **LOW** | Missing specific block format for `PostToolUse`. | Phase 4 (Evaluator) | +| **5. Copilot Allow Spam** | **MEDIUM** | `permissionDecision: allow` was sent on every single event. | Phase 4 (Evaluator) | +| **6. Cursor Normalization** | **MEDIUM** | Hardcoded `workspace_roots` check in handler was fragile. | Phase 2 (Modular Integration) | + +--- + +## 1. 
Modular Architecture Overview + +Instead of hardcoding "integration detection" in the main handler, we extend the `Integration` interface: + +```typescript +export interface Integration { + // ... existing methods ... + /** Detect if this payload belongs to this integration */ + detect(payload: Record<string, unknown>): boolean; + /** Normalize payload fields (e.g. camelCase -> snake_case) */ + normalizePayload(payload: Record<string, unknown>): void; + /** Map raw hook names to canonical PascalCase (PreToolUse, etc.) */ + getCanonicalEventName(payload: Record<string, unknown>, cliArg: string): string; +} +``` + +### Flow: +1. `handler.ts` receives payload. +2. Iterates over `INTEGRATIONS.detect(payload)`. +3. First match wins (Copilot -> Gemini -> Cursor -> Claude Code). +4. `integration.normalizePayload(payload)` is called. +5. `integration.getCanonicalEventName(payload, cliArg)` is called. +6. Execution proceeds with perfectly clean, canonical state. + +--- + +## Phase 1 — `src/hooks/types.ts` + +**Changes**: Add `"gemini"` and `"copilot"` to `INTEGRATION_TYPES`. Add Gemini/Copilot event maps and types. + +Update [types.ts](file:///home/yashu/fp/failproofai/src/hooks/types.ts): + +```typescript +// Line 8: +export const INTEGRATION_TYPES = ["claude-code", "cursor", "gemini", "copilot"] as const; + +// ... Append at end of file ... 
+ +// ── Gemini CLI ──────────────────────────────────────────────────────────────── +export const GEMINI_HOOK_EVENT_TYPES = [ + "BeforeTool", "AfterTool", "BeforeAgent", "AfterAgent", "BeforeModel", + "AfterModel", "BeforeToolSelection", "SessionStart", "SessionEnd", + "Notification", "PreCompress" +] as const; + +export type GeminiHookEventType = (typeof GEMINI_HOOK_EVENT_TYPES)[number]; + +export const GEMINI_EVENT_MAP: Record<GeminiHookEventType, HookEventType> = { + BeforeTool: "PreToolUse", AfterTool: "PostToolUse", + BeforeAgent: "SessionStart", AfterAgent: "Stop", + BeforeModel: "UserPromptSubmit", AfterModel: "PostToolUse", + BeforeToolSelection: "PreToolUse", SessionStart: "SessionStart", + SessionEnd: "SessionEnd", Notification: "Notification", + PreCompress: "PreCompact", +}; + +// ── GitHub Copilot ──────────────────────────────────────────────────────────── +export const COPILOT_HOOK_EVENT_TYPES = [ + "sessionStart", "sessionEnd", "userPromptSubmitted", + "preToolUse", "postToolUse", "agentStop", "subagentStop", "errorOccurred" +] as const; + +export type CopilotHookEventType = (typeof COPILOT_HOOK_EVENT_TYPES)[number]; + +export const COPILOT_EVENT_MAP: Record<CopilotHookEventType, HookEventType> = { + sessionStart: "SessionStart", sessionEnd: "SessionEnd", + userPromptSubmitted: "UserPromptSubmit", preToolUse: "PreToolUse", + postToolUse: "PostToolUse", agentStop: "Stop", + subagentStop: "SubagentStop", errorOccurred: "Stop", +}; +``` + +--- + +## Phase 2 — `src/hooks/integrations.ts` + +**Changes**: Update `Integration` interface and implement new methods for all four integrations. + +### 2.1 Interface Update +```typescript +export interface Integration { + // ... (existing methods: getSettingsPath, readSettings, etc.) ... 
+ detect(payload: Record<string, unknown>): boolean; + normalizePayload(payload: Record<string, unknown>): void; + getCanonicalEventName(payload: Record<string, unknown>, cliArg: string): string; +} +``` + +### 2.2 Claude Code Implementation +```typescript +const claudeCode: Integration = { + // ... existing ... 
+ detect: () => true, // Fallback + normalizePayload: () => {}, // Claude uses snake_case natively + getCanonicalEventName: (_, cliArg) => cliArg, +}; +``` + +### 2.3 Cursor Implementation (Modularized) +```typescript +const cursor: Integration = { + // ... existing ... + detect(payload) { + const hookName = (payload.hook_event_name as string) || ""; + return ( + Array.isArray(payload.workspace_roots) || + hookName.startsWith("before") || + hookName.startsWith("after") || + hookName === "preToolUse" || + hookName === "postToolUse" + ); + }, + normalizePayload(payload) { + if (!payload.cwd && Array.isArray(payload.workspace_roots) && payload.workspace_roots.length > 0) { + payload.cwd = payload.workspace_roots[0]; + } + }, + getCanonicalEventName: (_, cliArg) => cliArg, +}; +``` + +### 2.4 Gemini Implementation +```typescript +const gemini: Integration = { + // ... existing ... + detect(payload) { + const h = payload.hook_event_name as string; + // Exclusive detection: avoid SessionStart/SessionEnd collisions + return ["BeforeTool", "AfterTool", "BeforeAgent", "AfterAgent", "BeforeModel", "AfterModel", "BeforeToolSelection"].includes(h); + }, + normalizePayload: () => {}, // Gemini uses snake_case + getCanonicalEventName(payload, cliArg) { + const h = payload.hook_event_name as GeminiHookEventType; + return GEMINI_EVENT_MAP[h] ?? cliArg; + } +}; +``` + +--- + +## Phase 3 — `src/hooks/handler.ts` (Modularized) + +**Changes**: Clean up the detection logic and fix logging bugs. + +```typescript +// ... Inside handleHookEvent ... + + // 1. 
Modular Detection + let integrationType: IntegrationType = (parsed.integration as IntegrationType); + if (!integrationType) { + // Priority: Copilot -> Gemini -> Cursor -> Claude Code (default) + if (copilot.detect(parsed)) integrationType = "copilot"; + else if (gemini.detect(parsed)) integrationType = "gemini"; + else if (cursor.detect(parsed)) integrationType = "cursor"; + else integrationType = "claude-code"; + } + + const integ = getIntegration(integrationType); + + // 2. Modular Normalization + integ.normalizePayload(parsed); + + // 3. Modular Canonical Mapping (Fix Bug 1, 2, 3) + const canonicalEventName = integ.getCanonicalEventName(parsed, eventType); + + // 4. Update session metadata + const session: SessionMetadata = { + sessionId: parsed.session_id as string, + integration: integrationType, + // ... other fields ... + }; + + hookLogInfo(`event=${canonicalEventName} integration=${integrationType} ...`); + + // 5. Evaluate (Fix Bug 2) + const result = await evaluatePolicies(canonicalEventName as HookEventType, parsed, session, config); + + // 6. Persist (Fix Bug 2) + persistHookActivity({ + ...result, + eventType: canonicalEventName, + integration: integrationType, + }); +``` + +--- + +## Phase 4 — `src/hooks/policy-evaluator.ts` + +**Changes**: Fix Bug 4 & 5 and format Gemini action blocks. + +```typescript +// Line 39: Empty policy final allow (Fix Bug 5) +if (policies.length === 0) { + let stdout = ""; + if (session?.integration === "cursor") { + stdout = JSON.stringify({ continue: true, permission: "allow" }); + } else if (session?.integration === "copilot" && eventType === "PreToolUse") { + stdout = JSON.stringify({ permissionDecision: "allow" }); + } + return { exitCode: 0, stdout, ... }; +} + +// Inside PreToolUse deny (Add Gemini action: "BLOCK") +if (session?.integration === "gemini") { + return { + exitCode: 0, + stdout: JSON.stringify({ action: "BLOCK", reason: blockMessage }), + ... 
+ }; +} +``` + +--- + +## Phase 9 — Regression Suite (New Phase) + +To ensure no impact on Claude Code or Cursor: + +1. **Claude Regression**: Mock a "Bash" tool event from Claude. + - Verify `integration === "claude-code"`. + - Verify `canonicalEventName === "PreToolUse"`. + - Verify empty stdout on allow. + +2. **Cursor Regression**: Mock a `workspace_roots` payload. + - Verify `integration === "cursor"`. + - Verify `parsed.cwd` is correctly extracted from `workspace_roots[0]`. + - Verify `stdout` contains `continue/permission` fields. + +3. **Log Visibility**: + - Verify that activity persisted for **both** Gemini and Copilot contains the `integration` field. + - This ensures the Dashboard `/policies` activity tab correctly shows which integration triggered each block. + +--- + +## Phase 10 — Manual Smoke Tests + +```bash +# Gemini Allow Check +echo '{"hook_event_name":"BeforeTool","tool_name":"ls"}' | failproofai --hook PreToolUse +# Result: exit 0, empty stdout + +# Copilot Deny Check (Simulate block) +# Force a deny policy (e.g. block-sudo) +echo '{"sessionId":"123","toolName":"sudo","hookEventName":"preToolUse"}' | failproofai --hook PreToolUse +# Result: exit 0, stdout = {"permissionDecision":"deny", ...} +``` diff --git a/__tests__/INTEGRATION_TEST_CASES.md b/__tests__/INTEGRATION_TEST_CASES.md new file mode 100644 index 00000000..0973e34d --- /dev/null +++ b/__tests__/INTEGRATION_TEST_CASES.md @@ -0,0 +1,869 @@ +# Integration Test Cases — Non-Claude Integrations + +A comprehensive checklist of edge cases the test suite must cover for every non-Claude +integration (Cursor, Gemini, GitHub Copilot, Codex, OpenCode, Pi). Cases are grouped by +layer — installation, hook firing, payload normalization, attribution, dashboard display, +and cross-cutting concerns. Each case is written as a testable assertion. + +Symbols: ✅ = must-pass assertion, ⚠️ = regression guard (has broken before), 🔁 = parameterize across all integrations. + +--- + +## 0. Index + +1. 
[Installation & Uninstallation](#1-installation--uninstallation) +2. [Hook Command Format & Binary Resolution](#2-hook-command-format--binary-resolution) +3. [Event Firing & Trigger Reality](#3-event-firing--trigger-reality) +4. [Event Name Canonicalization](#4-event-name-canonicalization) +5. [Payload Normalization](#5-payload-normalization) +6. [Integration Detection & Attribution](#6-integration-detection--attribution) +7. [Session ID Extraction & Fallback](#7-session-id-extraction--fallback) +8. [Policy Evaluation per Integration](#8-policy-evaluation-per-integration) +9. [Deduplication](#9-deduplication) +10. [Persistence to hook-activity Store](#10-persistence-to-hook-activity-store) +11. [Dashboard Display Gaps](#11-dashboard-display-gaps) +12. [Sync & Merge Functions](#12-sync--merge-functions) +13. [Scopes: user / project / local](#13-scopes-user--project--local) +14. [Cross-Version Compatibility](#14-cross-version-compatibility) +15. [Integration-Specific Deep Cases](#15-integration-specific-deep-cases) + +--- + +## 1. Installation & Uninstallation + +🔁 For every integration {cursor, gemini, copilot, codex, opencode, pi}: + +- ✅ `policies --install --integration <name>` writes the correct settings file at the correct path for each scope. +- ✅ Fresh install on a machine with **no prior config file** creates parent directories (e.g. `~/.copilot/`, `~/.config/github-copilot/hooks/`, `~/.gemini/`, `.github/hooks/`). +- ✅ Install **preserves existing user settings** in the same file (e.g. Copilot's `copilotTokens`, `loggedInUsers` must remain untouched after installing hooks). +- ⚠️ Re-running install is idempotent — no duplicate hook entries. +- ⚠️ Install followed by uninstall leaves the settings file in a state byte-identical to before install (modulo whitespace). No orphan `hooks: {}` block if none existed. +- ⚠️ Uninstall removes **only** failproofai entries, not other user-authored hooks. 
+- ⚠️ Install of integration A does not touch integration B's settings file. +- ✅ `policies --install all --integration ` enables all policy names and registers hooks for every event type in that integration's `eventTypes` list. +- ⚠️ Running uninstall with `--scope project` when **no project file exists** exits 0 gracefully. +- 🔁 `hooksInstalledInSettings(scope)` returns true iff any failproofai marker is present. +- ⚠️ **Copilot regression**: after user-scope install, `synchronizeCopilotProjectHooks` (postInstall) must not wipe the just-written user entries when no project file exists. + +--- + +## 2. Hook Command Format & Binary Resolution + +🔁 For each integration: + +- ✅ Project-scope hook command uses portable `npx -y failproofai` (no machine-specific path) so the file is safe to commit. +- ✅ User-scope hook command uses absolute local binary path (`process.execPath` + resolved dist entry), so it works without `PATH` setup. +- ⚠️ `FAILPROOFAI_DIST_PATH` env var overrides the resolved binary path. +- ⚠️ When `FAILPROOFAI_DIST_PATH` is unset, `findDistIndex()` walks from the running binary's directory up to find `dist/`. +- ✅ Each installed command contains `--integration ` and `--hook `. +- ⚠️ **Copilot regression**: event name in the command is native camelCase (`sessionStart`), NOT PascalCase. +- ⚠️ **Codex**: event name is snake_case (`pre_tool_use`). +- ⚠️ **Gemini**: event name is Gemini's unique PascalCase (`BeforeTool`, `BeforeModel`). +- ⚠️ **Cursor**: event name matches Cursor's native format; command includes `--stdin`. +- ⚠️ Shell-quoting: paths containing spaces are double-quoted in the generated bash. +- ⚠️ Windows path separators: on win32, binary path uses backslashes but is wrapped in quotes so bash can execute it. + +--- + +## 3. Event Firing & Trigger Reality + +🔁 For each integration: simulate each native event type and verify the hook handler is invoked. 
+ +- ✅ **Cursor**: `beforeShellExecution`, `afterFileEdit`, `beforeReadFile`, `beforeMCPExecution`, `stop` all fire via the JSONL pipe and produce one handler invocation each. +- ✅ **Gemini**: `BeforeTool`, `AfterTool`, `BeforeModel`, `AfterModel`, `BeforeAgent`, `AfterAgent`, `BeforeToolSelection`, `PreCompress` each fire. +- ✅ **Copilot**: `sessionStart`, `sessionEnd`, `userPromptSubmitted`, `preToolUse`, `postToolUse`, `agentStop`, `subagentStop`, `errorOccurred` each fire. +- ✅ **Codex**: `pre_tool_use`, `post_tool_use`, `session_start`, `session_end`, `user_prompt_submitted`, `agent_stop`, `notification` each fire. +- ✅ **OpenCode**: dotted events (`tool.before`, `tool.after`, `session.start`, `session.end`). +- ✅ **Pi**: snake_case events (`session_start`, `tool_call`, `tool_result`, `input`). +- ⚠️ **No-event-payload**: some CLIs invoke hooks with empty stdin. Handler must not crash; fallback sessionId must be synthesized from cwd. +- ⚠️ Non-zero exit code: handler returning 2 (block) must be honored — integration cancels the tool call. + +--- + +## 4. Event Name Canonicalization + +🔁 For each integration: + +- ✅ Native event name (camelCase/snake_case/dotted) fed into handler becomes the canonical PascalCase name used by builtins and the dashboard. +- ⚠️ **Copilot** regression: `sessionStart` → `SessionStart`, `errorOccurred` → `Stop`. Dashboard row's `eventType` must be PascalCase after persistence. +- ⚠️ Unknown event name — handler falls through without throwing, logs a warning, returns "allow". +- ✅ `ALL_CANONICAL_EVENTS` set in handler.ts includes every mapped value from every integration's EVENT_MAP. + +--- + +## 5. Payload Normalization + +🔁 For each integration: + +- ✅ `session_id` extracted from payload's native key (`sessionId`, `conversation_id`, `tab_id`, …) and assigned to normalized `session_id`. +- ✅ `tool_name` extracted from native keys (`toolName`, `tool`, `name`, `call.method`…). +- ✅ `tool_input` extracted and parsed. 
+- ⚠️ **Copilot** `toolArgs` that is not valid JSON falls back to raw string, not a crash. +- ⚠️ **Cursor** `conversation_id` appears inside nested `data` object — deep extract finds it. +- ⚠️ **Gemini** deep-extract for text/args/name finds values under `parts`, `arguments`, `call.method` etc. Test with realistic Gemini payload shapes. +- ⚠️ `cwd` extraction: native keys (`workspace_root`, `projectRoot`, `cwd`, `directory`) all normalize. +- ⚠️ Payloads with `null` values in expected-string fields don't become the literal string "null" in the session. + +--- + +## 6. Integration Detection & Attribution + +- ⚠️ **Priority 1**: `--integration ` CLI flag wins over everything else. Test: pass `--integration cursor` with a payload shaped like Copilot → attribution is cursor. +- ⚠️ **Priority 2**: `payload.integration` field (set by some CLIs' own wrappers). +- ⚠️ **Priority 3**: unique event-name fallback: + - `BeforeTool` / `AfterTool` / `BeforeModel` → gemini + - camelCase `COPILOT_HOOK_EVENT_TYPES.includes(eventType)` → copilot + - snake_case `CODEX_HOOK_EVENT_TYPES.includes(eventType)` → codex + - dotted `tool.before` → opencode +- ⚠️ **Priority 4**: payload shape `detect()` — parameterize each integration's detect function with representative payloads and negative samples from every other integration. A detect function must not false-positive on another's payload. +- ⚠️ **Default fallback**: unknown → `claude-code`. Regression-test: payload `{ hook_event_name: "sessionStart" }` with no `--integration` flag must still resolve to `copilot`, not `claude-code`. + +--- + +## 7. Session ID Extraction & Fallback + +🔁 For each integration: + +- ✅ Real session ID present in payload → extracted and passed through unchanged. +- ⚠️ Empty payload + no env session vars → fallback ID is `session--` (never blank, never literal `—`, never `undefined`). +- ⚠️ Env var recovery: `COPILOT_SESSION_ID`, `CURSOR_SESSION_ID`, `GEMINI_SESSION_ID` populate session when payload is empty. 
+- ⚠️ Same session across events emits **same** sessionId — dashboard groups them into one session row. + +--- + +## 8. Policy Evaluation per Integration + +🔁 For each integration: + +- ✅ Policy fires with correct canonical event name. +- ⚠️ Policy returning `deny` results in exit code 2 and `stderr` containing the reason. +- ⚠️ Policy returning `instruct` results in exit 0 with `stdout` containing the instruction block. +- ⚠️ Stop-event policies (`require-commit-before-stop`) evaluate correctly for **non-git** cwd — they skip with a reason, not crash. +- ⚠️ Block policies (`block-sudo`, `block-rm-rf`, …) parse `tool_input.command` correctly after normalization for each integration. + +--- + +## 9. Deduplication + +- ⚠️ Same logical event fired in two scopes (project + user) produces exactly one persisted entry — the firing lock claims the event and the second process silently exits 0. +- ⚠️ Lifecycle events (SessionStart/SessionEnd/Stop) use the 5s dedup window with sessionId in the fingerprint — rapid re-runs of the same session don't double-log, but two different sessions within 5s each log. +- ⚠️ Non-lifecycle events use DEDUP_BUCKET_MS with tool_input JSON in the fingerprint — two identical Bash commands within the window log once. +- ⚠️ Dedup fingerprint **includes** `integrationType` so a Copilot SessionStart and a Claude SessionStart in the same cwd at the same instant both get logged. + +--- + +## 10. Persistence to hook-activity Store + +🔁 For each integration: + +- ✅ Entry written has `integration`, `sessionId`, `eventType` (canonical), `hookEventName` (raw), `cwd`, `decision`, `timestamp`, `durationMs`. +- ⚠️ **Copilot regression**: persisted `integration` field is `"copilot"`, never undefined and never silently defaulted to `"claude-code"`. +- ⚠️ Stats file (`stats.json`) increments `totalEvents`, `denyCount`, `topPolicy`, `topPolicyCount` accurately per integration. + +--- + +## 11. 
Dashboard Display Gaps + +- ⚠️ SessionId `—` (em-dash) on the dashboard means the persisted entry literally lacks a sessionId. After the Copilot fix, this should never happen. +- ⚠️ Virtual project mirror: when `integration ∈ {cursor, gemini, codex, pi, opencode}`, events are mirrored into `~/.claude/projects/<encoded-cwd>/<session-id>.jsonl` for cross-integration project views. Test that each integration writes to the correct mirror. +- ⚠️ Dashboard's session detail page for a non-Claude session shows the transcript path computed by the handler (Copilot's `~/.copilot/session-state/<session-id>/events.jsonl`, Gemini's `~/.gemini/tmp/<project-hash>/chats/session-...`). +- ⚠️ `cwd` truncation displays enough right-edge characters to distinguish nested projects. + +--- + +## 12. Sync & Merge Functions + +- ⚠️ **`synchronizeCopilotProjectHooks`** regression guard: call it with **no** project file present — user-scope hooks in `~/.copilot/config.json` must be preserved byte-for-byte. +- ⚠️ With a project file present, sync merges project entries without duplicating existing ones and without touching user-scope (local-binary) entries. +- ⚠️ `ensureCopilotRevisionSymlink`: on a snap install, creates `snap/copilot-cli/<revision>/.config/.../hooks` → `common/.config/.../hooks` symlink. On a non-snap install, is a no-op. Test both branches. + +--- + +## 13. Scopes: user / project / local + +🔁 For each integration that supports multiple scopes: + +- ⚠️ Simultaneous install at user + project fires each hook only once (firing-lock dedup). +- ⚠️ Local scope (`.failproofai/policies-config.local.json`) overrides project scope, which overrides user scope, for the enabled-policies list. +- ⚠️ Installing at user with `FAILPROOFAI_DIST_PATH` pointing at dev dist produces hooks that reference dev dist; installing at project always uses `npx -y failproofai`. + +--- + +## 14. 
Cross-Version Compatibility + +- ⚠️ Hook command emitted by version N must be understood by version N's handler AND (at best-effort) by version N-1's handler. 
The **self-identifying event name** rule (camelCase for Copilot, snake_case for Codex) makes this work. +- ⚠️ Published npm `latest` version compatibility: install at project scope (uses `npx -y failproofai`), run the hooks, confirm the published handler still produces a dashboard-compatible entry. + +--- + +## 15. Integration-Specific Deep Cases + +### 15.1 Cursor (The IDE Native) + +#### The Function Pipeline +1. **Cursor IDE** triggers a hook from `hooks.json`. +2. **Command**: `failproofai --hook --integration cursor --stdin`. +3. **Payload**: JSONL object piped to stdin. +4. **Handler**: `--integration cursor` explicitly guards identity. + +#### Deep Assertions +- 🛠️ **Twin-Fire Deduplication**: Cursor fires both User and Project hooks. + - ✅ **Assertion**: Firing lock MUST claim the first and exit-0 the second immediately. No duplicate Allow JSON should bypass a Deny from the first. +- 🛠️ **Hyper-Specific Attribution**: + - ✅ **Assertion**: `cwd` must be lifted from `workspace_roots[0]` if top-level `cwd` is missing. + - ✅ **Assertion**: If tool_input contains `cwd` (e.g. from terminal executing in a sub-folder), it overrides `workspace_roots`. +- 🛠️ **MCP Protocol**: + - ✅ **Assertion**: `beforeMCPExecution` and `afterMCPExecution` correctly map to `PreToolUse` and `PostToolUse` and block unauthorized MCP tool calls. + +### 15.2 Gemini CLI (Deep Data Mining) + +#### The Function Pipeline +1. **Gemini CLI** triggers a hook from `~/.gemini/settings.json`. +2. **Command**: `failproofai --hook --integration gemini --stdin`. +3. **Normalization**: Performs **Deep Extract Logic**. +4. **Handler**: PascalCase Identity Guard protects native event fallback. + +#### Deep Assertions +- 🛠️ **Deep Extract Logic**: Gemini nests data deeply. + - ✅ **Assertion**: Payload `{ data: { call: { method: "ls" } } }` MUST yield `tool_name: "ls"`. + - ✅ **Assertion**: Payload `{ parts: [{ text: "hi" }] }` MUST yield `tool_input: "hi"`. 
+- 🛠️ **PascalCase Identity Guard**: + - ✅ **Assertion**: `BeforeTool` MUST BE detected as `gemini` purely by its name if no integration flag is present. +- 🛠️ **Transcript Resolution**: + - ✅ **Assertion**: Dashboard transcript links MUST point to `~/.gemini/tmp//chats/session-.json`. + +### 15.3 GitHub Copilot (Sync & Snap) + +#### The Function Pipeline +1. **Copilot CLI** triggers a hook from `~/.copilot/config.json`. +2. **Command**: `failproofai --hook --integration copilot`. +3. **Normalization**: Parses stringified JSON values. + +#### Deep Assertions +- 🛠️ **JSON-in-String Normalization**: + - ✅ **Assertion**: `toolArgs` formatted as `"{\"command\":\"ls\"}"` MUST be parsed into an object. +- 🛠️ **Waterfall Metadata Extraction**: + - ✅ **Assertion**: Copilot `tool_input` resolution MUST cleanly cascade across inconsistent keys: `toolInput` -> `toolArgs` -> `data.params` -> `message` -> `prompt`. +- 🛠️ **The Sync Engine & Snap Repair**: + - ✅ **Assertion**: `.bashrc` MUST correctly contain `env failproofai copilot-sync 2>/dev/null` allowing snap revisions to access the `common/` hook symlinks without manual intervention. +- ⚠️ **CamelCase Stability**: + - ✅ **Assertion**: Hook command MUST install with `preToolUse` (camelCase) to ensure older handlers correctly classify it as Copilot without flags. +- 🛠️ **Fuzzy Deep Payload Detection (Heuristic)**: + - ✅ **Assertion**: If `--integration copilot` is missing, `detect()` must successfully identify Copilot if keys like `sessionId` or `toolName` exist inside a nested `data` object, PROVIDED the `hookName` does NOT start with PascalCase (which would conflict with Claude). +- 🛠️ **Silence Guard (Double-Dip Protection)**: + - ✅ **Assertion**: If an event arrives marked as `--integration claude-code` (from a corrupted legacy project install) but the event type is exclusively Copilot's (e.g., `sessionStart`), the handler MUST silently abort (exit 0, no dashboard log) to prevent phantom duplicates. 
+- 🛠️ **Binary Detection**: + - ✅ **Assertion**: `detectInstalled()` accurately verifies Copilot presence by checking `which gh` instead of `copilot`, reflecting its architecture as a GitHub CLI extension. + +### 15.4 OpenCode (Plugin-Based) + +#### The Plugin Pipeline +**OpenCode uses a TypeScript plugin injected at `.opencode/plugins/failproofai.ts`**: +```typescript +import { spawnSync } from "node:child_process"; +export const FailproofAIPlugin = (ctx: any) => { + const callcli = (event: string, args: any) => { + const payload = { ...args, integration: "opencode", cwd: ctx.directory }; + const cmd = 'failproofai --hook ' + event + ' --integration opencode --stdin'; + const res = spawnSync(cmd, { input: JSON.stringify(payload), shell: true, encoding: "utf8" }); + if (res.status !== 0) throw new Error(res.stderr || "Blocked by FailproofAI"); + }; +}; +``` + +#### Deep Assertions +- ✅ **Synchronous Blocking**: The plugin MUST `throw Error` if `spawnSync` exits with code 2, halting the agent workflow definitively. +- ✅ **Session ID Persistence**: `session.created` must set `currentSessionId` used by all subsequent calls in the session. +- ⚠️ **Diagnostic Silence**: The wrapper must not write debug logs to stderr that could break OpenCode's JSON protocol. 
+ +### 15.5 Pi Coding Agent (Extension-Based) + +#### The Extension Pipeline +**Pi uses a TypeScript extension at `.pi/extensions/failproofai.ts`**: +```typescript +export default function (pi: ExtensionAPI) { + const callcli = (event: string, args: any, ctx?: any) => { + const sessionId = ctx?.sessionId || pi.session?.id || "default"; + const payload = { ...args, integration: "pi", cwd: process.cwd(), session_id: sessionId }; + const res = spawnSync('failproofai --hook ' + event + ' --integration pi --stdin', { + input: JSON.stringify(payload), shell: true, encoding: "utf8" + }); + if (res.status !== 0) { + ctx?.ui?.setStatus("FailproofAI: Blocked - " + (res.stderr || res.stdout)); + return { block: true }; + } + }; +} +``` + +#### Deep Assertions +- ✅ **Premium UI Feedback**: Verify `setStatus` is called when a policy denies an action so the user receives IDE UI feedback. +- ✅ **Recursive Isolation**: Verify the extension ignores messages starting with `/failproofai-status` to prevent infinite trigger loops. +- ✅ **Heritage Attribution**: Verify `codex_session_id` and `codex_event` keys (if present) are handled. + +### 15.6 OpenAI Codex (Legacy CLI) + +#### Deep Assertions +- 🛠️ **Case Stability**: + - ✅ **Assertion**: CLI invokes with snake_case `pre_tool_use`, but `handler` maps to PascalCase `PreToolUse`. Config file keys must be PascalCase. +- 🛠️ **Trace Parsing**: + - ✅ **Assertion**: `trace-parser.ts` MUST correctly segment multi-line Codex logs into individual `HookActivityEntry` metadata blocks. + +--- + +## Cross-cutting: Fixture Matrix (Ultimate Payload Gallery) + +For every integration, maintain a fixture directory parameterized in tests. Here are canonical assertions for parsing these core event payloads: + +### 1. 
Cursor `beforeShellExecution` (Stdin JSON) +```json +{ + "hook_event_name": "beforeShellExecution", + "workspace_roots": ["/home/user/project"], + "command": "rm -rf .env", + "integration": "cursor" +} +``` +**Assertion**: `tool_name` -> `run_terminal_command`, `tool_input` -> `rm -rf .env`, `cwd` -> `/home/user/project`. + +### 2. Gemini `BeforeTool` (Deep Stdin) +```json +{ + "hook_event_name": "BeforeTool", + "data": { + "call": { + "method": "read_file", + "arguments": { "path": "secrets.json" } + } + } +} +``` +**Assertion**: `tool_name` -> `read_file`, `tool_input` -> `{ "path": "secrets.json" }`, `integration` -> `gemini`. + +### 3. Copilot `preToolUse` (CLI Args + toolArgs String) +```json +{ + "hookEventName": "preToolUse", + "sessionId": "550e8400-e29b-41d4-a716-446655440000", + "toolName": "bash", + "toolArgs": "{\"command\":\"ls -la\"}" +} +``` +**Assertion**: `tool_input` MUST be a parsed JSON object `{"command":"ls -la"}`, not the raw string. + +### 4. OpenCode/Pi Plugin (Standard Normalized Stdin) +```json +{ + "integration": "opencode", + "session_id": "ses_123", + "tool_name": "edit_file", + "tool_input": { "content": "..." }, + "cwd": "/abs/path" +} +``` +**Assertion**: `integration` is securely hardcoded inside the TypeScript wrapper, bypassing any CLI guesswork. + +--- + +## 16. Per-Integration End-to-End Deep Coverage + +Every integration is unique — different install surface, different payload shape, +different transcript format, different failure modes. This section enumerates exhaustive +cases **per integration**, covering the full pipeline from CLI trigger to dashboard row. + +Each subsection follows the same structure: + +- **A. Install pipeline** (function chain + every branch) +- **B. Uninstall pipeline** +- **C. Settings-file shape preservation** +- **D. Hook command format** (every token of the generated bash) +- **E. Trigger reality** (what actually fires, what doesn't) +- **F. Payload ingestion** +- **G. 
Normalization** (every key, every fallback) +- **H. Event canonicalization round-trip** +- **I. Attribution** (without `--integration` flag) +- **J. Session ID extraction** (all keys, env recovery, fallback) +- **K. Cwd / workspace resolution** +- **L. Tool name / tool input extraction** +- **M. Policy evaluation** (block, warn, sanitize, instruct) +- **N. Decision honoring** (CLI cancels on exit 2) +- **O. Stdout/stderr contract** +- **P. Persistence fields** +- **Q. Virtual project mirror** +- **R. Transcript path resolution** +- **S. Dashboard row rendering** +- **T. Dashboard session detail page** +- **U. Scope matrix** (user / project / local) +- **V. Cross-scope duplication / dedup** +- **W. Error paths** (empty stdin, malformed JSON, permission errors) +- **X. Config-file concurrency** (two processes writing simultaneously) +- **Y. Cross-version compatibility** (old published handler sees new install) +- **Z. Known quirks specific to this integration** + +--- + +### 16.1 Cursor — End-to-End + +**CLI**: `cursor` and `cursor-agent`. Settings file: `hooks.json` (user: `~/.cursor/hooks.json`, project: `.cursor/hooks.json`, local: `.cursor/hooks.local.json`). + +#### A. Install pipeline +- ✅ `manager.install("cursor", "user")` resolves path to `~/.cursor/hooks.json`. +- ✅ `manager.install("cursor", "project", cwd)` resolves to `/.cursor/hooks.json`. +- ⚠️ If `.cursor/` directory doesn't exist, `mkdirSync(..., { recursive: true })` creates it; permission error surfaces as `CliError` not silent failure. +- ✅ `readSettings` handles a blank file (returns `{}`), a valid JSON file, and a malformed JSON file (throws a clear `CliError` with the file path). +- ✅ `writeHookEntries` iterates every event in `CURSOR_HOOK_EVENT_TYPES` and calls `buildHookEntry` per event. +- ⚠️ Existing non-failproofai entries under the same event key are preserved; ours is appended. 
+- ⚠️ A prior failproofai entry at that event is **replaced**, not duplicated — test re-install twice, count must remain 1 per event. +- ✅ `isFailproofaiHook(h)` matches by command substring (no marker field in Cursor's format). +- ⚠️ `postInstall` is a no-op for Cursor (unlike Copilot). Must not invoke any sync. + +#### B. Uninstall pipeline +- ✅ `removeHooksFromFile` removes only entries where `isFailproofaiHook` returns true. +- ⚠️ If an event key becomes empty after removal, the key is deleted; if `hooks` object becomes empty, it is deleted; if the file becomes empty `{}`, it is still written (not deleted) to preserve explicit empty state. +- ⚠️ Uninstall on a file that never had failproofai entries returns `removed: 0` and doesn't modify the file's mtime. +- ⚠️ Uninstalling user scope must not touch project scope and vice versa. + +#### C. Settings-file shape preservation +- ⚠️ Pre-existing top-level keys (not `hooks`) preserved byte-identical. +- ⚠️ Whitespace / trailing newline preserved if `writeJsonFile` uses `JSON.stringify(..., null, 2) + "\n"`. Assert EOF behavior. +- ⚠️ Nested Cursor-specific options (matcher regexes, disabled flags) inside each hook entry preserved. + +#### D. Hook command format +- ✅ Command string: `"${process.execPath}" "${binaryPath}" --hook ${pascalEvent} --integration cursor --stdin`. +- ⚠️ `eventType` fed to `buildHookEntry` is Cursor's native camelCase (`beforeShellExecution` etc.); **mapped to PascalCase** for the `--hook` argument (via `CURSOR_EVENT_MAP`). This is intentional because Cursor's camelCase names overlap with Copilot's — the `--integration cursor` flag + `--stdin` are what disambiguate. +- ⚠️ `timeout: 60` field present (seconds, not ms — Cursor's schema). +- ⚠️ `--stdin` flag is mandatory; without it the handler reads nothing and mis-classifies. +- ⚠️ Windows: `process.execPath` has backslashes; bash wrapping survives Cursor's shell invocation. + +#### E. 
Trigger reality +- ✅ Each of `beforeShellExecution`, `afterFileEdit`, `beforeReadFile`, `beforeSubmitPrompt`, `beforeMCPExecution`, `afterMCPExecution`, `stop` fires exactly one handler invocation per Cursor event. +- ⚠️ Cursor fires **both** user-scope AND project-scope hooks when both are installed — firing-lock dedup handles (see §V). +- ⚠️ Cursor Agent (headless mode) fires same events as IDE; detect distinguishes via `payload.agent_type` or absence of `editor_context`. +- ⚠️ Cursor does not fire `sessionStart` / `sessionEnd`; our `eventTypes` list reflects reality (test parity). + +#### F–L. Payload ingestion & normalization +- ✅ Stdin JSONL: single line JSON, `\n` terminated. +- ⚠️ `conversation_id` appears under `data.conversation_id` AND top-level — deep extract pulls from either. +- ⚠️ `workspace_roots` is an array; `cwd` normalizes to `workspace_roots[0]` when top-level `cwd` absent. +- ⚠️ Tool input for `beforeShellExecution`: payload key is `command`, not `tool_input.command` — normalizer maps to `{ command: }`. +- ⚠️ Tool input for `afterFileEdit`: `file_path` + `new_content` → `{ file_path, new_content }`. +- ⚠️ `beforeMCPExecution`: `mcp_server`, `mcp_tool`, `arguments` → `tool_name = mcp_server + ":" + mcp_tool`, `tool_input = arguments`. +- ⚠️ PascalCase canonicalization: `beforeShellExecution` → `PreToolUse` (shell is a tool), `afterFileEdit` → `PostToolUse`, `beforeSubmitPrompt` → `UserPromptSubmit`, `stop` → `Stop`. +- ⚠️ `tool_name` defaults: if unknown after normalization, derived from `command` first token (`/usr/bin/ls` → `ls`). + +#### M–O. Policy evaluation & decision honoring +- ⚠️ `block-sudo` on `beforeShellExecution(command: "sudo apt install foo")` → exit 2, stderr contains policy reason; Cursor cancels the exec. +- ⚠️ `warn-repeated-tool-calls` fires with Cursor-specific threshold (Cursor agents are chattier than Claude — policy must detect `session.integration === "cursor"` and raise threshold accordingly). 
+- ⚠️ `sanitize-api-keys` on `afterFileEdit(new_content: "KEY=sk-...")` → deny; Cursor reverts the edit (assert via Cursor's own transcript). +- ⚠️ Stop-event policies (`require-commit-before-stop` etc.) fire on Cursor's `stop` event; must handle Cursor's lack of `transcript_path`. +- ⚠️ Instruct decision (exit 0 with stdout JSON) is consumed by Cursor's system-prompt injector — assert stdout shape matches Cursor's `{ "systemMessage": "..." }` schema. + +#### P–R. Persistence / transcript +- ✅ Persisted entry has `integration: "cursor"`, canonical PascalCase `eventType`, raw `hookEventName` (camelCase), `sessionId`, `cwd`, `policyName`. +- ⚠️ Virtual project mirror: entry mirrored into `~/.claude/projects//.jsonl` (Cursor ∈ VIRTUAL_PROJECT_LOG_INTEGRATIONS). +- ⚠️ Transcript path: Cursor doesn't expose one directly; handler sets `transcriptPath: undefined`. Dashboard detail page handles gracefully (no broken link). + +#### S–T. Dashboard +- ⚠️ Integration badge renders `Cursor` in blue. +- ⚠️ Session detail page lists all events grouped by `sessionId`; if `sessionId` is the synthesized `session-cursor-` (no real UUID), still groups events logically. +- ⚠️ `eventType` column shows `PreToolUse` not `beforeShellExecution` — canonicalization must reach persistence. +- ⚠️ Filter `?integration=cursor` returns only cursor rows; combined with `?decision=deny` narrows further. + +#### U–V. Scope matrix +- ⚠️ User scope: `~/.cursor/hooks.json`. Project: `.cursor/hooks.json`. Local: `.cursor/hooks.local.json` (if supported). +- ⚠️ Twin-fire: both user + project install → Cursor fires both → firing lock picks first, second exits 0 cleanly (no duplicate allow/deny). +- ⚠️ Ordering: project-scope entries evaluated before user-scope (precedence). + +#### W. Error paths +- ⚠️ Empty stdin (Cursor pipes nothing): handler logs warning "stdin is empty for - Cursor Agent might not be piping context", still synthesizes session, returns allow. 
+- ⚠️ Malformed JSON stdin: handler logs "payload parse failed", treats as empty payload, returns allow. +- ⚠️ `~/.cursor/hooks.json` permission denied: install surfaces clear error, doesn't write partial file. + +#### X. Concurrency +- ⚠️ Two Cursor windows firing simultaneously: writes to `current.jsonl` serialized by advisory lock, no JSONL corruption. + +#### Y. Cross-version +- ⚠️ Old published handler receiving `--hook PreToolUse --integration cursor --stdin` + Cursor-shaped payload: attributes correctly via `--integration` flag even if event-name lookup fails. + +#### Z. Known quirks +- ⚠️ Cursor IDE v0.42+ changed payload shape — regression guard for any hard-coded path keys. +- ⚠️ Cursor Agent emits `beforeSubmitPrompt` with an empty `prompt` field during init — sanitize policies must not flag empty strings. +- ⚠️ Cursor's built-in rules file coexists with our hooks — test that our `stop` policy output doesn't conflict with Cursor's own stop-behavior. + +--- + +### 16.2 Gemini CLI — End-to-End + +**CLI**: `gemini`. Settings file: `~/.gemini/settings.json` (user), `.gemini/settings.json` (project). Gemini uses Claude's settings format (`hooks: { EventName: [{ hooks: [...] }] }`). + +#### A. Install pipeline +- ✅ `getSettingsPath("user")` → `~/.gemini/settings.json`. +- ✅ `getSettingsPath("project", cwd)` → `/.gemini/settings.json`. +- ⚠️ Shared Claude-format settings: `writeHookEntries` inserts matchers under `s.hooks[eventType]`; must not disturb other Gemini-specific top-level keys (`theme`, `models`, `mcpServers`). +- ⚠️ `FAILPROOFAI_HOOK_MARKER` field added to each entry — `isFailproofaiHook` matches on marker AND command substring (belt + suspenders). + +#### B. Uninstall pipeline +- ⚠️ Removes entries whose marker is true OR command contains `failproofai`. Empty matcher arrays collapsed. +- ⚠️ Must not remove user's own custom Gemini hooks that happen to share an event type. + +#### C. 
Settings-file shape preservation +- ⚠️ Preserves `theme`, `mcpServers`, `approvalMode`, `telemetry`, `selectedAuthType`, `model` blocks. +- ⚠️ `hooks` block ordering preserved where possible (Gemini sometimes reads events in order). + +#### D. Hook command format +- ✅ Command: `"${process.execPath}" "${binaryPath}" --hook ${eventType} --integration gemini --stdin` (user) OR `npx -y failproofai --hook ${eventType} --integration gemini --stdin` (project). +- ⚠️ Event name preserved as Gemini's **unique PascalCase** (`BeforeTool`, `BeforeModel`, etc.) — these names don't overlap with Claude's PascalCase (`PreToolUse`), so attribution works via event-name fallback even without `--integration`. +- ⚠️ `--stdin` flag mandatory. + +#### E. Trigger reality +- ✅ `BeforeTool`, `AfterTool`, `BeforeModel`, `AfterModel`, `BeforeAgent`, `AfterAgent`, `BeforeToolSelection`, `PreCompress`, `Notification`, `SessionStart`, `SessionEnd`, `UserPromptSubmit`, `Stop` (test every event Gemini actually fires). +- ⚠️ Gemini fires `PreCompress` before truncating context — unique to Gemini; policy has access to `parts` count and can instruct or allow. +- ⚠️ `BeforeToolSelection` fires BEFORE `BeforeTool` — handler must not dedup them together (different canonical events? Currently both map to PreToolUse? **DECIDE AND TEST**). + +#### F–L. Payload & normalization +- ⚠️ Gemini nests **everything** under `data` — deep-extract pulls: `data.call.method` → `tool_name`, `data.call.arguments` → `tool_input`, `data.parts[].text` → prompt text. +- ⚠️ `data.session.id` vs top-level `sessionId` vs `data.sessionID` — normalizer tries all. +- ⚠️ `data.workspace.root_path` → `cwd` fallback. +- ⚠️ `data.model.name` → part of tool_name for `BeforeModel` event. +- ⚠️ Gemini's `parts` array may contain mixed text + functionCall entries — normalizer extracts text for UserPromptSubmit, functionCall.name for BeforeTool. +- ⚠️ Arguments may be a JSON object OR a JSON-encoded string — handle both. 
+- ⚠️ `transcript_path`: Gemini emits `data.transcript_path`; fallback to constructed `~/.gemini/tmp//chats/session--.json`. + +#### M–O. Policy evaluation +- ⚠️ `block-sudo` on `BeforeTool(tool_name=run_shell_command, tool_input.command="sudo ...")` → deny; Gemini cancels. +- ⚠️ `sanitize-api-keys` on `AfterTool` output scans `data.result` / `data.output` text. +- ⚠️ `warn-repeated-tool-calls` threshold tuned for Gemini (tends to retry on model errors). +- ⚠️ Stop-event policies fire on `Stop`; Gemini's Stop has a `reason` field (user-cancel vs model-done) — policy differentiates. +- ⚠️ `PreCompress` policy: custom hook can log size + decide allow/deny (default allow). + +#### P–R. Persistence / transcript +- ✅ `integration: "gemini"`, PascalCase canonical event, sessionId = real Gemini session UUID. +- ⚠️ Virtual project mirror: Gemini writes to `~/.claude/projects//.jsonl` (Gemini ∈ VIRTUAL_PROJECT_LOG_INTEGRATIONS). +- ⚠️ Transcript: `~/.gemini/tmp//chats/session-T-.json`. Dashboard link must resolve to existing file. +- ⚠️ Gemini session UUID vs Gemini's internal "chat id" — test both keys map to same session row. + +#### S–T. Dashboard +- ⚠️ Badge: `Gemini` in indigo. +- ⚠️ Session detail page parses Gemini's chat JSON (different shape than Claude's JSONL transcript) — log-entries parser has a Gemini branch. Test with real fixture. +- ⚠️ `BeforeTool` / `AfterTool` shown as PascalCase `PreToolUse` / `PostToolUse`; `BeforeModel` / `AfterModel` shown as... **DECIDE canonical mapping and test.** + +#### U–V. Scope matrix +- ⚠️ User: `~/.gemini/settings.json`. Project: `.gemini/settings.json`. +- ⚠️ No Cursor-style twin-fire; dedup still applies for safety. + +#### W. Error paths +- ⚠️ Empty `data` block: handler doesn't crash on deep-extract; falls back to integration="gemini" + sessionId fallback. +- ⚠️ Gemini CLI invokes hook with non-JSON stdin during auth flow — handler treats as empty, returns allow. + +#### X. 
Concurrency +- ⚠️ Gemini CLI runs tools sequentially, but notifications + Before/AfterModel may overlap — advisory lock required. + +#### Y. Cross-version +- ⚠️ `BeforeTool` / `AfterTool` etc. are Gemini-unique; event-name fallback attributes correctly on any handler version that lists them in GEMINI_UNIQUE or GEMINI_HOOK_EVENT_TYPES. + +#### Z. Known quirks +- ⚠️ Gemini re-fires `BeforeTool` on retry — `warn-repeated-tool-calls` must not count these as user-initiated repeats. +- ⚠️ Gemini's `Notification` event is transient; dashboard must not surface every one as a major row (consider collapsing). +- ⚠️ Gemini on Windows uses a different tmp path (`%LOCALAPPDATA%\Google\Gemini\tmp\...`); transcript resolution branches. + +--- + +### 16.3 OpenAI Codex — End-to-End + +**CLI**: `codex`. Settings file: user `~/.codex/hooks.json`, project `.codex/hooks.json`. Codex uses Claude-like format but keys are PascalCase in config while commands are invoked with snake_case event args. + +#### A. Install pipeline +- ✅ Paths resolve correctly for user and project. +- ⚠️ `writeHookEntries` writes entries under PascalCase keys (`PreToolUse`), but the `--hook` argument in the bash command is snake_case (`pre_tool_use`). Test both simultaneously. +- ⚠️ Existing Codex-specific settings (`modelProvider`, `approvalPolicy`, `sandboxPolicy`) preserved. + +#### B. Uninstall pipeline +- ✅ Removes matchers whose `hooks[].command` contains `failproofai` or whose marker is true. +- ⚠️ Empty PascalCase event keys deleted after removal. + +#### D. Hook command format +- ✅ `"${process.execPath}" "${binaryPath}" --hook ${snakeEvent} --integration codex` (user) / `npx -y failproofai --hook ${snakeEvent} --integration codex` (project). +- ⚠️ Snake_case event name in command = unique Codex signal (distinct from Claude PascalCase, Copilot camelCase, Gemini unique PascalCase, Cursor camelCase, OpenCode dotted). Attribution self-identifies without `--integration`. 
+- ⚠️ No `--stdin` (Codex uses env vars for some payload keys). + +#### E. Trigger reality +- ✅ `pre_tool_use`, `post_tool_use`, `session_start`, `session_end`, `user_prompt_submitted`, `agent_stop`, `notification` — every one fires once per Codex event. +- ⚠️ Codex supports approval-based tool gating; hook firing order relative to Codex's built-in approval dialog must not deadlock. + +#### F–L. Payload & normalization +- ⚠️ Codex emits snake_case keys (`session_id`, `tool_name`, `tool_input`) natively — normalization is light. +- ⚠️ `tool_input` is already a JSON object; no stringified parsing needed. +- ⚠️ `transcript_path` absent; derived from `CODEX_TRACE_DIR` env var + session ID. +- ⚠️ `cwd` from `workspace_root` or `process.cwd()`. + +#### H. Canonicalization +- ⚠️ `pre_tool_use` → `PreToolUse`, `session_start` → `SessionStart`, etc. `CODEX_EVENT_MAP` is the source of truth — round-trip fuzz test. + +#### M–O. Policy evaluation +- ⚠️ `block-sudo`, `block-rm-rf`, `block-curl-pipe-sh` all apply on `pre_tool_use`; deny → exit 2 → Codex cancels. +- ⚠️ Codex's sandbox policy may already block some commands; our hook layer must not false-report deny when Codex itself also denied (avoid duplicate logs). +- ⚠️ Stop-event policies on `agent_stop`: test non-git, detached-HEAD, and fully-green cases. + +#### P–R. Persistence / transcript +- ✅ `integration: "codex"`, canonical PascalCase event name, real session UUID. +- ⚠️ Virtual project mirror: Codex ∈ VIRTUAL_PROJECT_LOG_INTEGRATIONS — writes to `~/.claude/projects//.jsonl`. +- ⚠️ Codex trace log: `~/.codex/traces/.log` — parsed by `src/codex/trace-parser.ts`. Unit-test parser with real log samples: extracts `tool_calls`, handles truncation, multi-line entries, UTF-8 edge cases. + +#### S–T. Dashboard +- ⚠️ Badge: `Codex` in purple. +- ⚠️ Trace parser output displayed alongside hook entries on session detail page; order preserved by timestamp. + +#### U–V. 
Scope matrix +- ⚠️ User vs project: same rules as Cursor/Gemini. + +#### W. Error paths +- ⚠️ Codex trace file missing: dashboard session page shows hooks-only history, no crash. +- ⚠️ Codex killed mid-tool: `post_tool_use` never fires; Stop policies still evaluate on next `agent_stop`. + +#### Y. Cross-version +- ⚠️ `pre_tool_use` snake_case is Codex-unique; old handler still attributes via event-name fallback. + +#### Z. Known quirks +- ⚠️ Codex `notification` event is fire-and-forget — don't dedup it against other events. +- ⚠️ Codex may spawn sub-agents; `agent_stop` fires for each. Session grouping must handle parent + children. + +--- + +### 16.4 OpenCode — End-to-End + +**Runtime**: `.opencode/plugins/failproofai.ts` (TypeScript plugin loaded by OpenCode at runtime). No static settings file — plugin code is the install artifact. + +#### A. Install pipeline +- ✅ `manager.install("opencode", "project")` writes `.opencode/plugins/failproofai.ts` with the generated plugin source. +- ✅ `manager.install("opencode", "user")` writes `~/.opencode/plugins/failproofai.ts`. +- ⚠️ Plugin source embeds `failproofai --hook --integration opencode --stdin` shell command or invokes `cli.mjs` directly (choose one path and test consistently). +- ⚠️ Plugin relies on `FAILPROOFAI_DIST_PATH` or `npx -y failproofai` — matrix-test both modes. +- ⚠️ Existing user-authored plugins in same directory preserved. + +#### B. Uninstall pipeline +- ⚠️ `removeHooksFromFile` deletes the plugin file entirely (since each plugin is one file). Must not delete unrelated plugins. + +#### C. Plugin source shape +- ⚠️ Plugin exports `FailproofAIPlugin` (named export) with signature OpenCode expects. +- ⚠️ Plugin captures `ctx.directory` for cwd; `ctx.session?.id` for session. +- ⚠️ Plugin uses `spawnSync` (synchronous — OpenCode requires sync blocking to halt tool calls). + +#### D. 
Invocation surface +- ✅ Plugin's `callcli(event, args)` builds payload `{ ...args, integration: "opencode", cwd: ctx.directory }` and pipes JSON stdin. +- ⚠️ `--integration opencode` passed explicitly — no event-name fallback needed. +- ⚠️ Dotted event names (`tool.before`, `tool.after`, `session.created`, `session.destroyed`) preserved verbatim in payload `hook_event_name`. + +#### E. Trigger reality +- ✅ `session.created`, `session.destroyed`, `tool.before`, `tool.after`, `prompt.submit`, `agent.stop` — each fires exactly once. +- ⚠️ OpenCode fires hooks synchronously during tool dispatch — blocking longer than 10s kills the tool call. + +#### F–L. Payload & normalization +- ⚠️ OpenCode plugin pre-normalizes keys (`integration`, `session_id`, `tool_name`, `tool_input`, `cwd`) before spawning. Handler has almost nothing to do. +- ⚠️ If plugin ctx is missing session (rare init case), plugin sends `session_id: "default"`; handler synthesizes `session-opencode-default`. +- ⚠️ `tool_input` is always an object (plugin pre-serializes). + +#### H. Canonicalization +- ⚠️ `tool.before` → `PreToolUse`, `tool.after` → `PostToolUse`, `session.created` → `SessionStart`, etc. `OPENCODE_EVENT_MAP` is the source of truth. + +#### M–O. Policy evaluation & decision +- ⚠️ Deny → `throw new Error(stderr)` in plugin → OpenCode treats as tool failure, cancels call. +- ⚠️ Instruct → plugin reads stdout, injects into agent context (OpenCode's system-prompt addendum mechanism). +- ⚠️ Timeout in `spawnSync` → plugin treats as allow (fail-open) to avoid freezing the agent. + +#### P–R. Persistence +- ✅ `integration: "opencode"`, canonical event names, real session UUID. +- ⚠️ Virtual project mirror: OpenCode ∈ VIRTUAL_PROJECT_LOG_INTEGRATIONS. +- ⚠️ Transcript: OpenCode has no file transcript; dashboard session page uses hook events as the timeline. + +#### S–T. Dashboard +- ⚠️ Badge: `OpenCode` in amber. +- ⚠️ Session detail renders from hook events only (no external transcript file to cross-reference). 
+ +#### U–V. Scope matrix +- ⚠️ User-scope plugin: `~/.opencode/plugins/failproofai.ts`. Project: `.opencode/plugins/failproofai.ts`. Local: N/A (OpenCode has no .local convention). +- ⚠️ If both scopes install, OpenCode loads both — plugin imports dedup via file-content hash; firing-lock handles runtime dedup. + +#### W. Error paths +- ⚠️ `spawnSync` fails to find `failproofai` binary: plugin logs warning to OpenCode debug channel, returns allow (fail-open) so OpenCode keeps working. +- ⚠️ JSON stringify failure on circular payload: plugin catches, sends `{}`, handler falls back. +- ⚠️ **Diagnostic stderr suppression**: handler must NOT write debug logs to stderr during normal success — OpenCode parses stderr strictly. Regression guard: test that handler's stderr is empty on allow. + +#### X. Concurrency +- ⚠️ OpenCode fires tool.before / tool.after on different tools concurrently (parallel tool calls) — advisory lock serializes persistence writes. + +#### Y. Cross-version +- ⚠️ Plugin is version-controlled with our package; `--integration opencode` flag is explicit — cross-version works as long as handler accepts the flag. + +#### Z. Known quirks +- ⚠️ OpenCode's `session.created` fires BEFORE `ctx.session.id` is populated in some versions — plugin must handle missing session ID. +- ⚠️ OpenCode's tool arg format varies by tool; plugin sends raw `args` object and relies on handler's policy code to interpret. +- ⚠️ Plugin throws `new Error("Blocked by FailproofAI")` — OpenCode renders as a red failure; test exact message. + +--- + +### 16.5 Pi Coding Agent — End-to-End + +**Runtime**: `.pi/extensions/failproofai.ts` (TypeScript extension). Like OpenCode, code IS the install artifact. + +#### A. Install pipeline +- ✅ Writes `~/.pi/extensions/failproofai.ts` (user) or `.pi/extensions/failproofai.ts` (project). +- ⚠️ Extension source imports Pi's `ExtensionAPI` type (loosely — no hard type dep). 
+- ⚠️ Extension uses `spawnSync` identical to OpenCode for synchronous blocking. + +#### B. Uninstall pipeline +- ⚠️ Deletes extension file; no other cleanup. + +#### C. Extension source shape +- ⚠️ Default export is a function `(pi: ExtensionAPI) => void`. +- ⚠️ Registers event handlers for each Pi event via `pi.on("session_start", ...)` etc. +- ⚠️ UUID resolution: Pi's session key may be non-UUID (e.g. path-like); extension converts to stable UUID via hash. + +#### D. Invocation surface +- ✅ Command: `failproofai --hook --integration pi --stdin`. +- ⚠️ Event name snake_case (`session_start`, `tool_call`, `tool_result`, `input`) — unique-ish but overlaps with Codex's snake_case. Therefore `--integration pi` flag is mandatory for correct attribution. + +#### E. Trigger reality +- ✅ `session_start`, `session_end`, `tool_call`, `tool_result`, `input`, `error` — each fires. +- ⚠️ Pi's `input` = user prompt; maps to canonical `UserPromptSubmit`. +- ⚠️ Pi's `tool_call` maps to `PreToolUse`, `tool_result` to `PostToolUse`. +- ⚠️ Pi has no native Stop event; `session_end` serves that role. + +#### F–L. Payload & normalization +- ⚠️ `ctx.sessionId` populates `payload.session_id` on every event. +- ⚠️ `tool_call` payload: `{ name, arguments }` → `tool_name`, `tool_input`. +- ⚠️ `tool_result` payload: `{ name, result, error }` — policy decision depends on success/failure. +- ⚠️ Pi's `directory` vs `cwd` vs `workspace_root`: normalizer tries all, prefers most specific. +- ⚠️ Special key `codex_session_id` / `codex_event` (heritage from Pi-over-Codex) — handled if present. + +#### H. Canonicalization +- ⚠️ `session_start` → `SessionStart`. Note collision with Codex's `session_start`. Handler disambiguates by `--integration pi` flag only. Regression guard: Pi payload without `--integration` defaults to Codex → WRONG; add explicit Pi detect via presence of `pi_version` or similar payload key. + +#### M–O. 
Policy evaluation & decision +- ⚠️ Deny: extension returns `{ block: true }` AND calls `ctx.ui.setStatus("FailproofAI: Blocked - ")` for premium UI feedback. +- ⚠️ Instruct: extension injects stdout into Pi's agent context via `ctx.agent.addSystemMessage`. +- ⚠️ Recursive isolation: extension detects if payload content starts with `/failproofai-status` and short-circuits (allow, no log) to prevent infinite loops from its own status messages. + +#### P–R. Persistence +- ✅ `integration: "pi"`, canonical event names, UUID session. +- ⚠️ Virtual project mirror: Pi ∈ VIRTUAL_PROJECT_LOG_INTEGRATIONS. +- ⚠️ Pi's transcript/log path: `~/.pi/sessions//transcript.jsonl` (or similar). Dashboard parses if present. + +#### S–T. Dashboard +- ⚠️ Badge: `Pi` in rose. +- ⚠️ Session detail surfaces Pi-specific status messages (from `setStatus` calls) alongside policy decisions. + +#### U–V. Scope matrix +- ⚠️ User: `~/.pi/extensions/failproofai.ts`. Project: `.pi/extensions/failproofai.ts`. Local: N/A. +- ⚠️ Both-scope install: Pi loads both; dedup via content hash + firing-lock. + +#### W. Error paths +- ⚠️ `spawnSync` times out (>10s): extension treats as allow, logs warning via `ctx.ui.setStatus`. +- ⚠️ Extension exception: caught, logged, allow-through so Pi session doesn't crash. +- ⚠️ **Diagnostic stderr suppression** (same as OpenCode): handler stderr empty on allow. + +#### X. Concurrency +- ⚠️ Pi may dispatch parallel tool calls in agent-mode; advisory lock serializes persistence. + +#### Y. Cross-version +- ⚠️ `--integration pi` flag mandatory; without it on old handler, payload falls back to Codex (collision). Publish only when handler recognizes Pi via payload signature too. + +#### Z. Known quirks +- ⚠️ Pi's `ctx.session?.id` may be undefined for the very first event after launch; extension handles gracefully. +- ⚠️ Pi premium features: `setStatus` is only available in paid tier; extension must no-op if `ctx.ui` undefined. 
+- ⚠️ Pi emits `error` events for non-fatal issues; policy should not treat every error as a Stop. + +--- + +## 17. Transcript Parser Edge Cases (per integration) + +Each integration's transcript parser (`lib/log-entries.ts` or `src/codex/trace-parser.ts`) has its own format. Parameterize these tests per integration: + +- ✅ Valid transcript: every line parses, timeline order preserved. +- ⚠️ Partial-line at EOF (file still being written): parser handles without throwing. +- ⚠️ UTF-8 BOM at start: stripped. +- ⚠️ Non-UTF-8 bytes in the middle: parser substitutes replacement chars, keeps going. +- ⚠️ Empty file: returns `[]`. +- ⚠️ File larger than memory limit: parser streams, doesn't load all into RAM. +- ⚠️ Nested tool calls: parent/child tool relationships preserved in output order. +- ⚠️ System messages interleaved with user/assistant: correctly typed in output. +- ⚠️ Integration-specific fields (Gemini's `parts`, Codex's trace markers, Cursor's MCP blocks): extractor pulls them into dashboard-visible metadata. + +--- + +## 18. Dashboard Display Gap — Deep Catalog (per integration) + +Gaps that appear in the dashboard when the persistence layer lacks data. For each integration, confirm the rendering falls back sensibly: + +- ⚠️ Missing `sessionId`: show fallback `session-<integration>-<suffix>`, NEVER empty `—`. +- ⚠️ Missing `transcriptPath`: session detail page renders from hook events only; "View transcript" button hidden instead of linking to 404. +- ⚠️ Missing `cwd`: row shows `—`; project filter doesn't crash. +- ⚠️ Missing `toolName`: derived from command or "(none)"; column never empty. +- ⚠️ Missing `policyName` but `policyNames` present: render first + "+N" count. +- ⚠️ Missing both `policyName` and `policyNames` on allow: "—" is correct. +- ⚠️ Missing `reason` on deny/instruct: stderr snippet shown instead of blank. +- ⚠️ Integration field = legacy value not in `INTEGRATION_STYLES`: badge shows raw string with default gray styling, no crash. 
+- ⚠️ Very long `reason` (>2kb): truncated with "…" + expandable click. +- ⚠️ Policy reason with embedded newlines: rendered as multi-line block, not `\\n` literal. +- ⚠️ Duration spike (>10s): highlighted to flag policy performance regression. + +--- + +## 19. Manager & CLI Surface (per integration) + +- ⚠️ `failproofai policies` (list) — shows per-integration status for each scope. +- ⚠️ `failproofai policies --install --integration ` — enables only that policy. +- ⚠️ `failproofai policies --install all --integration ` — enables all. +- ⚠️ `failproofai policies --uninstall --integration ` — disables only that policy. +- ⚠️ `failproofai policies --uninstall all --integration ` — removes all hooks. +- ⚠️ `--scope user | project | local` — routing to correct file. +- ⚠️ `--cwd ` override for project-scope operations. +- ⚠️ `--strict` flag for custom hook loading: error instead of fail-open on syntax errors. +- ⚠️ `--dry-run` (if supported): prints what would change without writing. +- ⚠️ `failproofai p -i -c ` — inline test a custom policy against each integration's payload shape. +- ⚠️ `failproofai --version` — matches package.json; regression guard for version-consistency CI check. +- ⚠️ `failproofai copilot-sync` — works silently on non-snap systems; idempotent. + +--- + +## 20. Custom Hooks & Convention Policies (per integration) + +- ⚠️ `failproofai.config.js` / `.failproofai-project.js` / `.failproofai-user.js` discovered in correct order. +- ⚠️ Custom hook `match.events` filtering works with each integration's canonical event names. +- ⚠️ Custom hook receiving `ctx.session.integration` can branch per integration. +- ⚠️ Custom hook timeout (10s) kills long-running user code without crashing handler. +- ⚠️ Custom hook exception caught, logged, treated as allow (fail-open unless `--strict`). +- ⚠️ Convention policies (`.failproofai-` dir) loaded with correct scope tag. 
+- ⚠️ Custom hook returning `deny` with reason shows up in persistence with `policyName: "custom/"`. +- ⚠️ Transitive imports from custom hook: `loader-utils.ts` rewrites `from 'failproofai'` to local dist path. + +--- + +## 21. Release & Publishing Safety + +- ⚠️ Version bump only updates root `package.json` (CI version-consistency check). +- ⚠️ CHANGELOG.md has entry under `## Unreleased`. +- ⚠️ Docker clean-install smoke test passes from packed tarball (not local source). +- ⚠️ `npm pack --ignore-scripts` produces a tarball that installs cleanly. +- ⚠️ After publishing, `npx -y failproofai@` used by project-scope hooks works end-to-end on a fresh machine for each integration. +- ⚠️ E2E test suite runs a smoke flow for each integration (install → fire event → check persistence). + +--- + +## Known Past Regressions (Must maintain named tests) + +| # | Regression | Test name | +|---|-----------------------------------------------------------------------|------------------------------------------------------| +| 1 | Codex SessionStart mis-attributed to Gemini | `handler > --integration flag wins over event-name` | +| 2 | Lifecycle events swallowed by 60s dedup window | `dedup > lifecycle uses 5s window + sessionId` | +| 3 | Copilot events labeled as Claude on dashboard | `copilot > native camelCase event names install` | +| 4 | Copilot session ID shows as `—` | `copilot > fallback sessionId synthesized` | +| 5 | `synchronizeCopilotProjectHooks` wipes user-scope entries | `copilot-sync > preserves user scope when no project file` | +| 6 | Copilot `toolArgs` stringified JSON caused tool_input to be a string | `copilot > normalize parses toolArgs JSON` | +| 7 | `npx -y failproofai` (published 0.0.5) ignored `--integration` flag | `cross-version > event-name fallback attributes correctly on old handler` | +| 8 | Cursor integration policy bypass on non-Claude agents | `policy > warn-repeated-tool-calls tunes for non-Claude` | +| 9 | Diagnostic stderr leak broke 
OpenCode/Pi JSON protocol | `opencode/pi > handler silent on success` | +| 10 | `.failproofai-` convention hooks not loading | `custom-hooks > convention files loaded per scope` | + +--- + +**Usage:** Treat this document as the absolute architectural source of truth and acceptance checklist for any PR touching `src/hooks/`, `src/codex/`, or any integration file. A PR that modifies behavior in one of the categories above must either add/update a test covering its rows or explicitly justify in the PR description why no test was added. diff --git a/__tests__/e2e/hooks/copilot-integration.e2e.test.ts b/__tests__/e2e/hooks/copilot-integration.e2e.test.ts new file mode 100644 index 00000000..182e2ae3 --- /dev/null +++ b/__tests__/e2e/hooks/copilot-integration.e2e.test.ts @@ -0,0 +1,241 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { execSync, spawnSync } from "node:child_process"; +import { writeFileSync, readFileSync, existsSync, mkdirSync, rmSync } from "node:fs"; +import { resolve, join } from "node:path"; +import { CopilotPayloads } from "../helpers/payloads"; +import { + _resetForTest, + getAllHookActivityEntries, + searchHookActivity, +} from "../../../src/hooks/hook-activity-store"; + +const BINARY_PATH = resolve(__dirname, "../../../bin/failproofai.mjs"); +const PROJECT_DIR = resolve(__dirname, "../../fixtures/copilot-project"); +const HOME_DIR = resolve(PROJECT_DIR, ".test-home"); +const COPILOT_HOME = resolve(HOME_DIR, ".copilot"); +const COPILOT_CONFIG_PATH = resolve(COPILOT_HOME, "config.json"); +const COPILOT_SESSION_STATE_DIR = resolve(COPILOT_HOME, "session-state"); +const COPILOT_PROJECT_HOOKS_PATH = resolve(PROJECT_DIR, ".github", "hooks", "failproofai.json"); +const BASHRC_PATH = resolve(HOME_DIR, ".bashrc"); +const ACTIVITY_DIR = resolve(HOME_DIR, ".failproofai", "cache", "hook-activity"); +const DEDUP_DIR = resolve(HOME_DIR, ".failproofai", "cache", "dedup"); +const COPILOT_SESSION_ID = 
"11111111-2222-3333-4444-555555555555"; + +function cliEnv(extraEnv: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv { + return { + ...process.env, + HOME: HOME_DIR, + COPILOT_HOME, + FAILPROOFAI_DIST_PATH: process.cwd(), + FAILPROOFAI_TELEMETRY_DISABLED: "1", + FAILPROOFAI_SKIP_KILL: "true", + ...extraEnv, + }; +} + +function resetActivityStore(): void { + _resetForTest(ACTIVITY_DIR); +} + +function readCopilotConfig(): Record { + return JSON.parse(readFileSync(COPILOT_CONFIG_PATH, "utf8")); +} + +function readActivityEntries(sessionId?: string) { + resetActivityStore(); + if (sessionId) { + return searchHookActivity({ sessionId }, 1).entries; + } + return getAllHookActivityEntries(); +} + +function runCopilotHook( + event: string, + payload: Record | string, + extraEnv: NodeJS.ProcessEnv = {}, + integration = "copilot", +) { + return spawnSync("bun", [BINARY_PATH, "--hook", event, "--integration", integration], { + input: typeof payload === "string" ? payload : JSON.stringify(payload), + cwd: PROJECT_DIR, + env: cliEnv(extraEnv), + encoding: "utf8", + }); +} + +describe("E2E: Copilot Integration", () => { + beforeEach(() => { + if (existsSync(PROJECT_DIR)) rmSync(PROJECT_DIR, { recursive: true, force: true }); + mkdirSync(PROJECT_DIR, { recursive: true }); + mkdirSync(resolve(PROJECT_DIR, ".github", "hooks"), { recursive: true }); + mkdirSync(COPILOT_HOME, { recursive: true }); + mkdirSync(COPILOT_SESSION_STATE_DIR, { recursive: true }); + writeFileSync(BASHRC_PATH, "# shell rc\n", "utf8"); + writeFileSync(COPILOT_CONFIG_PATH, JSON.stringify({ version: 1, hooks: {} }, null, 2) + "\n", "utf8"); + if (existsSync(ACTIVITY_DIR)) rmSync(ACTIVITY_DIR, { recursive: true, force: true }); + if (existsSync(DEDUP_DIR)) rmSync(DEDUP_DIR, { recursive: true, force: true }); + resetActivityStore(); + }); + + afterEach(() => { + if (existsSync(PROJECT_DIR)) rmSync(PROJECT_DIR, { recursive: true, force: true }); + }); + + it("installs project hooks with Copilot native camelCase event 
names", () => { + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration copilot --scope project`, { + cwd: PROJECT_DIR, + env: cliEnv(), + }); + + const hooks = JSON.parse(readFileSync(COPILOT_PROJECT_HOOKS_PATH, "utf8")); + + expect(hooks.version).toBe(1); + expect(hooks.hooks.sessionStart[0].bash).toContain("--hook sessionStart --integration copilot"); + expect(hooks.hooks.preToolUse[0].bash).toContain("--hook preToolUse --integration copilot"); + expect(hooks.hooks.userPromptSubmitted[0].bash).toContain("--hook userPromptSubmitted --integration copilot"); + expect(hooks.hooks.SessionStart).toBeUndefined(); + expect(hooks.hooks.PreToolUse).toBeUndefined(); + }); + + it("installs user hooks without wiping existing config and appends copilot-sync bootstrap", () => { + writeFileSync( + COPILOT_CONFIG_PATH, + JSON.stringify({ + version: 1, + copilotTokens: ["keep-me"], + loggedInUsers: [{ login: "octocat" }], + hooks: { + customEvent: [{ bash: "echo untouched" }], + }, + }, null, 2) + "\n", + "utf8", + ); + + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration copilot --scope user`, { + cwd: PROJECT_DIR, + env: cliEnv(), + }); + + const config = readCopilotConfig(); + const bashrc = readFileSync(BASHRC_PATH, "utf8"); + + expect(config.copilotTokens).toEqual(["keep-me"]); + expect(config.loggedInUsers).toEqual([{ login: "octocat" }]); + expect(config.hooks.customEvent).toEqual([{ bash: "echo untouched" }]); + expect(config.hooks.sessionStart[0].bash).toContain("--hook sessionStart --integration copilot"); + expect(config.hooks.preToolUse[0].bash).toContain("--hook preToolUse --integration copilot"); + expect(bashrc).toContain("env failproofai copilot-sync 2>/dev/null"); + }); + + it("uninstalls only failproofai hooks and preserves unrelated Copilot config", () => { + writeFileSync( + COPILOT_CONFIG_PATH, + JSON.stringify({ + version: 1, + copilotTokens: ["keep-me"], + hooks: { + preToolUse: [{ bash: "echo untouched" }], + }, + 
}, null, 2) + "\n", + "utf8", + ); + + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration copilot --scope user`, { + cwd: PROJECT_DIR, + env: cliEnv(), + }); + execSync(`bun ${BINARY_PATH} policies --uninstall --integration copilot --scope user`, { + cwd: PROJECT_DIR, + env: cliEnv(), + }); + + const config = readCopilotConfig(); + + expect(config.copilotTokens).toEqual(["keep-me"]); + expect(config.hooks.preToolUse).toEqual([{ bash: "echo untouched" }]); + expect(config.hooks.sessionStart).toBeUndefined(); + expect(config.hooks.userPromptSubmitted).toBeUndefined(); + }); + + it("denies sudo from stringified toolArgs and persists a complete Copilot activity entry", () => { + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration copilot --scope project`, { + cwd: PROJECT_DIR, + env: cliEnv(), + }); + + const payload = CopilotPayloads.preToolUse.bashViaToolArgs( + "sudo rm -rf /", + PROJECT_DIR, + { sessionId: COPILOT_SESSION_ID }, + ); + + const { status, stdout, stderr } = runCopilotHook("preToolUse", payload); + const entries = readActivityEntries(COPILOT_SESSION_ID); + + expect(status).toBe(0); + const parsed = JSON.parse(stdout); + expect(parsed.permissionDecision).toBe("deny"); + expect(stderr).toContain("ACTION BLOCKED BY FAILPROOFAI"); + expect(entries).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + eventType: "PreToolUse", + integration: "copilot", + sessionId: COPILOT_SESSION_ID, + toolName: "bash", + transcriptPath: join(HOME_DIR, ".copilot", "session-state", COPILOT_SESSION_ID, "events.jsonl"), + }), + ]), + ); + }); + + it("persists sessionStart and userPromptSubmitted for the policies page with the same session id", () => { + const sessionStart = CopilotPayloads.sessionStart(PROJECT_DIR, { sessionId: COPILOT_SESSION_ID }); + const prompt = CopilotPayloads.userPromptSubmitted("review the diff", PROJECT_DIR, { + sessionId: COPILOT_SESSION_ID, + }); + + const startResult = 
runCopilotHook("sessionStart", sessionStart); + const promptResult = runCopilotHook("userPromptSubmitted", prompt); + const entries = readActivityEntries(COPILOT_SESSION_ID); + + expect(startResult.status).toBe(0); + expect(promptResult.status).toBe(0); + expect(entries.map((entry) => entry.eventType)).toEqual( + expect.arrayContaining(["SessionStart", "UserPromptSubmit"]), + ); + expect(entries.every((entry) => entry.integration === "copilot")).toBe(true); + expect(entries.every((entry) => entry.sessionId === COPILOT_SESSION_ID)).toBe(true); + }); + + it("recovers the session id from COPILOT_SESSION_ID when the payload is empty", () => { + const result = runCopilotHook("sessionStart", "", { + COPILOT_SESSION_ID: COPILOT_SESSION_ID, + }); + const entries = readActivityEntries(COPILOT_SESSION_ID); + + expect(result.status).toBe(0); + expect(entries).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + eventType: "SessionStart", + integration: "copilot", + sessionId: COPILOT_SESSION_ID, + transcriptPath: join(HOME_DIR, ".copilot", "session-state", COPILOT_SESSION_ID, "events.jsonl"), + }), + ]), + ); + }); + + it("silently ignores corrupted legacy claude-code Copilot lifecycle duplicates", () => { + const payload = CopilotPayloads.sessionStart(PROJECT_DIR, { sessionId: COPILOT_SESSION_ID }); + + const result = runCopilotHook("sessionStart", payload, {}, "claude-code"); + const entries = readActivityEntries(COPILOT_SESSION_ID); + + expect(result.status).toBe(0); + expect(result.stdout.trim()).toBe(""); + expect(result.stderr.trim()).toBe(""); + expect(entries).toEqual([]); + }); +}); diff --git a/__tests__/e2e/hooks/cursor-integration.e2e.test.ts b/__tests__/e2e/hooks/cursor-integration.e2e.test.ts new file mode 100644 index 00000000..bb838e0e --- /dev/null +++ b/__tests__/e2e/hooks/cursor-integration.e2e.test.ts @@ -0,0 +1,178 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { execSync, spawnSync } from 
"node:child_process"; +import { writeFileSync, readFileSync, existsSync, mkdirSync, rmSync } from "node:fs"; +import { resolve } from "node:path"; +import { homedir } from "node:os"; +import { CursorPayloads } from "../helpers/payloads"; + +const BINARY_PATH = resolve(__dirname, "../../../bin/failproofai.mjs"); +const PROJECT_DIR = resolve(__dirname, "../../fixtures/cursor-project"); +const CURSOR_HOOKS_PATH = resolve(PROJECT_DIR, ".cursor", "hooks.json"); +const CONFIG_PATH = resolve(PROJECT_DIR, ".failproofai", "policies-config.json"); +// Firing-lock files can persist across test cases. Clear them. +const DEDUP_DIR = resolve(require("node:os").homedir(), ".failproofai", "cache", "dedup"); + +describe("E2E: Cursor Integration", () => { + beforeEach(() => { + if (existsSync(PROJECT_DIR)) rmSync(PROJECT_DIR, { recursive: true, force: true }); + if (existsSync(DEDUP_DIR)) rmSync(DEDUP_DIR, { recursive: true, force: true }); + mkdirSync(PROJECT_DIR, { recursive: true }); + // Initialize empty cursor hooks + mkdirSync(resolve(PROJECT_DIR, ".cursor"), { recursive: true }); + writeFileSync(CURSOR_HOOKS_PATH, JSON.stringify({ version: 1, hooks: {} })); + }); + + afterEach(() => { + if (existsSync(PROJECT_DIR)) rmSync(PROJECT_DIR, { recursive: true, force: true }); + }); + + it("denies sudo command via Cursor preToolUse hook", () => { + // 1. Install block-sudo for Cursor project scope + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + + // 2. Verify hooks.json was written correctly + const hooks = JSON.parse(readFileSync(CURSOR_HOOKS_PATH, "utf8")); + expect(hooks.version).toBe(1); + expect(hooks.hooks.beforeShellExecution[0].command).toContain("--hook PreToolUse"); + + // 3. 
Trigger the hook with a sudo payload + const payload = CursorPayloads.preToolUse.bash("sudo rm -rf /", PROJECT_DIR); + + const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "PreToolUse"], { + input: JSON.stringify(payload), + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8" + }); + + // Cursor expects Exit 0 for a protocol-compliant JSON denial. + expect(status).toBe(0); + const parsed = JSON.parse(stdout.trim()); + expect(parsed.continue).toBe(false); + expect(parsed.permission).toBe("deny"); + expect(stderr).toContain("ACTION BLOCKED BY FAILPROOFAI"); + expect(stderr).toContain("sudo"); + }); + + it("normalizes workspace_roots to cwd", () => { + // 1. Install block-sudo + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + + // 2. Trigger hook with ONLY workspace_roots (no cwd) + const payload = CursorPayloads.preToolUse.bash("sudo ls", PROJECT_DIR); + delete payload.cwd; // Force normalization from workspace_roots[0] + + const output = spawnSync("bun", [BINARY_PATH, "--hook", "PreToolUse"], { + input: JSON.stringify(payload), + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8" + }); + expect(output.status).toBe(0); + const parsedDeny = JSON.parse(output.stdout.trim()); + expect(parsedDeny.continue).toBe(false); + expect(output.stderr).toContain("ACTION BLOCKED BY FAILPROOFAI"); + }); + + it("allows benign commands", () => { + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + + const payload = CursorPayloads.preToolUse.bash("ls -la", PROJECT_DIR); + + const { status, stdout } = spawnSync("bun", 
[BINARY_PATH, "--hook", "PreToolUse"], { + input: JSON.stringify(payload), + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8" + }); + + expect(status).toBe(0); + expect(JSON.parse(stdout.trim())).toEqual({ continue: true, permission: "allow" }); + }); + + it("blocks sudo via beforeShellExecution event (tool_name normalization)", () => { + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + + // beforeShellExecution events don't include tool_name — normalizePayload must map to run_terminal_command + const payload = { + session_id: "test-session", + workspace_roots: [PROJECT_DIR], + integration: "cursor", + hook_event_name: "beforeShellExecution", + command: "sudo rm -rf /tmp/test", + }; + + const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "PreToolUse"], { + input: JSON.stringify(payload), + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8" + }); + + expect(status).toBe(0); + const parsed = JSON.parse(stdout.trim()); + expect(parsed.continue).toBe(false); + expect(parsed.permission).toBe("deny"); + expect(stderr).toContain("ACTION BLOCKED BY FAILPROOFAI"); + expect(stderr).toContain("sudo"); + }); + + it("blocks env file read via beforeReadFile event (file_path normalization)", () => { + execSync(`bun ${BINARY_PATH} policies --install block-env-files --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + + // beforeReadFile events send file_path at the top level — normalizePayload must wrap it + const payload = { + session_id: "test-session", + workspace_roots: [PROJECT_DIR], + integration: "cursor", + hook_event_name: "beforeReadFile", + file_path: 
`${PROJECT_DIR}/.env`, + }; + + const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "PreToolUse"], { + input: JSON.stringify(payload), + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8" + }); + + expect(status).toBe(0); + const parsed = JSON.parse(stdout.trim()); + expect(parsed.continue).toBe(false); + expect(parsed.permission).toBe("deny"); + expect(stderr).toContain("ACTION BLOCKED BY FAILPROOFAI"); + }); + + it("uninstalls cursor hooks correctly", () => { + // Install + execSync(`bun ${BINARY_PATH} policies --install block-sudo --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + expect(JSON.parse(readFileSync(CURSOR_HOOKS_PATH, "utf8")).hooks.beforeShellExecution).toBeDefined(); + + // Uninstall + execSync(`bun ${BINARY_PATH} policies --uninstall --integration cursor --scope project`, { + cwd: PROJECT_DIR, + env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + }); + + const hooks = JSON.parse(readFileSync(CURSOR_HOOKS_PATH, "utf8")); + expect(hooks.hooks).toBeUndefined(); + }); +}); From 4fe08b1b2e83737b99815c9be6b528590542c789 Mon Sep 17 00:00:00 2001 From: Yash Upadhyay Date: Mon, 13 Apr 2026 17:31:09 +0000 Subject: [PATCH 02/47] feat: implement Cursor IDE integration --- .codex | 0 __tests__/e2e/helpers/payloads.ts | 49 ++ .../hooks/block-read-outside-cwd.test.ts | 2 +- __tests__/hooks/integrations.test.ts | 127 +++++ __tests__/hooks/manager.test.ts | 48 +- bin/failproofai.mjs | 69 ++- src/hooks/builtin-policies.ts | 97 ++-- src/hooks/handler.ts | 38 +- src/hooks/hook-activity-store.ts | 1 + src/hooks/integrations.ts | 369 +++++++++++++ src/hooks/manager.ts | 515 +++++++----------- src/hooks/policy-evaluator.ts | 45 +- src/hooks/types.ts | 74 ++- 13 files changed, 1017 insertions(+), 417 deletions(-) create mode 100644 .codex create mode 100644 
__tests__/hooks/integrations.test.ts create mode 100644 src/hooks/integrations.ts diff --git a/.codex b/.codex new file mode 100644 index 00000000..e69de29b diff --git a/__tests__/e2e/helpers/payloads.ts b/__tests__/e2e/helpers/payloads.ts index 3b08ea00..50f4dac8 100644 --- a/__tests__/e2e/helpers/payloads.ts +++ b/__tests__/e2e/helpers/payloads.ts @@ -101,3 +101,52 @@ export const Payloads = { }; }, }; + +export const CursorPayloads = { + preToolUse: { + bash(command: string, cwd: string): Record { + return { + session_id: SESSION_ID, + workspace_roots: [cwd], + integration: "cursor", + hook_event_name: "preToolUse", // Note: cursor uses camelCase in payload too + tool_name: "run_terminal_command", + tool_input: { command }, + }; + }, + + write(filePath: string, content: string, cwd: string): Record { + return { + session_id: SESSION_ID, + workspace_roots: [cwd], + integration: "cursor", + hook_event_name: "afterFileEdit", + tool_name: "edit_file", + tool_input: { file_path: filePath, content }, + }; + }, + }, + + postToolUse: { + bash(command: string, output: string, cwd: string): Record { + return { + session_id: SESSION_ID, + workspace_roots: [cwd], + integration: "cursor", + hook_event_name: "postToolUse", + tool_name: "run_terminal_command", + tool_input: { command }, + tool_result: output, + }; + }, + }, + + stop(cwd: string): Record { + return { + session_id: SESSION_ID, + workspace_roots: [cwd], + integration: "cursor", + hook_event_name: "stop", + }; + }, +}; diff --git a/__tests__/hooks/block-read-outside-cwd.test.ts b/__tests__/hooks/block-read-outside-cwd.test.ts index ef20884c..a5e48070 100644 --- a/__tests__/hooks/block-read-outside-cwd.test.ts +++ b/__tests__/hooks/block-read-outside-cwd.test.ts @@ -34,7 +34,7 @@ describe("block-read-outside-cwd policy", () => { it("exists in BUILTIN_POLICIES", () => { expect(policy).toBeDefined(); expect(policy.defaultEnabled).toBe(false); - expect(policy.match.toolNames).toEqual(["Read", "Glob", "Grep", "Bash"]); 
+ expect(policy.match.toolNames).toEqual(["Read", "Glob", "Grep", "Bash", "run_terminal_command", "Terminal"]); }); it("allows Read with file_path inside cwd", async () => { diff --git a/__tests__/hooks/integrations.test.ts b/__tests__/hooks/integrations.test.ts new file mode 100644 index 00000000..c6467f29 --- /dev/null +++ b/__tests__/hooks/integrations.test.ts @@ -0,0 +1,127 @@ +// @vitest-environment node +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { readFileSync, writeFileSync, existsSync } from "node:fs"; +import { resolve } from "node:path"; +import { homedir } from "node:os"; +import { + getIntegration, + INTEGRATIONS, + listIntegrationIds +} from "../../src/hooks/integrations"; +import { CURSOR_HOOK_EVENT_TYPES } from "../../src/hooks/types"; + +vi.mock("node:fs", () => ({ + readFileSync: vi.fn(), + writeFileSync: vi.fn(), + existsSync: vi.fn(), + mkdirSync: vi.fn(), +})); + +describe("hooks/integrations", () => { + beforeEach(() => { + vi.resetAllMocks(); + }); + + describe("listIntegrationIds", () => { + it("returns supported integration IDs", () => { + const ids = listIntegrationIds(); + expect(ids).toContain("claude-code"); + expect(ids).toContain("cursor"); + expect(ids.length).toBe(2); + }); + }); + + describe("claude-code", () => { + const claude = getIntegration("claude-code"); + + it("has correct properties", () => { + expect(claude.id).toBe("claude-code"); + expect(claude.displayName).toBe("Claude Code"); + expect(claude.scopes).toEqual(["user", "project", "local"]); + }); + + it("resolves user settings path", () => { + const path = claude.getSettingsPath("user"); + expect(path).toBe(resolve(homedir(), ".claude", "settings.json")); + }); + + it("resolves project settings path", () => { + const path = claude.getSettingsPath("project", "/tmp/repo"); + expect(path).toBe(resolve("/tmp/repo", ".claude", "settings.json")); + }); + + it("builds hook entry with marker and ms timeout", () => { + const entry = 
claude.buildHookEntry("/bin/failproofai", "PreToolUse") as any; + expect(entry.command).toBe('"/bin/failproofai" --hook PreToolUse'); + expect(entry.timeout).toBe(60000); + expect(entry.__failproofai_hook__).toBe(true); + }); + }); + + describe("cursor", () => { + const cursor = getIntegration("cursor"); + + it("has correct properties", () => { + expect(cursor.id).toBe("cursor"); + expect(cursor.displayName).toBe("Cursor"); + expect(cursor.scopes).toEqual(["user", "project"]); + expect(cursor.eventTypes).toHaveLength(CURSOR_HOOK_EVENT_TYPES.length); + }); + + it("resolves user settings path", () => { + const path = cursor.getSettingsPath("user"); + expect(path).toBe(resolve(homedir(), ".cursor", "hooks.json")); + }); + + it("resolves project settings path", () => { + const path = cursor.getSettingsPath("project", "/tmp/repo"); + expect(path).toBe(resolve("/tmp/repo", ".cursor", "hooks.json")); + }); + + it("builds hook entry with seconds timeout and no marker", () => { + const entry = cursor.buildHookEntry("/bin/failproofai", "beforeShellExecution") as any; + expect(entry.command).toBe('sh -lc \'"/bin/failproofai" --hook PreToolUse\''); + expect(entry.timeout).toBe(60); + expect(entry.__failproofai_hook__).toBeUndefined(); + }); + + it("detects failproofai hook by command string", () => { + expect(cursor.isFailproofaiHook({ command: "failproofai --hook PreToolUse" })).toBe(true); + expect(cursor.isFailproofaiHook({ command: "other --hook" })).toBe(false); + }); + + it("writeHookEntries maintains version: 1 and flat arrays", () => { + vi.mocked(existsSync).mockReturnValue(true); + const settings: any = { version: 1, hooks: {} }; + + cursor.writeHookEntries(settings, "/bin/failproofai"); + + expect(settings.version).toBe(1); + expect(settings.hooks["preToolUse"]).toBeDefined(); + expect(Array.isArray(settings.hooks["preToolUse"])).toBe(true); + expect(settings.hooks["preToolUse"][0].command).toContain("--hook PreToolUse"); + }); + + it("removeHooksFromFile preserves 
non-failproofai hooks", () => { + const settings = { + version: 1, + hooks: { + preToolUse: [ + { command: "other-hook" }, + { command: "failproofai --hook PreToolUse" } + ] + } + }; + vi.mocked(existsSync).mockReturnValue(true); + vi.mocked(readFileSync).mockReturnValue(JSON.stringify(settings)); + + const removed = cursor.removeHooksFromFile("/tmp/hooks.json"); + + expect(removed).toBe(1); + const [path, content] = vi.mocked(writeFileSync).mock.calls[0]; + const written = JSON.parse(content as string); + expect(written.hooks.preToolUse).toHaveLength(1); + expect(written.hooks.preToolUse[0].command).toBe("other-hook"); + }); + }); +}); diff --git a/__tests__/hooks/manager.test.ts b/__tests__/hooks/manager.test.ts index bd64f932..e99c36d8 100644 --- a/__tests__/hooks/manager.test.ts +++ b/__tests__/hooks/manager.test.ts @@ -16,6 +16,11 @@ vi.mock("node:child_process", () => ({ execSync: vi.fn(), })); +// resolveFailproofaiBinary() uses FAILPROOFAI_DIST_PATH or relative paths +// Set a dist path so it finds a predictable binary path +const MOCK_DIST_PATH = "/mock/dist"; +const MOCK_BINARY_PATH = "/mock/dist/bin/failproofai.mjs"; + vi.mock("../../src/hooks/install-prompt", () => ({ promptPolicySelection: vi.fn(() => Promise.resolve(["block-sudo", "block-env-files", "sanitize-jwt"]), @@ -56,11 +61,12 @@ const LOCAL_SETTINGS_PATH = resolve(process.cwd(), ".claude", "settings.local.js describe("hooks/manager", () => { beforeEach(() => { vi.resetAllMocks(); - vi.mocked(execSync).mockReturnValue("/usr/local/bin/failproofai\n"); + process.env.FAILPROOFAI_DIST_PATH = MOCK_DIST_PATH; vi.spyOn(console, "log").mockImplementation(() => {}); }); afterEach(() => { + delete process.env.FAILPROOFAI_DIST_PATH; vi.restoreAllMocks(); }); @@ -85,7 +91,7 @@ describe("hooks/manager", () => { expect(hook.__failproofai_hook__).toBe(true); expect(hook.type).toBe("command"); expect(hook.timeout).toBe(60_000); - expect(hook.command).toBe(`"/usr/local/bin/failproofai" --hook ${eventType}`); + 
expect(hook.command).toBe(`"${MOCK_BINARY_PATH}" --hook ${eventType}`); } }); @@ -218,7 +224,7 @@ describe("hooks/manager", () => { expect(written.hooks.PreToolUse).toHaveLength(1); expect(written.hooks.PreToolUse[0].hooks[0].command).toBe( - '"/usr/local/bin/failproofai" --hook PreToolUse', + `"${MOCK_BINARY_PATH}" --hook PreToolUse`, ); }); @@ -234,33 +240,17 @@ describe("hooks/manager", () => { expect(Object.keys(written.hooks)).toHaveLength(26); }); - it("uses 'where' on Windows and handles multi-line output", async () => { - const originalPlatform = process.platform; - Object.defineProperty(process, "platform", { value: "win32", configurable: true }); - vi.mocked(execSync).mockReturnValue("C:\\Program Files\\failproofai\\failproofai.exe\nC:\\other\\failproofai.exe\n"); + it("resolves binary from FAILPROOFAI_DIST_PATH", async () => { vi.mocked(existsSync).mockReturnValue(true); vi.mocked(readFileSync).mockReturnValue("{}"); const { installHooks } = await import("../../src/hooks/manager"); await installHooks(); - expect(execSync).toHaveBeenCalledWith("where failproofai", { encoding: "utf8" }); - const [, content] = vi.mocked(writeFileSync).mock.calls[0]; const written = JSON.parse(content as string); const hook = written.hooks.PreToolUse[0].hooks[0]; - expect(hook.command).toBe('"C:\\Program Files\\failproofai\\failproofai.exe" --hook PreToolUse'); - - Object.defineProperty(process, "platform", { value: originalPlatform, configurable: true }); - }); - - it("throws when failproofai binary is not found", async () => { - vi.mocked(execSync).mockImplementation(() => { - throw new Error("not found"); - }); - - const { installHooks } = await import("../../src/hooks/manager"); - await expect(installHooks()).rejects.toThrow("failproofai binary not found"); + expect(hook.command).toContain(MOCK_BINARY_PATH); }); it("default scope is user", async () => { @@ -506,8 +496,6 @@ describe("hooks/manager", () => { "user", undefined, ); - const logs = 
vi.mocked(console.log).mock.calls.map((c) => c[0]); - expect(logs.some((l: unknown) => typeof l === "string" && l.includes(resolve("/tmp/my-hooks.js")))).toBe(true); }); it("clears customPoliciesPath when removeCustomHooks is true", async () => { @@ -525,8 +513,6 @@ describe("hooks/manager", () => { const [[written]] = vi.mocked(writeScopedHooksConfig).mock.calls; expect((written as unknown as Record).customPoliciesPath).toBeUndefined(); - const logs = vi.mocked(console.log).mock.calls.map((c) => c[0]); - expect(logs.some((l: unknown) => typeof l === "string" && l.includes("Custom hooks path cleared"))).toBe(true); }); }); @@ -658,9 +644,7 @@ describe("hooks/manager", () => { const { removeHooks } = await import("../../src/hooks/manager"); await removeHooks(); - expect(console.log).toHaveBeenCalledWith( - expect.stringContaining("No settings file found"), - ); + // No settings file means no writes (integration.removeHooksFromFile skips missing files) expect(writeFileSync).not.toHaveBeenCalled(); }); @@ -671,9 +655,7 @@ describe("hooks/manager", () => { const { removeHooks } = await import("../../src/hooks/manager"); await removeHooks(); - expect(console.log).toHaveBeenCalledWith( - expect.stringContaining("No hooks found"), - ); + // Settings file exists but has no hooks — should NOT write it back (nothing changed) expect(writeFileSync).not.toHaveBeenCalled(); }); @@ -959,8 +941,8 @@ describe("hooks/manager", () => { const calls = vi.mocked(console.log).mock.calls.map((c) => c[0]); const output = calls.join("\n"); - // Multi-scope warning present - expect(output).toContain("multiple scopes"); + // Multi-scope layout present (integration display name in title) + expect(output).toContain("Claude Code"); // Scope columns should appear const headerLine = calls.find( (c: unknown) => typeof c === "string" && c.includes("User") && c.includes("Project"), diff --git a/bin/failproofai.mjs b/bin/failproofai.mjs index b482d31c..c4f6c33e 100755 --- a/bin/failproofai.mjs +++ 
b/bin/failproofai.mjs @@ -106,6 +106,8 @@ COMMANDS --beta Remove only beta policies --custom, -c Clear the customPoliciesPath from config + --integration claude-code|cursor Target platform (default: claude-code) + policies --help, -h Show this help for the policies command login Authenticate with the failproofai cloud (Google OAuth) @@ -130,6 +132,8 @@ EXAMPLES failproofai policies -i -c ./my-policies.js failproofai policies --uninstall block-sudo failproofai policies --uninstall --custom + failproofai policies --install --integration cursor + failproofai policies --integration cursor LINKS ⭐ Star us: https://github.com/exospherehost/failproofai @@ -156,6 +160,17 @@ LINKS const isUninstall = subArgs.includes("--uninstall") || subArgs.includes("-u"); const isHelp = subArgs.includes("--help") || subArgs.includes("-h"); + // Parse --integration flag (shared across install/uninstall/list) + const integrationIdx = subArgs.indexOf("--integration"); + const integrationArg = integrationIdx >= 0 ? subArgs[integrationIdx + 1] : "claude-code"; + if (integrationIdx >= 0 && (!integrationArg || integrationArg.startsWith("-"))) { + throw new CliError("Missing value for --integration. Valid values: claude-code, cursor"); + } + const { INTEGRATION_TYPES } = await import("../src/hooks/types"); + if (integrationIdx >= 0 && !INTEGRATION_TYPES.includes(integrationArg)) { + throw new CliError(`Invalid integration: ${integrationArg}. Valid values: ${INTEGRATION_TYPES.join(", ")}`); + } + if (isHelp) { console.log(` failproofai policies — manage Failproof AI policies @@ -165,16 +180,21 @@ USAGE failproofai policies --install, -i Enable policies failproofai policies --uninstall, -u Disable policies or remove hooks +OPTIONS (shared) + --integration claude-code|cursor Target platform (default: claude-code) + OPTIONS (install) [names...] 
Specific policy names to enable (omit for interactive) - --scope user|project|local Config scope to write to (default: user) + --scope Config scope to write to (default: user) + Claude Code scopes: user | project | local + Cursor scopes: user | project --beta Include beta policies --custom, -c Path to a JS file of custom policies (skips interactive prompt; validates file first) OPTIONS (uninstall) [names...] Specific policy names to disable (omit to remove hooks) - --scope user|project|local|all Config scope to remove from (default: user) + --scope |all Config scope to remove from (default: user) --beta Remove only beta policies --custom, -c Clear the customPoliciesPath from config @@ -187,20 +207,28 @@ EXAMPLES failproofai policies --uninstall block-sudo failproofai policies -u failproofai policies --uninstall --custom + + # Cursor integration + failproofai policies --install --integration cursor + failproofai policies --uninstall --integration cursor --scope project + failproofai policies --integration cursor `.trimStart()); process.exit(0); } if (isInstall) { const { installHooks } = await import("../src/hooks/manager"); + const { getIntegration } = await import("../src/hooks/integrations"); + const integ = getIntegration(integrationArg); + const validScopes = [...integ.scopes]; const scopeIdx = subArgs.indexOf("--scope"); const scope = scopeIdx >= 0 ? subArgs[scopeIdx + 1] : "user"; if (scopeIdx >= 0 && (!scope || scope.startsWith("-"))) { - throw new CliError("Missing value for --scope. Valid values: user, project, local"); + throw new CliError(`Missing value for --scope. Valid values: ${validScopes.join(", ")}`); } - if (scopeIdx >= 0 && !["user", "project", "local"].includes(scope)) { - throw new CliError(`Invalid scope: ${scope}. Valid values: user, project, local`); + if (scopeIdx >= 0 && !validScopes.includes(scope)) { + throw new CliError(`Invalid scope: ${scope}. 
Valid values for ${integ.displayName}: ${validScopes.join(", ")}`); } const customIdx = subArgs.includes("--custom") ? subArgs.indexOf("--custom") @@ -214,12 +242,13 @@ EXAMPLES const includeBeta = subArgs.includes("--beta"); // Collect positional policy names — args that don't start with - and aren't - // values consumed by --scope or --custom/-c (tracked by index, not value, - // so a policy named "user" isn't incorrectly dropped by the default scope). + // values consumed by --scope, --custom/-c, or --integration (tracked by index, + // not value, so a policy named "user" isn't incorrectly dropped). const consumedIdxs = new Set(); if (scopeIdx >= 0) consumedIdxs.add(scopeIdx + 1); if (customIdx >= 0) consumedIdxs.add(customIdx + 1); - const flags = new Set(["--install", "-i", "--scope", "--beta", "--custom", "-c"]); + if (integrationIdx >= 0) consumedIdxs.add(integrationIdx + 1); + const flags = new Set(["--install", "-i", "--scope", "--beta", "--custom", "-c", "--integration"]); const unknownInstallFlag = subArgs.find((a) => a.startsWith("-") && !flags.has(a)); if (unknownInstallFlag) { throw new CliError(`Unknown flag: ${unknownInstallFlag}\nRun \`failproofai policies --help\` for usage.`); @@ -244,20 +273,25 @@ EXAMPLES includeBeta, undefined, customPoliciesPath, + false, + integrationArg, ); process.exit(0); } if (isUninstall) { const { removeHooks } = await import("../src/hooks/manager"); + const { getIntegration } = await import("../src/hooks/integrations"); + const integ = getIntegration(integrationArg); + const validScopes = [...integ.scopes, "all"]; const scopeIdx = subArgs.indexOf("--scope"); const scope = scopeIdx >= 0 ? subArgs[scopeIdx + 1] : "user"; if (scopeIdx >= 0 && (!scope || scope.startsWith("-"))) { - throw new CliError("Missing value for --scope. Valid values: user, project, local, all"); + throw new CliError(`Missing value for --scope. 
Valid values: ${validScopes.join(", ")}`); } - if (scopeIdx >= 0 && !["user", "project", "local", "all"].includes(scope)) { - throw new CliError(`Invalid scope: ${scope}. Valid values: user, project, local, all`); + if (scopeIdx >= 0 && !validScopes.includes(scope)) { + throw new CliError(`Invalid scope: ${scope}. Valid values for ${integ.displayName}: ${validScopes.join(", ")}`); } const betaOnly = subArgs.includes("--beta"); @@ -265,7 +299,8 @@ EXAMPLES const consumedIdxs = new Set(); if (scopeIdx >= 0) consumedIdxs.add(scopeIdx + 1); - const flags = new Set(["--uninstall", "-u", "--scope", "--beta", "--custom", "-c"]); + if (integrationIdx >= 0) consumedIdxs.add(integrationIdx + 1); + const flags = new Set(["--uninstall", "-u", "--scope", "--beta", "--custom", "-c", "--integration"]); const unknownUninstallFlag = subArgs.find((a) => a.startsWith("-") && !flags.has(a)); if (unknownUninstallFlag) { throw new CliError(`Unknown flag: ${unknownUninstallFlag}\nRun \`failproofai policies --help\` for usage.`); @@ -279,7 +314,7 @@ EXAMPLES policyNames.length > 0 ? policyNames : undefined, scope, undefined, - { betaOnly, removeCustomHooks }, + { betaOnly, removeCustomHooks, integration: integrationArg }, ); process.exit(0); } @@ -287,7 +322,7 @@ EXAMPLES // Default: list policies // Accept --list as a no-op alias (common intuition), reject all other unknown flags // and unexpected positional args (e.g. "hi"). 
-    const knownListFlags = new Set(["--install", "-i", "--uninstall", "-u", "--help", "-h", "--list"]);
+    const knownListFlags = new Set(["--install", "-i", "--uninstall", "-u", "--help", "-h", "--list", "--integration", "--scope"]);
     const unknownListArg = subArgs.find((a) => a.startsWith("-") && !knownListFlags.has(a));
     if (unknownListArg) {
       throw new CliError(
@@ -295,7 +330,13 @@ EXAMPLES
         `Run \`failproofai policies --help\` for usage.`
       );
     }
-    const positionalArgs = subArgs.filter((a) => !a.startsWith("-"));
+    const listConsumedIdxs = new Set();
+    if (integrationIdx >= 0) listConsumedIdxs.add(integrationIdx + 1);
+    // `--scope` is an accepted list flag, so its value must be consumed as well;
+    // otherwise `failproofai policies --scope project` is rejected as an unexpected argument.
+    const listScopeIdx = subArgs.indexOf("--scope");
+    if (listScopeIdx >= 0) listConsumedIdxs.add(listScopeIdx + 1);
+    const positionalArgs = subArgs.filter((a, idx) => !a.startsWith("-") && !listConsumedIdxs.has(idx));
     if (positionalArgs.length > 0) {
       throw new CliError(
         `Unexpected argument: ${positionalArgs[0]}\n` +
@@ -304,7 +341,7 @@ EXAMPLES
     }
 
     const { listHooks } = await import("../src/hooks/manager");
-    await listHooks();
+    await listHooks(undefined, integrationArg);
     process.exit(0);
   }
 
diff --git a/src/hooks/builtin-policies.ts b/src/hooks/builtin-policies.ts
index 0555220c..b93c6977 100644
--- a/src/hooks/builtin-policies.ts
+++ b/src/hooks/builtin-policies.ts
@@ -19,12 +19,52 @@ function isClaudeSettingsFile(resolved: string): boolean {
   return /[\\/]\.claude[\\/]settings(?:\.[^/\\]+)?\.json$/.test(resolved);
 }
 
+/**
+ * Report whether `toolName` denotes a shell/terminal execution tool.
+ *
+ * The match is case-insensitive and accepts "bash", "shell", "terminal", and any
+ * name containing "command" (which covers "run_terminal_command").
+ *
+ * @param toolName - Tool name from the hook payload; may be absent.
+ * @returns `true` for shell-like tools, and also when `toolName` is missing or empty.
+ */
+function isBashTool(toolName: string | undefined): boolean {
+  if (!toolName) return true; // Assume shell if tool name is missing
+  const lower = toolName.toLowerCase();
+  // No separate "run_terminal_command" comparison: the substring test already matches it.
+  return (
+    lower === "bash" ||
+    lower === "shell" ||
+    lower === "terminal" ||
+    lower.includes("command")
+  );
+}
+
+/** Return the first candidate that is actually a string, or "" when none is. */
+function firstStringValue(...candidates: unknown[]): string {
+  for (const candidate of candidates) {
+    if (typeof candidate === "string") return candidate;
+  }
+  return "";
+}
+
+/**
+ * Extract the shell command from the tool input, trying the `command`, `cmd`,
+ * then `input` keys. Non-string values are skipped; returns "" when none match.
+ */
 function getCommand(ctx: PolicyContext): string {
-  return (ctx.toolInput?.command as string) ?? "";
+  const input = ctx.toolInput;
+  return firstStringValue(input?.command, input?.cmd, input?.input);
 }
 
+/**
+ * Extract the target file path from the tool input, trying `file_path`,
+ * `filePath`, `path`, then `relative_path`. Non-string values are skipped;
+ * returns "" when none match.
+ */
 function getFilePath(ctx: PolicyContext): string {
-  return (ctx.toolInput?.file_path as string) ?? "";
+  const input = ctx.toolInput;
+  return firstStringValue(input?.file_path, input?.filePath, input?.path, input?.relative_path);
 }
 
 /**
@@ -361,7 +384,7 @@ function sanitizeBearerTokens(ctx: PolicyContext): PolicyResult {
 }
 
 function warnDestructiveSql(ctx: PolicyContext): PolicyResult {
-  if (ctx.toolName !== "Bash") return allow();
+  if (!isBashTool(ctx.toolName)) return allow();
   const cmd = getCommand(ctx);
   if (!SQL_TOOL_RE.test(cmd)) return allow();
 
@@ -397,7 +420,7 @@ function warnLargeFileWrite(ctx: PolicyContext): PolicyResult {
 }
 
 function warnPackagePublish(ctx: PolicyContext): PolicyResult {
-  if (ctx.toolName !== "Bash") return allow();
+  if (!isBashTool(ctx.toolName)) return allow();
   const cmd = getCommand(ctx);
   if (PUBLISH_CMD_RE.test(cmd)) {
     return instruct(
@@ -408,7 +431,7 @@ function warnPackagePublish(ctx: PolicyContext): PolicyResult {
 }
 
 function protectEnvVars(ctx: PolicyContext): PolicyResult {
-  if (ctx.toolName !== "Bash") return allow();
+  if (!isBashTool(ctx.toolName)) return allow();
   const cmd = getCommand(ctx);
   // Block: env, printenv, echo $VAR, export VAR=
   if (ENV_PRINTENV_RE.test(cmd)) {
@@ -448,14 +471,14 @@ function blockEnvFiles(ctx: PolicyContext): PolicyResult {
     return deny("Access to .env file blocked");
   }
   // Check Bash commands referencing .env files
-  if (ctx.toolName === "Bash" && ENV_CMD_RE.test(cmd)) {
+  if (isBashTool(ctx.toolName) && ENV_CMD_RE.test(cmd)) {
     return deny("Command references .env file");
   }
   return allow();
 }
 
 function blockSudo(ctx: PolicyContext): PolicyResult {
-  if (ctx.toolName !== "Bash") return allow();
+  if (!isBashTool(ctx.toolName)) return allow();
   const cmd = getCommand(ctx).trimStart();
   if (SUDO_RE.test(cmd) || cmd.startsWith("sudo ")) {
     // Check allowPatterns — match against parsed tokens, not raw string
@@ -475,7 +498,7 @@
function blockSudo(ctx: PolicyContext): PolicyResult { } function blockCurlPipeSh(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (CURL_PIPE_SH_RE.test(cmd)) { return deny("Piping downloads to shell is blocked"); @@ -496,7 +519,7 @@ function extractGitPushArgs(cmd: string): string[] { } function blockPushMaster(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const protectedBranches = ((ctx.params?.protectedBranches ?? ["main", "master"]) as string[]); if (protectedBranches.length === 0) return allow(); const args = extractGitPushArgs(getCommand(ctx)); @@ -552,7 +575,7 @@ function rmTargetIsAllowed(cmd: string, allowPaths: string[]): boolean { } function blockRmRf(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); const hasDestructivePath = parseArgvTokens(cmd).some((token) => { const normalized = token.replace(/\/\*$/, "").replace(/\/+$/, "") || (token.startsWith("/") ? "/" : ""); @@ -594,7 +617,7 @@ function blockRmRf(ctx: PolicyContext): PolicyResult { } function blockForcePush(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const args = extractGitPushArgs(getCommand(ctx)); if (args.some((a) => FORCE_PUSH_RE.test(a))) { return deny("Force-pushing is blocked"); @@ -684,7 +707,7 @@ function blockReadOutsideCwd(ctx: PolicyContext): PolicyResult { const allowPaths = ((ctx.params?.allowPaths ?? 
[]) as string[]); // For Bash tool: check read-like commands for absolute paths outside cwd - if (ctx.toolName === "Bash") { + if (isBashTool(ctx.toolName)) { const cmd = getCommand(ctx); if (!READ_LIKE_CMDS.test(cmd)) return allow(); @@ -734,7 +757,7 @@ function blockReadOutsideCwd(ctx: PolicyContext): PolicyResult { } function blockWorkOnMain(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (!GIT_COMMIT_MERGE_RE.test(cmd)) return allow(); @@ -754,7 +777,7 @@ function blockWorkOnMain(ctx: PolicyContext): PolicyResult { } function blockFailproofaiCommands(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); // Block direct failproofai CLI invocations @@ -809,7 +832,7 @@ async function warnRepeatedToolCalls(ctx: PolicyContext): Promise } function warnGitAmend(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (GIT_AMEND_RE.test(cmd)) { return instruct( @@ -820,7 +843,7 @@ function warnGitAmend(ctx: PolicyContext): PolicyResult { } function warnGitStashDrop(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (GIT_STASH_DROP_RE.test(cmd)) { return instruct( @@ -831,7 +854,7 @@ function warnGitStashDrop(ctx: PolicyContext): PolicyResult { } function warnAllFilesStaged(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (GIT_ADD_ALL_RE.test(cmd)) { return instruct( @@ -842,7 +865,7 @@ function warnAllFilesStaged(ctx: PolicyContext): PolicyResult { } function warnSchemaAlteration(ctx: PolicyContext): PolicyResult { - if 
(ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); if (!SQL_TOOL_RE.test(cmd)) return allow(); if (SCHEMA_ALTER_RE.test(cmd)) { @@ -854,7 +877,7 @@ function warnSchemaAlteration(ctx: PolicyContext): PolicyResult { } function warnGlobalPackageInstall(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); const isGlobal = NPM_GLOBAL_RE.test(cmd) || @@ -941,7 +964,7 @@ function preferPackageManager(ctx: PolicyContext): PolicyResult { } function warnBackgroundProcess(ctx: PolicyContext): PolicyResult { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = getCommand(ctx); const isBackground = NOHUP_RE.test(cmd) || @@ -1314,7 +1337,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "protect-env-vars", description: "Prevent commands that read environment variables", fn: protectEnvVars, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: true, category: "Environment", }, @@ -1330,7 +1353,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-read-outside-cwd", description: "Block file reads outside the session working directory", fn: blockReadOutsideCwd, - match: { events: ["PreToolUse"], toolNames: ["Read", "Glob", "Grep", "Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Read", "Glob", "Grep", "Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Environment", params: { @@ -1345,7 +1368,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-sudo", description: "Block sudo commands", fn: blockSudo, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", 
"run_terminal_command", "Terminal"] }, defaultEnabled: true, category: "Dangerous Commands", params: { @@ -1360,7 +1383,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-curl-pipe-sh", description: "Block piping downloads to shell", fn: blockCurlPipeSh, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: true, category: "Dangerous Commands", }, @@ -1368,7 +1391,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-rm-rf", description: "Prevent catastrophic deletions", fn: blockRmRf, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Dangerous Commands", params: { @@ -1383,7 +1406,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-failproofai-commands", description: "Block failproofai CLI commands and uninstallation", fn: blockFailproofaiCommands, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: true, category: "Dangerous Commands", }, @@ -1406,7 +1429,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-push-master", description: "Block pushing to main/master", fn: blockPushMaster, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: true, category: "Git", params: { @@ -1421,7 +1444,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-force-push", description: "Prevent force-pushing to any branch", fn: blockForcePush, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", 
"Terminal"] }, defaultEnabled: false, category: "Git", }, @@ -1429,7 +1452,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "block-work-on-main", description: "Block git commits and merges on main/master branch", fn: blockWorkOnMain, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Git", params: { @@ -1444,7 +1467,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-git-amend", description: "Warns before amending git commits, which rewrites history", fn: warnGitAmend, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Git", }, @@ -1452,7 +1475,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-git-stash-drop", description: "Warns before permanently deleting stashed changes", fn: warnGitStashDrop, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Git", }, @@ -1460,7 +1483,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-all-files-staged", description: "Warns before staging all working tree files with git add -A / . 
/ --all", fn: warnAllFilesStaged, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Git", }, @@ -1468,7 +1491,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-destructive-sql", description: "Warn before executing destructive SQL (DROP/TRUNCATE/DELETE without WHERE) via database clients", fn: warnDestructiveSql, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Database", }, @@ -1476,7 +1499,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-schema-alteration", description: "Warns before SQL schema changes (ALTER TABLE with column or rename operations)", fn: warnSchemaAlteration, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Database", }, @@ -1484,7 +1507,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-package-publish", description: "Warn before publishing packages to public registries (npm, PyPI, crates.io, RubyGems, etc.)", fn: warnPackagePublish, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Packages & System", }, @@ -1492,7 +1515,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-global-package-install", description: "Warns before installing packages globally (npm -g, cargo install, etc.)", fn: warnGlobalPackageInstall, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: 
"Packages & System", }, @@ -1535,7 +1558,7 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ name: "warn-background-process", description: "Warns before starting detached or background processes", fn: warnBackgroundProcess, - match: { events: ["PreToolUse"], toolNames: ["Bash"] }, + match: { events: ["PreToolUse"], toolNames: ["Bash", "run_terminal_command", "Terminal"] }, defaultEnabled: false, category: "Packages & System", }, diff --git a/src/hooks/handler.ts b/src/hooks/handler.ts index 7d073e84..5509b301 100644 --- a/src/hooks/handler.ts +++ b/src/hooks/handler.ts @@ -5,7 +5,7 @@ * ~/.failproofai/policies-config.json, evaluates matching policies, persists * activity to disk, and returns the appropriate exit code + stdout response. */ -import type { HookEventType, SessionMetadata } from "./types"; +import type { HookEventType, SessionMetadata, IntegrationType } from "./types"; import type { PolicyFunction, PolicyResult } from "./policy-types"; import { readMergedHooksConfig } from "./hooks-config"; import { registerBuiltinPolicies } from "./builtin-policies"; @@ -21,7 +21,7 @@ import { hookLogInfo, hookLogWarn } from "./hook-logger"; export async function handleHookEvent(eventType: string): Promise { const startTime = performance.now(); - // Read stdin payload (Claude passes JSON) + // Read stdin payload (Claude/Cursor passes JSON) const MAX_STDIN_BYTES = 1_048_576; // 1 MB let payload = ""; try { @@ -40,14 +40,23 @@ export async function handleHookEvent(eventType: string): Promise { chunks.push(chunk); }); process.stdin.on("end", () => resolve(chunks.join(""))); + + // Handle the case where stdin is not a pipe or is empty + setTimeout(() => { + if (chunks.length === 0) resolve(""); + }, 100); + process.stdin.on("error", reject); - // If stdin is already closed or not piped, resolve immediately if (process.stdin.readableEnded) resolve(""); }); } catch { hookLogWarn(`stdin read failed for ${eventType}`); } + if (!payload) { + hookLogWarn(`stdin is 
empty for ${eventType} - Cursor Agent might not be piping context`); + } + let parsed: Record = {}; if (payload) { try { @@ -57,6 +66,28 @@ export async function handleHookEvent(eventType: string): Promise { } } + // Normalize Cursor payload: workspace_roots → cwd fallback + if (!parsed.cwd && Array.isArray(parsed.workspace_roots) && parsed.workspace_roots.length > 0) { + parsed.cwd = parsed.workspace_roots[0] as string; + } + + // Attempt to detect integration + let integration: IntegrationType = (parsed.integration as IntegrationType); + if (!integration) { + const hookName = (parsed.hook_event_name as string) || ""; + if ( + Array.isArray(parsed.workspace_roots) || + hookName.startsWith("before") || + hookName.startsWith("after") || + hookName === "preToolUse" || + hookName === "postToolUse" + ) { + integration = "cursor"; + } else { + integration = "claude-code"; + } + } + // Extract session metadata from payload const session: SessionMetadata = { sessionId: parsed.session_id as string | undefined, @@ -64,6 +95,7 @@ export async function handleHookEvent(eventType: string): Promise { cwd: parsed.cwd as string | undefined, permissionMode: parsed.permission_mode as string | undefined, hookEventName: parsed.hook_event_name as string | undefined, + integration, }; // Load enabled policies (merge across project/local/global scopes) diff --git a/src/hooks/hook-activity-store.ts b/src/hooks/hook-activity-store.ts index 98e64c43..724c9eca 100644 --- a/src/hooks/hook-activity-store.ts +++ b/src/hooks/hook-activity-store.ts @@ -52,6 +52,7 @@ export interface HookActivityEntry { cwd?: string; permissionMode?: string; hookEventName?: string; + integration?: string; } export interface HookActivityFilters { diff --git a/src/hooks/integrations.ts b/src/hooks/integrations.ts new file mode 100644 index 00000000..739d6e03 --- /dev/null +++ b/src/hooks/integrations.ts @@ -0,0 +1,369 @@ +/** + * Platform integration registry. 
+ *
+ * Each integration describes how failproofai hooks are installed, detected,
+ * and formatted for a specific AI agent CLI (Claude Code, Cursor, etc.).
+ */
+import { execSync } from "node:child_process";
+import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
+import { resolve, dirname } from "node:path";
+import { homedir } from "node:os";
+import {
+  HOOK_EVENT_TYPES,
+  HOOK_SCOPES,
+  CURSOR_HOOK_EVENT_TYPES,
+  CURSOR_HOOK_SCOPES,
+  CURSOR_EVENT_MAP,
+  FAILPROOFAI_HOOK_MARKER,
+  type IntegrationType,
+  type CursorHookEventType,
+  type ClaudeSettings,
+  type ClaudeHookMatcher,
+  type CursorHooksFile,
+  type CursorHookEntry,
+} from "./types";
+
+// ── Integration interface ───────────────────────────────────────────────────
+
+export interface Integration {
+  id: IntegrationType;
+  displayName: string;
+  scopes: readonly string[];
+  eventTypes: readonly string[];
+  hookMarker: string;
+
+  /** Resolve the settings/hooks file path for a given scope. */
+  getSettingsPath(scope: string, cwd?: string): string;
+
+  /** Read the settings/hooks file, returning a default if it doesn't exist. */
+  readSettings(settingsPath: string): Record<string, unknown>;
+
+  /** Write the settings/hooks file. */
+  writeSettings(settingsPath: string, settings: Record<string, unknown>): void;
+
+  /** Build a single hook entry for this integration. */
+  buildHookEntry(binaryPath: string, eventType: string): Record<string, unknown>;
+
+  /** Check whether a hook entry belongs to failproofai. */
+  isFailproofaiHook(hook: Record<string, unknown>): boolean;
+
+  /**
+   * Write hook entries into the settings object for all supported event types.
+   * Mutates `settings` in place.
+   */
+  writeHookEntries(settings: Record<string, unknown>, binaryPath: string): void;
+
+  /**
+   * Remove failproofai hook entries from a settings file.
+   * Returns the number of entries removed.
+   */
+  removeHooksFromFile(settingsPath: string): number;
+
+  /** Check whether failproofai hooks exist in a given scope. */
+  hooksInstalledInSettings(scope: string, cwd?: string): boolean;
+
+  /** Detect whether the platform CLI binary is installed. */
+  detectInstalled(): boolean;
+
+  /** Optional post-install step. */
+  postInstall?(): void;
+}
+
+// ── Helpers ─────────────────────────────────────────────────────────────────
+
+/**
+ * Read and parse a JSON settings file.
+ *
+ * @returns The parsed object, or `{}` when the file does not exist.
+ * @throws If the file is not valid JSON or its root is not a plain object.
+ */
+function readJsonFile(path: string): Record<string, unknown> {
+  if (!existsSync(path)) return {};
+  const parsed: unknown = JSON.parse(readFileSync(path, "utf8"));
+  // A null root would fail later with an opaque TypeError, and properties set on an
+  // array root are dropped by JSON.stringify — reject both up front with a clear message.
+  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+    throw new Error(`Expected a JSON object in ${path}`);
+  }
+  return parsed as Record<string, unknown>;
+}
+
+/** Write `data` as 2-space-indented JSON with a trailing newline, creating parent directories first. */
+function writeJsonFile(path: string, data: Record<string, unknown>): void {
+  mkdirSync(dirname(path), { recursive: true });
+  writeFileSync(path, JSON.stringify(data, null, 2) + "\n", "utf8");
+}
+
+/**
+ * Identify a failproofai hook entry: either it carries the marker field set to `true`,
+ * or its command string mentions both "failproofai" and "--hook".
+ */
+function isMarkedHook(hook: Record<string, unknown>): boolean {
+  if (hook[FAILPROOFAI_HOOK_MARKER] === true) return true;
+  const cmd = typeof hook.command === "string" ? hook.command : "";
+  return cmd.includes("failproofai") && cmd.includes("--hook");
+}
+
+/**
+ * Check whether `name` resolves via `where` (win32) or `which` (elsewhere).
+ * `name` is interpolated into a shell command, so pass only trusted constant names.
+ */
+function binaryExists(name: string): boolean {
+  try {
+    const cmd = process.platform === "win32" ? `where ${name}` : `which ${name}`;
+    execSync(cmd, { encoding: "utf8", stdio: "pipe" });
+    return true;
+  } catch {
+    // Lookup failed (non-zero exit or the lookup tool itself is missing).
+    return false;
+  }
+}
+
+// ── Claude Code integration ─────────────────────────────────────────────────
+
+const claudeCode: Integration = {
+  id: "claude-code",
+  displayName: "Claude Code",
+  scopes: HOOK_SCOPES,
+  eventTypes: HOOK_EVENT_TYPES,
+  hookMarker: FAILPROOFAI_HOOK_MARKER,
+
+  // Unknown scopes fall back to the user-level settings file.
+  getSettingsPath(scope: string, cwd?: string): string {
+    const base = cwd ? resolve(cwd) : process.cwd();
+    switch (scope) {
+      case "user":
+        return resolve(homedir(), ".claude", "settings.json");
+      case "project":
+        return resolve(base, ".claude", "settings.json");
+      case "local":
+        return resolve(base, ".claude", "settings.local.json");
+      default:
+        return resolve(homedir(), ".claude", "settings.json");
+    }
+  },
+
+  readSettings(settingsPath: string): Record<string, unknown> {
+    return readJsonFile(settingsPath);
+  },
+
+  writeSettings(settingsPath: string, settings: Record<string, unknown>): void {
+    writeJsonFile(settingsPath, settings);
+  },
+
+  buildHookEntry(binaryPath: string, eventType: string): Record<string, unknown> {
+    return {
+      type: "command",
+      command: `"${binaryPath}" --hook ${eventType}`,
+      // NOTE(review): Claude Code documents hook `timeout` in seconds — confirm that
+      // 60_000 is intended (the Cursor integration in this file uses 60).
+      timeout: 60_000,
+      [FAILPROOFAI_HOOK_MARKER]: true,
+    };
+  },
+
+  isFailproofaiHook: isMarkedHook,
+
+  writeHookEntries(settings: Record<string, unknown>, binaryPath: string): void {
+    const s = settings as ClaudeSettings;
+    if (!s.hooks) s.hooks = {};
+
+    for (const eventType of HOOK_EVENT_TYPES) {
+      const hookEntry = this.buildHookEntry(binaryPath, eventType);
+
+      if (!s.hooks[eventType]) s.hooks[eventType] = [];
+      const matchers: ClaudeHookMatcher[] = s.hooks[eventType];
+
+      // Replace the first existing failproofai hook in place so re-installs stay idempotent.
+      let found = false;
+      for (const matcher of matchers) {
+        if (!matcher.hooks) continue;
+        const idx = matcher.hooks.findIndex((h) =>
+          this.isFailproofaiHook(h as Record<string, unknown>),
+        );
+        if (idx >= 0) {
+          matcher.hooks[idx] = hookEntry as any;
+          found = true;
+          break;
+        }
+      }
+
+      // No existing entry for this event: append a new matcher holding only our hook.
+      if (!found) {
+        matchers.push({ hooks: [hookEntry as any] });
+      }
+    }
+  },
+
+  removeHooksFromFile(settingsPath: string): number {
+    const settings = this.readSettings(settingsPath) as ClaudeSettings;
+    if (!settings.hooks) return 0;
+
+    let removed = 0;
+    for (const eventType of Object.keys(settings.hooks)) {
+      const matchers = settings.hooks[eventType];
+      if (!Array.isArray(matchers)) continue;
+
+      // Iterate backwards so splice() does not shift indices still to be visited.
+      for (let i = matchers.length - 1; i >= 0; i--) {
+        const matcher = matchers[i];
+        if (!matcher.hooks) continue;
+
+        const before = matcher.hooks.length;
+        matcher.hooks = matcher.hooks.filter(
+          (h) => !this.isFailproofaiHook(h as Record<string, unknown>),
+        );
+        removed += before - matcher.hooks.length;
+
+        if (matcher.hooks.length === 0) matchers.splice(i, 1);
+      }
+
+      if (matchers.length === 0) delete settings.hooks[eventType];
+    }
+
+    // Nothing of ours was present: leave the user's file untouched instead of rewriting it.
+    if (removed === 0) return 0;
+
+    if (Object.keys(settings.hooks).length === 0) delete settings.hooks;
+    this.writeSettings(settingsPath, settings as Record<string, unknown>);
+    return removed;
+  },
+
+  hooksInstalledInSettings(scope: string, cwd?: string): boolean {
+    const settingsPath = this.getSettingsPath(scope, cwd);
+    if (!existsSync(settingsPath)) return false;
+    try {
+      const settings = this.readSettings(settingsPath) as ClaudeSettings;
+      if (!settings.hooks) return false;
+      for (const matchers of Object.values(settings.hooks)) {
+        if (!Array.isArray(matchers)) continue;
+        for (const matcher of matchers) {
+          if (!matcher.hooks) continue;
+          if (matcher.hooks.some((h) => this.isFailproofaiHook(h as Record<string, unknown>))) {
+            return true;
+          }
+        }
+      }
+    } catch {
+      // Corrupted settings — treat as not installed
+    }
+    return false;
+  },
+
+  detectInstalled(): boolean {
+    return binaryExists("claude");
+  },
+};
+
+// ── Cursor integration ──────────────────────────────────────────────────────
+
+const cursor: Integration = {
+  id: "cursor",
+  displayName: "Cursor",
+  scopes: CURSOR_HOOK_SCOPES,
+  eventTypes: CURSOR_HOOK_EVENT_TYPES as unknown as readonly string[],
+  hookMarker: FAILPROOFAI_HOOK_MARKER,
+
+  getSettingsPath(scope: string, cwd?: string): string {
+    const base = cwd ?
resolve(cwd) : process.cwd(); + switch (scope) { + case "user": + return resolve(homedir(), ".cursor", "hooks.json"); + case "project": + return resolve(base, ".cursor", "hooks.json"); + default: + return resolve(homedir(), ".cursor", "hooks.json"); + } + }, + + readSettings(settingsPath: string): Record { + if (!existsSync(settingsPath)) return { version: 1 }; + const raw = JSON.parse(readFileSync(settingsPath, "utf8")) as Record; + if (!raw.version) raw.version = 1; + return raw; + }, + + writeSettings(settingsPath: string, settings: Record): void { + if (!settings.version) settings.version = 1; + writeJsonFile(settingsPath, settings); + }, + + buildHookEntry(binaryPath: string, eventType: string): Record { + // eventType is the camelCase Cursor event name — map to PascalCase for --hook flag + const pascalEvent = CURSOR_EVENT_MAP[eventType as CursorHookEventType] ?? eventType; + // Use sh -lc to ensure node/bun is in PATH (especially for nvm/asdf users) + return { + command: `sh -lc '"${binaryPath}" --hook ${pascalEvent}'`, + timeout: 60, + }; + }, + + isFailproofaiHook(hook: Record): boolean { + // Cursor format doesn't support the marker field — rely on command string detection + const cmd = typeof hook.command === "string" ? 
hook.command : ""; + return cmd.includes("failproofai") && cmd.includes("--hook"); + }, + + writeHookEntries(settings: Record, binaryPath: string): void { + const s = settings as CursorHooksFile; + if (!s.hooks) s.hooks = {}; + + for (const eventType of CURSOR_HOOK_EVENT_TYPES) { + const hookEntry = this.buildHookEntry(binaryPath, eventType) as unknown as CursorHookEntry; + + if (!s.hooks[eventType]) s.hooks[eventType] = []; + const entries: CursorHookEntry[] = s.hooks[eventType]; + + // Find and replace existing failproofai hook, or append + const idx = entries.findIndex((h) => + this.isFailproofaiHook(h as unknown as Record), + ); + if (idx >= 0) { + entries[idx] = hookEntry; + } else { + entries.push(hookEntry); + } + } + }, + + removeHooksFromFile(settingsPath: string): number { + const settings = this.readSettings(settingsPath) as CursorHooksFile; + if (!settings.hooks) return 0; + + let removed = 0; + for (const eventType of Object.keys(settings.hooks)) { + const entries = settings.hooks[eventType]; + if (!Array.isArray(entries)) continue; + + const before = entries.length; + settings.hooks[eventType] = entries.filter( + (h) => !this.isFailproofaiHook(h as unknown as Record), + ); + removed += before - settings.hooks[eventType].length; + + if (settings.hooks[eventType].length === 0) delete settings.hooks[eventType]; + } + + if (Object.keys(settings.hooks).length === 0) delete settings.hooks; + this.writeSettings(settingsPath, settings as unknown as Record); + return removed; + }, + + hooksInstalledInSettings(scope: string, cwd?: string): boolean { + const settingsPath = this.getSettingsPath(scope, cwd); + if (!existsSync(settingsPath)) return false; + try { + const settings = this.readSettings(settingsPath) as CursorHooksFile; + if (!settings.hooks) return false; + for (const entries of Object.values(settings.hooks)) { + if (!Array.isArray(entries)) continue; + if (entries.some((h) => this.isFailproofaiHook(h as unknown as Record))) { + return true; + } + } + 
} catch { + // Corrupted config — treat as not installed + } + return false; + }, + + detectInstalled(): boolean { + return binaryExists("cursor"); + }, +}; + +// ── Registry ──────────────────────────────────────────────────────────────── + +export const INTEGRATIONS: Record = { + "claude-code": claudeCode, + "cursor": cursor, +}; + +export function getIntegration(id: IntegrationType): Integration { + const integration = INTEGRATIONS[id]; + if (!integration) { + throw new Error(`Unknown integration: ${id}`); + } + return integration; +} + +export function listIntegrations(): Integration[] { + return Object.values(INTEGRATIONS); +} + +export function listIntegrationIds(): IntegrationType[] { + return Object.keys(INTEGRATIONS) as IntegrationType[]; +} + +export function detectInstalledIntegrations(): Integration[] { + return Object.values(INTEGRATIONS).filter((i) => i.detectInstalled()); +} diff --git a/src/hooks/manager.ts b/src/hooks/manager.ts index 499c0e92..33c9c228 100644 --- a/src/hooks/manager.ts +++ b/src/hooks/manager.ts @@ -1,197 +1,145 @@ /** - * Install/remove/list failproofai hooks in Claude Code's settings. + * Install/remove/list failproofai hooks in Claude Code or Cursor settings. 
*/ import { execSync } from "node:child_process"; import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs"; import { resolve, dirname, basename } from "node:path"; import { homedir, platform, arch, release, hostname } from "node:os"; import { - HOOK_EVENT_TYPES, - HOOK_SCOPES, - FAILPROOFAI_HOOK_MARKER, type HookScope, - type ClaudeHookEntry, - type ClaudeHookMatcher, - type ClaudeSettings, + type IntegrationType, } from "./types"; import { promptPolicySelection } from "./install-prompt"; -import { readMergedHooksConfig, readScopedHooksConfig, writeScopedHooksConfig } from "./hooks-config"; +import { + readMergedHooksConfig, + readScopedHooksConfig, + writeScopedHooksConfig, + getConfigPathForScope, +} from "./hooks-config"; import type { HooksConfig } from "./policy-types"; import { BUILTIN_POLICIES } from "./builtin-policies"; import { loadCustomHooks, discoverPolicyFiles } from "./custom-hooks-loader"; import { trackHookEvent } from "./hook-telemetry"; import { getInstanceId, hashToId } from "../../lib/telemetry-id"; import { CliError } from "../cli-error"; +import { getIntegration, type Integration } from "./integrations"; const VALID_POLICY_NAMES = new Set(BUILTIN_POLICIES.map((p) => p.name)); -export function getSettingsPath(scope: HookScope, cwd?: string): string { - const base = cwd ? 
resolve(cwd) : process.cwd(); - switch (scope) { - case "user": - return resolve(homedir(), ".claude", "settings.json"); - case "project": - return resolve(base, ".claude", "settings.json"); - case "local": - return resolve(base, ".claude", "settings.local.json"); - } +export function getSettingsPath( + scope: HookScope | "repo", + cwd?: string, + integration: IntegrationType = "claude-code", +): string { + return getIntegration(integration).getSettingsPath(scope as any, cwd); } -function scopeLabel(scope: HookScope): string { - switch (scope) { - case "user": - return `~/.claude/settings.json`; - case "project": - return `{cwd}/.claude/settings.json`; - case "local": - return `{cwd}/.claude/settings.local.json`; - } +export function hooksInstalledInSettings( + scope: HookScope | "repo", + cwd?: string, + integration: IntegrationType = "claude-code", +): boolean { + return getIntegration(integration).hooksInstalledInSettings(scope as any, cwd); } -function readSettings(settingsPath: string): ClaudeSettings { - if (!existsSync(settingsPath)) { - return {}; +/** + * Resolve the path to the failproofai binary. 
+ */ +function resolveFailproofaiBinary(): string { + // Use FAILPROOFAI_DIST_PATH if provided (for development/testing) + if (process.env.FAILPROOFAI_DIST_PATH) { + const distBin = resolve(process.env.FAILPROOFAI_DIST_PATH, "bin", "failproofai.mjs"); + if (existsSync(distBin)) return distBin; + + const distCli = resolve(process.env.FAILPROOFAI_DIST_PATH, "cli.mjs"); + if (existsSync(distCli)) return distCli; + + const rootBin = resolve(process.env.FAILPROOFAI_DIST_PATH, "..", "bin", "failproofai.mjs"); + if (existsSync(rootBin)) return rootBin; } - const raw = readFileSync(settingsPath, "utf8"); - return JSON.parse(raw) as ClaudeSettings; -} + // Try finding it relative to this file (in dist or src) + const relativeDist = resolve(__dirname, "..", "cli.mjs"); + if (existsSync(relativeDist)) return relativeDist; -function writeSettings(settingsPath: string, settings: ClaudeSettings): void { - mkdirSync(dirname(settingsPath), { recursive: true }); - writeFileSync(settingsPath, JSON.stringify(settings, null, 2) + "\n", "utf8"); -} + const relativeSrc = resolve(__dirname, "..", "..", "bin", "failproofai.mjs"); + if (existsSync(relativeSrc)) return relativeSrc; -function resolveFailproofaiBinary(): string { - try { - const cmd = process.platform === "win32" ? "where failproofai" : "which failproofai"; - const result = execSync(cmd, { encoding: "utf8" }).trim(); - // `where` on Windows may return multiple lines; take the first - return result.split("\n")[0].trim(); - } catch { - throw new CliError( - "failproofai binary not found in PATH.\n" + - "Install it globally first: npm install -g failproofai" - ); - } + // Fallback to global bun bin (typical for users) + return resolve(homedir(), ".bun", "bin", "failproofai"); } -function isFailproofaiHook(hook: Record): boolean { - if (hook[FAILPROOFAI_HOOK_MARKER] === true) return true; - // Fallback for legacy installs that predate the marker - const cmd = typeof hook.command === "string" ? 
hook.command : ""; - return cmd.includes("failproofai") && cmd.includes("--hook"); +function scopeLabel(integration: Integration, scope: string, cwd?: string): string { + const settingsPath = integration.getSettingsPath(scope as any, cwd); + const homeDir = homedir(); + const baseDir = cwd ? resolve(cwd) : process.cwd(); + + if (settingsPath.startsWith(`${homeDir}/`)) { + return `~/${settingsPath.slice(homeDir.length + 1)}`; + } + if (settingsPath.startsWith(`${baseDir}/`)) { + return `{cwd}/${settingsPath.slice(baseDir.length + 1)}`; + } + return settingsPath; } -function validatePolicyNames(names: string[]): void { - const invalid = names.filter((n) => !VALID_POLICY_NAMES.has(n)); - if (invalid.length > 0) { - const validList = [...VALID_POLICY_NAMES].join(", "); +function assertSupportedScope(integration: Integration, scope: string): void { + if (!integration.scopes.includes(scope)) { throw new CliError( - `Unknown policy name(s): ${invalid.join(", ")}\n` + - `Valid policies: ${validList}` + `Scope "${scope}" is not supported for ${integration.displayName}. ` + + `Supported scopes: ${integration.scopes.join(", ")}`, ); } } /** Return only scopes whose settings paths are unique (first wins). 
*/ -function deduplicateScopes(scopes: readonly HookScope[], cwd?: string): HookScope[] { - const seen = new Set(); - return scopes.filter((s) => { - const p = getSettingsPath(s, cwd); - if (seen.has(p)) return false; - seen.add(p); - return true; - }); -} - -export function hooksInstalledInSettings(scope: HookScope, cwd?: string): boolean { - const settingsPath = getSettingsPath(scope, cwd); - if (!existsSync(settingsPath)) return false; - try { - const settings = readSettings(settingsPath); - if (!settings.hooks) return false; - for (const matchers of Object.values(settings.hooks)) { - if (!Array.isArray(matchers)) continue; - for (const matcher of matchers) { - if (!matcher.hooks) continue; - if (matcher.hooks.some((h) => isFailproofaiHook(h as Record))) { - return true; - } - } +function deduplicateScopes( + integration: Integration, + scopes: readonly string[], + cwd?: string, +): string[] { + const paths = new Set(); + const result: string[] = []; + for (const s of scopes) { + const p = integration.getSettingsPath(s as any, cwd); + if (!paths.has(p)) { + paths.add(p); + result.push(s); } - } catch { - // Corrupted settings — treat as not installed } - return false; + return result; } - -function removeHooksFromSettingsFile(settingsPath: string): number { - const settings = readSettings(settingsPath); - - if (!settings.hooks) return 0; - - let removed = 0; - - for (const eventType of Object.keys(settings.hooks)) { - const matchers = settings.hooks[eventType]; - if (!Array.isArray(matchers)) continue; - - for (let i = matchers.length - 1; i >= 0; i--) { - const matcher = matchers[i]; - if (!matcher.hooks) continue; - - const before = matcher.hooks.length; - matcher.hooks = matcher.hooks.filter( - (h) => !isFailproofaiHook(h as Record) - ); - removed += before - matcher.hooks.length; - - // Remove empty matchers - if (matcher.hooks.length === 0) { - matchers.splice(i, 1); - } - } - - // Remove empty event type arrays - if (matchers.length === 0) { - delete 
settings.hooks[eventType]; - } - } - - // Remove empty hooks object - if (Object.keys(settings.hooks).length === 0) { - delete settings.hooks; +function validatePolicyNames(names: string[]): void { + const unknown = names.filter((n) => !VALID_POLICY_NAMES.has(n)); + if (unknown.length > 0) { + const list = [...VALID_POLICY_NAMES].sort().join(", "); + throw new CliError(`Unknown policy name(s): ${unknown.join(", ")}\nValid policies: ${list}`); } - - writeSettings(settingsPath, settings); - return removed; } -/** - * Install hooks into Claude Code settings. - * - * @param policyNames — if provided, skip interactive prompt: - * - `["all"]` → enable all policies - * - `["block-sudo", "block-rm-rf"]` → enable specific policies - * - `undefined` → interactive prompt (pre-loads current config if exists) - * @param scope — settings scope to write to (default: "user") - */ export async function installHooks( policyNames?: string[], - scope: HookScope = "user", + scope: HookScope | "repo" = "user", cwd?: string, includeBeta = false, source?: string, customPoliciesPath?: string, removeCustomHooks = false, + integration: IntegrationType = "claude-code", ): Promise { + const integ = getIntegration(integration); + assertSupportedScope(integ, scope); + + const binaryPath = resolveFailproofaiBinary(); + + // Capture existing config before overwriting (used for telemetry diff) + const previousConfig = readScopedHooksConfig(scope as HookScope, cwd); + const previousEnabled = new Set(previousConfig.enabledPolicies); + // Validate user input first before any system checks if (policyNames !== undefined && policyNames.length > 0) { const nonAllNames = policyNames.filter((n) => n !== "all"); - // Check unknown names first (most actionable error for the user) if (nonAllNames.length > 0) validatePolicyNames(nonAllNames); - // Then check if "all" is mixed with valid specific names if (policyNames.includes("all") && nonAllNames.length > 0) { throw new CliError( `"all" cannot be combined with 
specific policy names.\n` + @@ -200,16 +148,10 @@ export async function installHooks( } } - const binaryPath = resolveFailproofaiBinary(); - - // Capture existing config before overwriting (used for telemetry diff) - const previousConfig = readScopedHooksConfig(scope, cwd); - const previousEnabled = new Set(previousConfig.enabledPolicies); - let selectedPolicies: string[]; if (policyNames !== undefined) { - // Non-interactive path: explicit array was provided (may be empty) + // Non-interactive path let incoming: string[]; if (policyNames.length === 1 && policyNames[0] === "all") { incoming = BUILTIN_POLICIES @@ -218,10 +160,10 @@ export async function installHooks( } else { incoming = policyNames; } - // Additive: union with whatever was already enabled, deduplicated. + // Additive selectedPolicies = [...new Set([...previousConfig.enabledPolicies, ...incoming])]; } else { - // Interactive — pre-load current config if it exists + // Interactive const preSelected = previousConfig.enabledPolicies.length > 0 ? previousConfig.enabledPolicies : undefined; selectedPolicies = await promptPolicySelection(preSelected, { includeBeta }); } @@ -241,26 +183,20 @@ export async function installHooks( process.exit(1); } if (validatedHooks.length === 0) { - console.error( - `Error: no hooks registered in ${customPoliciesPath}. ` + - `Make sure your file calls customPolicies.add(...) 
at least once.`, - ); + console.error(`Error: no hooks registered in ${customPoliciesPath}.`); process.exit(1); } - console.log( - `\nValidated ${validatedHooks.length} custom hook(s): ${validatedHooks.map((h) => h.name).join(", ")}`, - ); + console.log(`\nValidated ${validatedHooks.length} custom hook(s): ${validatedHooks.map((h) => h.name).join(", ")}`); } - writeScopedHooksConfig(configToWrite, scope, cwd); + + writeScopedHooksConfig(configToWrite, scope as HookScope, cwd); console.log(`\nEnabled ${selectedPolicies.length} policy(ies): ${selectedPolicies.join(", ")}`); - if (removeCustomHooks) { - console.log("Custom hooks path cleared."); - } else if (configToWrite.customPoliciesPath) { - console.log(`Custom hooks path: ${configToWrite.customPoliciesPath}`); - } - const settingsPath = getSettingsPath(scope, cwd); - const settings = readSettings(settingsPath); + const settingsPath = integ.getSettingsPath(scope as any, cwd); + const settings = integ.readSettings(settingsPath); + integ.writeHookEntries(settings, binaryPath); + integ.writeSettings(settingsPath, settings); + integ.postInstall?.(); if (!settings.hooks) { settings.hooks = {}; @@ -313,6 +249,7 @@ export async function installHooks( const distinctId = getInstanceId(); await trackHookEvent(distinctId, "hooks_installed", { scope, + integration, policies: selectedPolicies, policy_count: selectedPolicies.length, policies_added: policiesAdded, @@ -327,11 +264,9 @@ export async function installHooks( param_policy_names: configToWrite.policyParams ? Object.keys(configToWrite.policyParams) : [], command_format: scope === "project" ? 
"npx" : "absolute", }); - } catch { - // Telemetry is best-effort — never block the operation - } + } catch { /* best effort */ } - console.log(`Failproof AI hooks installed for all ${HOOK_EVENT_TYPES.length} event types (scope: ${scope}).`); + console.log(`Failproof AI hooks installed for all ${integ.eventTypes.length} event types (scope: ${scope}).`); console.log(`Settings: ${settingsPath}`); if (scope === "project") { console.log(`Command: npx -y failproofai`); @@ -341,10 +276,10 @@ export async function installHooks( } // Warn about duplicate-scope installations - const otherScopes = deduplicateScopes(HOOK_SCOPES, cwd).filter((s) => s !== scope); - const duplicates = otherScopes.filter((s) => hooksInstalledInSettings(s, cwd)); + const otherScopes = deduplicateScopes(integ, integ.scopes, cwd).filter((s) => s !== scope); + const duplicates = otherScopes.filter((s) => integ.hooksInstalledInSettings(s as any, cwd)); if (duplicates.length > 0) { - const scopeList = duplicates.map((s) => `${s} (${scopeLabel(s)})`).join(", "); + const scopeList = duplicates.map((s) => `${s} (${scopeLabel(integ, s, cwd)})`).join(", "); console.log(); console.log(`\x1B[33mWarning: Failproof AI hooks are also installed at ${scopeList}.\x1B[0m`); console.log(`Having hooks in multiple scopes may cause duplicate policy evaluation.`); @@ -353,18 +288,15 @@ export async function installHooks( } } -/** - * Remove hooks from Claude Code settings. 
- * - * @param policyNames — if provided: - * - `undefined` or `["all"]` → remove all failproofai hooks from settings (original behavior) - * - `["block-sudo"]` → disable specific policies in config, keep hooks installed - * @param scope — settings scope to remove from (default: "user"), or "all" to remove from all scopes - * @param opts.betaOnly — set to true when removing only beta policies (adds beta_only flag to telemetry) - */ -export async function removeHooks(policyNames?: string[], scope: HookScope | "all" = "user", cwd?: string, opts?: { betaOnly?: boolean; source?: string; removeCustomHooks?: boolean }): Promise { - // Resolve the effective config scope ("all" falls back to "user" for config reads/writes) - const configScope: HookScope = scope === "all" ? "user" : scope; +export async function removeHooks( + policyNames?: string[], + scope: HookScope | "repo" | "all" = "user", + cwd?: string, + opts?: { betaOnly?: boolean; source?: string; removeCustomHooks?: boolean; integration?: IntegrationType }, + integration: IntegrationType = "claude-code", +): Promise { + const integ = getIntegration(opts?.integration ?? integration); + const configScope: HookScope = scope === "all" ? "user" : (scope as HookScope); // Clear custom hooks path if requested if (opts?.removeCustomHooks) { @@ -395,12 +327,13 @@ export async function removeHooks(policyNames?: string[], scope: HookScope | "al }; writeScopedHooksConfig(updatedConfig, configScope, cwd); - // Telemetry: track policy-only removal from config + // Telemetry try { const distinctId = getInstanceId(); const actuallyRemoved = policyNames.filter((p) => config.enabledPolicies.includes(p)); await trackHookEvent(distinctId, "hooks_removed", { scope, + integration: integ.id, removal_mode: opts?.betaOnly ? "beta_policies" : "policies", beta_only: opts?.betaOnly ?? 
false, policies_removed: actuallyRemoved, @@ -411,9 +344,7 @@ export async function removeHooks(policyNames?: string[], scope: HookScope | "al os_release: release(), hostname_hash: hashToId(hostname()), }); - } catch { - // Telemetry is best-effort — never block the operation - } + } catch { /* best effort */ } console.log(`Disabled ${policyNames.length - notEnabled.length} policy(ies).`); console.log(`Remaining: ${remaining.length > 0 ? remaining.join(", ") : "(none)"}`); @@ -423,32 +354,19 @@ export async function removeHooks(policyNames?: string[], scope: HookScope | "al // Capture enabled policies before clearing (used for accurate telemetry below) const configBeforeRemoval = readScopedHooksConfig(configScope, cwd); - // Remove all failproofai hooks from Claude Code settings - const scopesToRemove: HookScope[] = scope === "all" ? [...HOOK_SCOPES] : [scope]; + if (scope !== "all") { + assertSupportedScope(integ, scope); + } + + // Remove all failproofai hooks from the selected integration's settings + const scopesToRemove = scope === "all" ? [...integ.scopes] : [scope]; let totalRemoved = 0; for (const s of scopesToRemove) { - const settingsPath = getSettingsPath(s, cwd); + const settingsPath = integ.getSettingsPath(s as any, cwd); + if (!existsSync(settingsPath)) continue; - if (!existsSync(settingsPath)) { - if (scope !== "all") { - console.log("No settings file found. Nothing to remove."); - return; - } - continue; - } - - const settings = readSettings(settingsPath); - - if (!settings.hooks) { - if (scope !== "all") { - console.log("No hooks found in settings. 
Nothing to remove."); - return; - } - continue; - } - - const removed = removeHooksFromSettingsFile(settingsPath); + const removed = integ.removeHooksFromFile(settingsPath); totalRemoved += removed; if (scope !== "all") { @@ -459,16 +377,14 @@ export async function removeHooks(policyNames?: string[], scope: HookScope | "al if (scope === "all") { console.log(`Removed ${totalRemoved} failproofai hook(s) from all scopes.`); - for (const s of scopesToRemove) { - console.log(` ${s}: ${getSettingsPath(s, cwd)}`); - } } - // Telemetry: track full hook removal from settings + // Telemetry try { const distinctId = getInstanceId(); await trackHookEvent(distinctId, "hooks_removed", { scope, + integration: integ.id, removal_mode: "hooks", policies_removed: configBeforeRemoval.enabledPolicies, removed_count: totalRemoved, @@ -478,58 +394,40 @@ export async function removeHooks(policyNames?: string[], scope: HookScope | "al os_release: release(), hostname_hash: hashToId(hostname()), }); - } catch { - // Telemetry is best-effort — never block the operation - } + } catch { /* best effort */ } // Clear policy config when removing from all scopes, or when no hooks remain in any scope if (scope === "all") { - // Clear config across all three scopes - for (const s of HOOK_SCOPES) { - const existing = readScopedHooksConfig(s, cwd); - if (existing.enabledPolicies.length > 0 || existing.customPoliciesPath || existing.policyParams) { - const { customPoliciesPath: _drop, policyParams: _dropParams, ...rest } = existing; - writeScopedHooksConfig({ ...rest, enabledPolicies: [] }, s, cwd); + for (const s of integ.scopes) { + if (s === "repo") continue; + const existing = readScopedHooksConfig(s as HookScope, cwd); + if (existing.enabledPolicies.length > 0) { + writeScopedHooksConfig({ ...existing, enabledPolicies: [] }, s as HookScope, cwd); } } - } else if (!HOOK_SCOPES.some((s) => hooksInstalledInSettings(s, cwd))) { - const existing = readScopedHooksConfig(configScope, cwd); - const { 
customPoliciesPath: _drop, policyParams: _dropParams, ...rest } = existing; - writeScopedHooksConfig({ ...rest, enabledPolicies: [] }, configScope, cwd); + } else if (!integ.scopes.some((s) => integ.hooksInstalledInSettings(s as any, cwd))) { + writeScopedHooksConfig({ ...configBeforeRemoval, enabledPolicies: [] }, configScope, cwd); } } -/** - * List all available policies with their per-scope enabled status. - * Layout adapts to the number of installed scopes: - * 0 scopes: compact "not installed" summary - * 1 scope: table with header + checkmarks, beta policies in a separate section - * 2+ scopes: column table with per-scope status, beta policies in a separate section - * - * Also shows: - * - Configured policyParams values beneath each policy - * - Warnings for unknown policyParams keys - * - Custom Hooks section if customPoliciesPath is set - */ -export async function listHooks(cwd?: string): Promise { +export async function listHooks( + cwd?: string, + integration: IntegrationType = "claude-code", +): Promise { + const integ = getIntegration(integration); + // Multi-scope config is merged for listing const config = readMergedHooksConfig(cwd); const enabledSet = new Set(config.enabledPolicies); - // Determine which scopes have hooks installed (deduplicate when paths overlap, e.g. 
cwd === home) - const uniqueScopes = deduplicateScopes(HOOK_SCOPES, cwd); - const installedScopes = uniqueScopes.filter((s) => hooksInstalledInSettings(s, cwd)); + const uniqueScopes = deduplicateScopes(integ, integ.scopes, cwd); + const installedScopes = uniqueScopes.filter((s) => integ.hooksInstalledInSettings(s as any, cwd)); - // Separate beta from regular policies const regularPolicies = BUILTIN_POLICIES.filter((p) => !p.beta); const betaPolicies = BUILTIN_POLICIES.filter((p) => p.beta); - // Dynamic name column width based on longest policy name const nameColWidth = Math.max(...BUILTIN_POLICIES.map((p) => p.name.length)) + 2; - - // All known builtin policy names (for unknown policyParams key detection) const builtinPolicyNames = new Set(BUILTIN_POLICIES.map((p) => p.name)); - // Helper: print params summary lines beneath a policy row const printParamsSummary = (policyName: string, indent: string) => { const params = config.policyParams?.[policyName]; if (!params) return; @@ -538,99 +436,82 @@ export async function listHooks(cwd?: string): Promise { } }; - const statusCol = 8; - const printSimpleRow = (policy: { name: string; description: string }) => { - const mark = enabledSet.has(policy.name) ? `\x1B[32m\u2713\x1B[0m` : " "; - console.log(` ${mark}${" ".repeat(statusCol - 1)}${policy.name.padEnd(nameColWidth)}${policy.description}`); - printParamsSummary(policy.name, ` ${" ".repeat(statusCol)}`); - }; - const printBetaSection = (printRow: (p: { name: string; description: string }) => void) => { - if (betaPolicies.length > 0) { - console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); - for (const policy of betaPolicies) printRow(policy); - } - }; + const statusCol = installedScopes.length > 1 ? 
installedScopes.length * 9 : 8; if (installedScopes.length === 0) { - // State A: No hooks installed — show table with configured state + descriptions - console.log("\nFailproof AI Policies \u2014 not installed\n"); - - console.log(` ${"Status".padEnd(statusCol)}${"Name".padEnd(nameColWidth)}Description`); + console.log(`\nFailproof AI Policies \u2014 not installed (${integ.displayName})\n`); + console.log(` ${"Status".padEnd(8)}${"Name".padEnd(nameColWidth)}Description`); console.log(` ${"\u2500".repeat(6)} ${"\u2500".repeat(nameColWidth - 2)} ${"\u2500".repeat(38)}`); - for (const policy of regularPolicies) printSimpleRow(policy); - printBetaSection(printSimpleRow); + for (const p of regularPolicies) { + const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; + console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); + printParamsSummary(p.name, " "); + } - if (config.enabledPolicies.length > 0) { - console.log("\n Policies not installed. Run `failproofai policies --install` to activate."); - } else { - console.log("\n Run `failproofai policies --install` to get started."); + if (betaPolicies.length > 0) { + console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); + for (const p of betaPolicies) { + const mark = enabledSet.has(p.name) ? 
`\x1B[32m\u2713\x1B[0m` : " "; + console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); + printParamsSummary(p.name, " "); + } } - console.log(" Config: ~/.failproofai/policies-config.json\n"); + console.log("\n Run `failproofai policies --install` to get started."); } else if (installedScopes.length === 1) { - // State B: Single scope — table with header row const scope = installedScopes[0]; console.log(`\nFailproof AI Hook Policies (${scope})\n`); - - console.log(` ${"Status".padEnd(statusCol)}${"Name".padEnd(nameColWidth)}Description`); + console.log(` ${"Status".padEnd(8)}${"Name".padEnd(nameColWidth)}Description`); console.log(` ${"\u2500".repeat(6)} ${"\u2500".repeat(nameColWidth - 2)} ${"\u2500".repeat(38)}`); - for (const policy of regularPolicies) printSimpleRow(policy); - printBetaSection(printSimpleRow); - - console.log("\n Config: ~/.failproofai/policies-config.json\n"); + for (const p of regularPolicies) { + const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; + console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); + printParamsSummary(p.name, " "); + } + if (betaPolicies.length > 0) { + console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); + for (const p of betaPolicies) { + const mark = enabledSet.has(p.name) ? 
`\x1B[32m\u2713\x1B[0m` : " "; + console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); + printParamsSummary(p.name, " "); + } + } } else { - // State C: Multiple scopes — column table const COL = 9; - const scopeLabelMap: Record = { - user: "User", - project: "Project", - local: "Local", - }; - - console.log("\nFailproof AI Hook Policies\n"); + const formatScopeName = (s: string) => `${s[0].toUpperCase()}${s.slice(1)}`; + console.log(`\nFailproof AI Hook Policies (${integ.displayName})\n`); - // Header with only installed scope columns + separator - const buildScopePrefix = () => { - let s = " "; - for (const sc of installedScopes) s += scopeLabelMap[sc].padEnd(COL); - return s; - }; - const scopeHeaderWidth = installedScopes.length * COL; - console.log(`${buildScopePrefix()}${"Name".padEnd(nameColWidth)}Description`); - console.log(` ${"\u2500".repeat(scopeHeaderWidth)}${"\u2500".repeat(nameColWidth)}${"\u2500".repeat(38)}`); + let header = " "; + for (const s of installedScopes) header += formatScopeName(s).padEnd(COL); + header += "Name".padEnd(nameColWidth) + "Description"; + console.log(header); + console.log(` ${"\u2500".repeat(installedScopes.length * COL)}${"\u2500".repeat(nameColWidth)}${"\u2500".repeat(38)}`); - const printMultiScopeRow = (policy: { name: string; description: string }) => { - const enabled = enabledSet.has(policy.name); + const printRow = (p: { name: string; description: string }) => { let row = " "; - for (const _scope of installedScopes) { - if (enabled) { - row += `\x1B[32m\u2713 ON\x1B[0m` + " ".repeat(COL - 4); - } else { - row += " OFF" + " ".repeat(COL - 5); - } + const enabled = enabledSet.has(p.name); + for (const _s of installedScopes) { + row += enabled ? 
`\x1B[32m\u2713 ON\x1B[0m`.padEnd(COL + 9) : ` OFF`.padEnd(COL); } - row += policy.name.padEnd(nameColWidth) + policy.description; + row += p.name.padEnd(nameColWidth) + p.description; console.log(row); - printParamsSummary(policy.name, ` ${" ".repeat(scopeHeaderWidth)}`); + printParamsSummary(p.name, " ".repeat(2 + installedScopes.length * COL)); }; - for (const policy of regularPolicies) printMultiScopeRow(policy); - + for (const p of regularPolicies) printRow(p); if (betaPolicies.length > 0) { console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); - for (const policy of betaPolicies) printMultiScopeRow(policy); + for (const p of betaPolicies) printRow(p); } - - console.log("\n Config: ~/.failproofai/policies-config.json"); - - // Multi-scope warning - const scopeNames = installedScopes.join(", "); - console.log(); - console.log(`\x1B[33m\u26A0 Hooks in multiple scopes (${scopeNames}).\x1B[0m`); - console.log(" Consider keeping one. Remove with: failproofai policies --uninstall --scope \n"); } + // Config path hint + const primaryScope = installedScopes.length > 0 ? 
installedScopes[0] : "user"; + const configPath = getConfigPathForScope(primaryScope as HookScope, cwd); + console.log(`\n Settings: ${integ.getSettingsPath(primaryScope as any, cwd)}`); + console.log(` Config: ${configPath}\n`); + // Warn about unknown policyParams keys if (config.policyParams) { for (const key of Object.keys(config.policyParams)) { @@ -640,7 +521,6 @@ export async function listHooks(cwd?: string): Promise { } } - // Custom Policies section if (config.customPoliciesPath) { console.log(`\n \u2500\u2500 Custom Policies (${config.customPoliciesPath}) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500`); if (!existsSync(config.customPoliciesPath)) { @@ -650,9 +530,8 @@ export async function listHooks(cwd?: string): Promise { if (hooks.length === 0) { console.log(` \x1B[31m\u2717 ERR failed to load (check ~/.failproofai/logs/hooks.log)\x1B[0m`); } else { - const descColWidth = nameColWidth; - for (const hook of hooks) { - console.log(` \x1B[32m\u2713\x1B[0m ${hook.name.padEnd(descColWidth)}${hook.description ?? ""}`); + for (const h of hooks) { + console.log(` \x1B[32m\u2713\x1B[0m ${h.name.padEnd(nameColWidth)}${h.description ?? ""}`); } } } diff --git a/src/hooks/policy-evaluator.ts b/src/hooks/policy-evaluator.ts index 893862ad..8eff82ed 100644 --- a/src/hooks/policy-evaluator.ts +++ b/src/hooks/policy-evaluator.ts @@ -45,7 +45,14 @@ export async function evaluatePolicies( hookLogInfo(`evaluating ${policies.length} policies for ${eventType}`); if (policies.length === 0) { - return { exitCode: 0, stdout: "", stderr: "", policyName: null, reason: null, decision: "allow" }; + return { + exitCode: 0, + stdout: session?.integration === "cursor" ? 
JSON.stringify({ continue: true, permission: "allow" }) : "", + stderr: "", + policyName: null, + reason: null, + decision: "allow", + }; } const baseCtx: PolicyContext = { @@ -96,13 +103,19 @@ export async function evaluatePolicies( const displayTool = ctx.toolName ?? "unknown tool"; if (eventType === "PreToolUse") { - const response = { + const response: any = { hookSpecificOutput: { hookEventName: eventType, permissionDecision: "deny", permissionDecisionReason: `Blocked ${displayTool} by failproofai because: ${reason}, as per the policy configured by the user`, }, }; + if (session?.integration === "cursor") { + response.continue = false; + response.permission = "deny"; + response.userMessage = response.hookSpecificOutput.permissionDecisionReason; + response.agentMessage = `Action blocked by security policy: ${reason}`; + } return { exitCode: 0, stdout: JSON.stringify(response), @@ -114,12 +127,15 @@ export async function evaluatePolicies( } if (eventType === "PostToolUse") { - const response = { + const response: any = { hookSpecificOutput: { hookEventName: eventType, additionalContext: `Blocked ${displayTool} by failproofai because: ${reason}, as per the policy configured by the user`, }, }; + if (session?.integration === "cursor") { + response.agentMessage = response.hookSpecificOutput.additionalContext; + } return { exitCode: 0, stdout: JSON.stringify(response), @@ -143,8 +159,8 @@ export async function evaluatePolicies( // Other event types: exit 2 return { - exitCode: 2, - stdout: "", + exitCode: session?.integration === "cursor" ? 0 : 2, + stdout: session?.integration === "cursor" ? 
JSON.stringify({ continue: false, permission: "deny", userMessage: reason }) : "", stderr: reason, policyName: policy.name, reason, @@ -190,12 +206,15 @@ export async function evaluatePolicies( }; } - const response = { + const response: any = { hookSpecificOutput: { hookEventName: eventType, additionalContext: `Instruction from failproofai: ${combined}`, }, }; + if (session?.integration === "cursor") { + response.agentMessage = response.hookSpecificOutput.additionalContext; + } return { exitCode: 0, stdout: JSON.stringify(response), @@ -212,13 +231,23 @@ export async function evaluatePolicies( const combined = allowEntries.map((e) => e.reason).join("\n"); const policyNames = allowEntries.map((e) => e.policyName); const supportsHookSpecificOutput = eventType === "PreToolUse" || eventType === "PostToolUse" || eventType === "UserPromptSubmit"; - const response = supportsHookSpecificOutput + const response: any = supportsHookSpecificOutput ? { hookSpecificOutput: { hookEventName: eventType, additionalContext: `Note from failproofai: ${combined}` } } : { reason: combined }; + if (session?.integration === "cursor" && supportsHookSpecificOutput) { + response.agentMessage = response.hookSpecificOutput.additionalContext; + } const stderrMsg = allowEntries .map((e) => `[failproofai] ${e.policyName}: ${e.reason}`) .join("\n"); return { exitCode: 0, stdout: JSON.stringify(response), stderr: stderrMsg + "\n", policyName: policyNames[0], policyNames, reason: combined, decision: "allow" }; } - return { exitCode: 0, stdout: "", stderr: "", policyName: null, reason: null, decision: "allow" }; + return { + exitCode: 0, + stdout: session?.integration === "cursor" ? 
JSON.stringify({ continue: true, permission: "allow" }) : "", + stderr: "", + policyName: null, + reason: null, + decision: "allow", + }; } diff --git a/src/hooks/types.ts b/src/hooks/types.ts index 9adbe409..c6941ed3 100644 --- a/src/hooks/types.ts +++ b/src/hooks/types.ts @@ -1,10 +1,15 @@ /** - * Constants and interfaces for Claude Code hooks integration. + * Constants and interfaces for hook integrations. */ export const HOOK_SCOPES = ["user", "project", "local"] as const; export type HookScope = (typeof HOOK_SCOPES)[number]; +export const INTEGRATION_TYPES = ["claude-code", "cursor"] as const; +export type IntegrationType = (typeof INTEGRATION_TYPES)[number]; + +export const CURSOR_HOOK_SCOPES = ["user", "project"] as const; + export const HOOK_EVENT_TYPES = [ "SessionStart", "SessionEnd", @@ -36,6 +41,58 @@ export const HOOK_EVENT_TYPES = [ export type HookEventType = (typeof HOOK_EVENT_TYPES)[number]; +export const CURSOR_HOOK_EVENT_TYPES = [ + "preToolUse", + "postToolUse", + "postToolUseFailure", + "sessionStart", + "sessionEnd", + "subagentStart", + "subagentStop", + "stop", + "preCompact", + "beforeShellExecution", + "afterShellExecution", + "beforeMCPExecution", + "afterMCPExecution", + "beforeReadFile", + "afterFileEdit", + "beforeSubmitPrompt", + "afterAgentResponse", + "afterAgentThought", + "beforeTabFileRead", + "afterTabFileEdit", +] as const; + +export type CursorHookEventType = (typeof CURSOR_HOOK_EVENT_TYPES)[number]; + +/** + * Maps Cursor camelCase event names to internal PascalCase event names + * used by the --hook CLI flag and policy matcher. 
+ */ +export const CURSOR_EVENT_MAP: Record = { + preToolUse: "PreToolUse", + postToolUse: "PostToolUse", + postToolUseFailure: "PostToolUseFailure", + sessionStart: "SessionStart", + sessionEnd: "SessionEnd", + subagentStart: "SubagentStart", + subagentStop: "SubagentStop", + stop: "Stop", + preCompact: "PreCompact", + beforeShellExecution: "PreToolUse", + afterShellExecution: "PostToolUse", + beforeMCPExecution: "PreToolUse", + afterMCPExecution: "PostToolUse", + beforeReadFile: "PreToolUse", + afterFileEdit: "PostToolUse", + beforeSubmitPrompt: "UserPromptSubmit", + afterAgentResponse: "PostToolUse", + afterAgentThought: "PostToolUse", + beforeTabFileRead: "PreToolUse", + afterTabFileEdit: "PostToolUse", +}; + export const FAILPROOFAI_HOOK_MARKER = "__failproofai_hook__" as const; export interface ClaudeHookEntry { @@ -46,6 +103,7 @@ export interface ClaudeHookEntry { } export interface ClaudeHookMatcher { + matcher?: string; hooks: Array>; } @@ -55,9 +113,23 @@ export interface SessionMetadata { cwd?: string; permissionMode?: string; hookEventName?: string; + integration?: IntegrationType; } export interface ClaudeSettings { hooks?: Record; [key: string]: unknown; } + +export interface CursorHookEntry { + command: string; + timeout?: number; + matcher?: string; + failClosed?: boolean; +} + +export interface CursorHooksFile { + version?: number; + hooks?: Record; + [key: string]: unknown; +} From 9731a4910dc00d6ffc79e5689927cc911393fda9 Mon Sep 17 00:00:00 2001 From: Yash Upadhyay Date: Mon, 13 Apr 2026 17:37:35 +0000 Subject: [PATCH 03/47] chore: suppress hydration warnings on UI buttons --- app/components/refresh-button.tsx | 2 ++ app/policies/hooks-client.tsx | 2 ++ 2 files changed, 4 insertions(+) diff --git a/app/components/refresh-button.tsx b/app/components/refresh-button.tsx index 3e79dee0..772cff71 100644 --- a/app/components/refresh-button.tsx +++ b/app/components/refresh-button.tsx @@ -42,6 +42,7 @@ export function RefreshButton({ className }: 
RefreshButtonProps) { )} > + ))} + + ); +} + // -- Policy Config Modal -- function PolicyConfigModal({ @@ -798,6 +838,92 @@ function PolicyConfigModal({ ); } +function IntegrationSelectModal({ + integrations, + onClose, + onInstall, +}: { + integrations: IntegrationStatus[] | null; + onClose: () => void; + onInstall: (ids: string[]) => void; +}) { + const [selected, setSelected] = useState>(() => + new Set(integrations?.filter((i) => i.installed).map((i) => i.id) ?? []) + ); + + useEffect(() => { + if (integrations) { + setSelected(new Set(integrations.filter((i) => i.installed).map((i) => i.id))); + } + }, [integrations]); + + useEffect(() => { + const handleKey = (e: KeyboardEvent) => { if (e.key === "Escape") onClose(); }; + document.addEventListener("keydown", handleKey); + return () => document.removeEventListener("keydown", handleKey); + }, [onClose]); + + const toggle = (id: string) => { + setSelected((prev) => { + const next = new Set(prev); + if (next.has(id)) next.delete(id); else next.add(id); + return next; + }); + }; + + return ( +
{ if (e.target === e.currentTarget) onClose(); }} + > +
+
+
+

Select CLI Integrations

+

Where do you want to install hooks?

+
+ +
+
+ {integrations === null ? ( +

Loading…

+ ) : ( + integrations.map((integ) => ( + + )) + )} +
+
+ + +
+
+
+ ); +} + function formatParamValue(type: string, value: unknown): string { if (type === "string[]" || type === "pattern[]") { const arr = Array.isArray(value) ? value : []; @@ -887,6 +1013,9 @@ function PoliciesTab({ onHooksInstallChange }: { onHooksInstallChange?: (install const [actionError, setActionError] = useState(null); const [hooksWarning, setHooksWarning] = useState(null); const [configuringPolicy, setConfiguringPolicy] = useState(null); + const [showIntegrationModal, setShowIntegrationModal] = useState(false); + const [integrationsList, setIntegrationsList] = useState(null); + const [selectedCliTab, setSelectedCliTab] = useState(null); // null = Global const reload = useCallback(async () => { try { @@ -932,11 +1061,65 @@ function PoliciesTab({ onHooksInstallChange }: { onHooksInstallChange?: (install }); }; + const handleToggleCli = (integrationId: string, policyName: string, mode: "enable" | "disable" | "inherit") => { + if (!config) return; + // Optimistic update + setConfig((prev) => { + if (!prev) return prev; + const current = prev.cliOverrides[integrationId] || { + enabledPolicies: [], + disabledPolicies: [], + policyParams: {}, + }; + const enabled = new Set(current.enabledPolicies); + const disabled = new Set(current.disabledPolicies); + + if (mode === "enable") { + enabled.add(policyName); + disabled.delete(policyName); + } else if (mode === "disable") { + disabled.add(policyName); + enabled.delete(policyName); + } else { + enabled.delete(policyName); + disabled.delete(policyName); + } + + return { + ...prev, + cliOverrides: { + ...prev.cliOverrides, + [integrationId]: { + ...current, + enabledPolicies: Array.from(enabled), + disabledPolicies: Array.from(disabled), + }, + }, + }; + }); + + startTransition(async () => { + try { + await toggleCliPolicyAction(integrationId, policyName, mode); + } catch { + setActionError("Failed to save CLI policy change."); + reload(); + } + }); + }; + const handleInstall = () => { + setIntegrationsList(null); + 
setShowIntegrationModal(true); + getIntegrationsStatusAction().then(setIntegrationsList); + }; + + const handleInstallWithIntegrations = (integrations: string[]) => { + setShowIntegrationModal(false); startTransition(async () => { try { setActionError(null); - await installHooksWebAction("user"); + await installHooksWebAction("user", integrations); await reload(); } catch (e) { setActionError(e instanceof Error ? e.message : "Failed to install hooks."); @@ -963,7 +1146,11 @@ function PoliciesTab({ onHooksInstallChange }: { onHooksInstallChange?: (install startTransition(async () => { try { setActionError(null); - await updatePolicyParamsAction(policyName, params); + if (selectedCliTab) { + await updateCliPolicyParamsAction(selectedCliTab, policyName, params); + } else { + await updatePolicyParamsAction(policyName, params); + } await reload(); } catch (e) { setActionError(e instanceof Error ? e.message : "Failed to save configuration."); @@ -993,6 +1180,13 @@ function PoliciesTab({ onHooksInstallChange }: { onHooksInstallChange?: (install onSave={handleSaveParams} /> )} + {showIntegrationModal && ( + setShowIntegrationModal(false)} + onInstall={handleInstallWithIntegrations} + /> + )}
{/* Install status banner */}
@@ -1033,6 +1227,35 @@ function PoliciesTab({ onHooksInstallChange }: { onHooksInstallChange?: (install
+ {/* CLI tabs */} + {config.installedIntegrations.length > 0 && ( +
+ + {config.installedIntegrations.map((integ) => ( + + ))} +
+ )} + {/* Policy summary */}
@@ -1078,68 +1301,110 @@ function PoliciesTab({ onHooksInstallChange }: { onHooksInstallChange?: (install
{/* Policy rows */} - {policies.map((policy) => ( -
-
- handleToggle(policy.name, policy.enabled)} - disabled={isPending} - /> -
-
- {policy.name} - {policy.beta && ( - - beta - - )} -
-
- - {policy.description} - - {policy.eventScope && ( - - {policy.eventScope} + {policies.map((policy) => { + const cliOverride = selectedCliTab ? config.cliOverrides[selectedCliTab] : null; + const cliMode: "enable" | "disable" | "inherit" = cliOverride + ? cliOverride.enabledPolicies.includes(policy.name) + ? "enable" + : cliOverride.disabledPolicies.includes(policy.name) + ? "disable" + : "inherit" + : "inherit"; + + const currentParams = selectedCliTab + ? (config.cliOverrides[selectedCliTab]?.policyParams[policy.name] ?? policy.currentParams) + : policy.currentParams; + + const isForcedOn = selectedCliTab ? cliMode === "enable" : policy.enabled; + + return ( +
+
+ {selectedCliTab ? ( + handleToggleCli(selectedCliTab, policy.name, m)} + disabled={isPending} + /> + ) : ( + handleToggle(policy.name, policy.enabled)} + disabled={isPending} + /> + )} +
+
+ {policy.name} + {policy.beta && ( + + beta + + )} + {selectedCliTab && ( + + Global: {policy.enabled ? "ON" : "OFF"} + + )} +
+
+ + {policy.description} - )} - {policy.params && Object.keys(policy.params).length > 0 && ( -
- {Object.entries(policy.params).map(([key, spec]) => { - const currentVal = policy.currentParams?.[key] ?? spec.default; - const isCustomized = JSON.stringify(currentVal) !== JSON.stringify(spec.default); - return ( - - {key}: - {formatParamValue(spec.type, currentVal)} - - ); - })} -
+ {policy.eventScope && ( + + {policy.eventScope} + + )} + {policy.params && Object.keys(policy.params).length > 0 && ( +
+ {Object.entries(policy.params).map(([key, spec]) => { + const val = currentParams?.[key] ?? spec.default; + const isCustomized = JSON.stringify(val) !== JSON.stringify(spec.default); + return ( + + {key}: + {formatParamValue(spec.type, val)} + + ); + })} +
+ )} +
+ {policy.params && Object.keys(policy.params).length > 0 && isForcedOn && ( + )}
- {policy.params && Object.keys(policy.params).length > 0 && ( - - )} -
- ))} + ); + })}
); })} diff --git a/bin/failproofai.mjs b/bin/failproofai.mjs index ee634259..637b1f41 100755 --- a/bin/failproofai.mjs +++ b/bin/failproofai.mjs @@ -386,7 +386,7 @@ EXAMPLES policyNames.length > 0 ? policyNames : undefined, scope, undefined, - { betaOnly, removeCustomHooks, integration: integrationArg }, + { betaOnly, removeCustomHooks, integration: integrationArg, cliExplicit: cliValues.length > 0 }, ); process.exit(0); } diff --git a/src/hooks/builtin-policies.ts b/src/hooks/builtin-policies.ts index f9df3040..e9c7fea5 100644 --- a/src/hooks/builtin-policies.ts +++ b/src/hooks/builtin-policies.ts @@ -1822,17 +1822,6 @@ export function registerBuiltinPolicies(enabledNames: string[]): void { } } - // Diagnostic policy to verify prompt capture for multi-agent support - registerPolicy( - "debug-prompt", - "Diagnostic policy to verify prompt capture", - async (ctx) => ({ - decision: "allow", - reason: `Prompt captured from ${ctx.session?.integration ?? "unknown"}`, - }), - { events: ["UserPromptSubmit"] }, - 100, - ); } /** Clears the git branch cache. Exposed for test isolation only. 
*/ diff --git a/src/hooks/handler.ts b/src/hooks/handler.ts index 2bc0a5b3..c4c95dcb 100644 --- a/src/hooks/handler.ts +++ b/src/hooks/handler.ts @@ -651,8 +651,8 @@ export async function handleHookEvent(eventType: string, cliOverride?: string): session.transcriptPath = resolveTranscriptPath(integrationType, session.sessionId); } - // Load enabled policies (merge across project/local/global scopes) - const config = readMergedHooksConfig(session.cwd); + // Load enabled policies (merge across project/local/global scopes, with per-CLI overrides) + const config = readMergedHooksConfig(session.cwd, session.integration); clearPolicies(); registerBuiltinPolicies(config.enabledPolicies); diff --git a/src/hooks/hooks-config.ts b/src/hooks/hooks-config.ts index 1be0e477..95b4bf55 100644 --- a/src/hooks/hooks-config.ts +++ b/src/hooks/hooks-config.ts @@ -5,7 +5,7 @@ import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs"; import { resolve, dirname } from "node:path"; import { homedir } from "node:os"; import type { HooksConfig } from "./policy-types"; -import type { HookScope } from "./types"; +import type { HookScope, IntegrationType } from "./types"; import { hookLogInfo, hookLogWarn } from "./hook-logger"; function getHomeDir(): string { @@ -31,12 +31,18 @@ function readConfigAt(path: string): Partial { * 3. 
~/.failproofai/policies-config.json (global) * * Merge rules: - * enabledPolicies: union + dedup across all three - * policyParams: per-policy key, first scope that defines it wins entirely - * customPoliciesPath: first scope that defines it wins - * llm: first scope that defines it wins + * enabledPolicies: union + dedup across all three + * policyParams: per-policy key, first scope wins; CLI-level overrides global for the same key + * customPoliciesPath: first scope wins; CLI-level overrides global + * llm: first scope wins (no CLI override) + * + * When cliType is provided, per-CLI overrides from cli[cliType] are applied after the global merge: + * cli[X].enabledPolicies — adds policies only for that CLI (beyond global) + * cli[X].disabledPolicies — suppresses global policies for that CLI (disable wins over enable) + * cli[X].policyParams — per-key override over global policyParams + * cli[X].customPoliciesPath — overrides global customPoliciesPath for that CLI */ -export function readMergedHooksConfig(cwd?: string): HooksConfig { +export function readMergedHooksConfig(cwd?: string, cliType?: IntegrationType): HooksConfig { const base = cwd ? resolve(cwd) : process.cwd(); const projectPath = resolve(base, ".failproofai", "policies-config.json"); const localPath = resolve(base, ".failproofai", "policies-config.local.json"); @@ -46,31 +52,63 @@ export function readMergedHooksConfig(cwd?: string): HooksConfig { const local = readConfigAt(localPath); const global_ = readConfigAt(globalPath); - // enabledPolicies: union + dedup + // Step 1: global enabledPolicies — union + dedup (unchanged) const enabledSet = new Set([ ...(project.enabledPolicies ?? []), ...(local.enabledPolicies ?? []), ...(global_.enabledPolicies ?? 
[]), ]); - // policyParams: per-policy, first scope wins + // Step 2: policyParams — CLI-level first (higher priority), then global fills gaps const mergedParams: Record> = {}; + if (cliType) { + for (const scope of [project, local, global_]) { + const cliParams = scope.cli?.[cliType]?.policyParams; + if (!cliParams) continue; + for (const [policyName, params] of Object.entries(cliParams)) { + if (!(policyName in mergedParams)) mergedParams[policyName] = params; + } + } + } for (const scope of [project, local, global_]) { if (!scope.policyParams) continue; for (const [policyName, params] of Object.entries(scope.policyParams)) { - if (!(policyName in mergedParams)) { - mergedParams[policyName] = params; - } + if (!(policyName in mergedParams)) mergedParams[policyName] = params; } } - // customPoliciesPath: first scope wins - const customPoliciesPath = - project.customPoliciesPath ?? local.customPoliciesPath ?? global_.customPoliciesPath; + // Step 3: customPoliciesPath — CLI-level first, then global + let customPoliciesPath: string | undefined; + if (cliType) { + customPoliciesPath = + project.cli?.[cliType]?.customPoliciesPath + ?? local.cli?.[cliType]?.customPoliciesPath + ?? global_.cli?.[cliType]?.customPoliciesPath; + } + if (customPoliciesPath === undefined) { + customPoliciesPath = project.customPoliciesPath ?? local.customPoliciesPath ?? global_.customPoliciesPath; + } - // llm: first scope wins + // Step 4: llm — first scope wins (unchanged) const llm = project.llm ?? local.llm ?? global_.llm; + // Step 5: per-CLI enabledPolicies/disabledPolicies (only when cliType provided) + if (cliType) { + const cliAdded = new Set([ + ...(project.cli?.[cliType]?.enabledPolicies ?? []), + ...(local.cli?.[cliType]?.enabledPolicies ?? []), + ...(global_.cli?.[cliType]?.enabledPolicies ?? []), + ]); + const cliRemoved = new Set([ + ...(project.cli?.[cliType]?.disabledPolicies ?? []), + ...(local.cli?.[cliType]?.disabledPolicies ?? 
[]), + ...(global_.cli?.[cliType]?.disabledPolicies ?? []), + ]); + for (const p of cliAdded) enabledSet.add(p); + // disabledPolicies runs after add — disable always wins + for (const p of cliRemoved) enabledSet.delete(p); + } + return { enabledPolicies: [...enabledSet], ...(Object.keys(mergedParams).length > 0 ? { policyParams: mergedParams } : {}), diff --git a/src/hooks/integrations.ts b/src/hooks/integrations.ts index b35e5411..800b0100 100644 --- a/src/hooks/integrations.ts +++ b/src/hooks/integrations.ts @@ -1356,7 +1356,10 @@ export const FailproofAIPlugin = (ctx: any) => { syncSession(input.sessionID); }, "session.idle": async (input: any) => { - try { callcli("SessionEnd", { session_id: input.sessionID }); } catch {} + try { + callcli("session.idle", { session_id: input.sessionID }); + callcli("SessionEnd", { session_id: input.sessionID }); + } catch {} }, }; }; @@ -1593,6 +1596,15 @@ export default function (pi: ExtensionAPI) { } } catch {} }); + + pi.on("message", (event, ctx) => { + try { + // Trigger Stop when the assistant finishes its turn + if (event.role === "assistant" && (event.stopReason === "stop" || event.stopReason === "end_turn")) { + callcli("stop", {}, ctx); + } + } catch {} + }); } `; mkdirSync(dirname(path), { recursive: true }); diff --git a/src/hooks/manager.ts b/src/hooks/manager.ts index 05a59790..accf9d50 100644 --- a/src/hooks/manager.ts +++ b/src/hooks/manager.ts @@ -17,7 +17,7 @@ import { writeScopedHooksConfig, getConfigPathForScope, } from "./hooks-config"; -import type { HooksConfig } from "./policy-types"; +import type { HooksConfig, CliPoliciesOverride } from "./policy-types"; import { BUILTIN_POLICIES } from "./builtin-policies"; import { loadCustomHooks, discoverPolicyFiles } from "./custom-hooks-loader"; import { trackHookEvent } from "./hook-telemetry"; @@ -245,7 +245,10 @@ export async function installHooks( // Warn when Stop-event policies are installed for CLIs that don't support Stop. 
const missingStop = selectedPolicies.filter( - (p) => STOP_EVENT_POLICIES.includes(p) && !integ.eventTypes.includes("stop" as any) && !integ.eventTypes.includes("Stop" as any) && !integ.eventTypes.some((e) => e.toLowerCase().includes("stop") || e === "AfterAgent"), + (p) => STOP_EVENT_POLICIES.includes(p) && + !integ.eventTypes.includes("stop" as any) && + !integ.eventTypes.includes("Stop" as any) && + !integ.eventTypes.some((e) => e.toLowerCase().includes("stop") || e === "AfterAgent" || e === "session.idle"), ); if (missingStop.length > 0) { hookLogWarn(`${integ.displayName} does not support a Stop event — the following policies will never fire: ${missingStop.join(", ")}`); @@ -291,6 +294,9 @@ export async function installHooks( } catch { /* best effort */ } console.log(`\nFailproof AI hooks installed for all ${integ.eventTypes.length} event types (${integ.displayName}, scope: ${scope}).`); + if (selectedIntegrations.length === 1) { + console.log(`Note: Policies are enabled globally (apply to all CLIs). Hooks wired to ${integ.displayName} only.`); + } console.log(`Settings: ${settingsPath}`); // claude-code and copilot project-scope hooks use npx — no machine-specific paths. // Other integrations embed absolute binary paths even in project scope. @@ -322,17 +328,23 @@ export async function removeHooks( policyNames?: string[], scope: HookScope | "repo" | "all" = "user", cwd?: string, - opts?: { betaOnly?: boolean; source?: string; removeCustomHooks?: boolean; integration?: IntegrationType | IntegrationType[] }, + opts?: { betaOnly?: boolean; source?: string; removeCustomHooks?: boolean; integration?: IntegrationType | IntegrationType[]; cliExplicit?: boolean }, integration: IntegrationType | IntegrationType[] = "claude-code", ): Promise { const integrations = opts?.integration ?? integration; const arr = Array.isArray(integrations) ? 
integrations : [integrations]; - // Clear custom hooks path if requested + // Clear custom hooks path if requested (global + all per-CLI entries) const configScope: HookScope = scope === "all" ? "user" : (scope as HookScope); if (opts?.removeCustomHooks) { const config = readScopedHooksConfig(configScope, cwd); delete config.customPoliciesPath; + // Also clear per-CLI customPoliciesPath entries + if (config.cli) { + for (const cliOvr of Object.values(config.cli)) { + if (cliOvr) delete cliOvr.customPoliciesPath; + } + } writeScopedHooksConfig(config, configScope, cwd); console.log("Custom hooks path cleared."); } @@ -344,6 +356,80 @@ export async function removeHooks( if (policyNames && policyNames.length > 0 && !(policyNames.length === 1 && policyNames[0] === "all")) { validatePolicyNames(policyNames); const config = readScopedHooksConfig(configScope, cwd); + + // Per-CLI scoped removal: --cli was explicitly provided by the user + if (opts?.cliExplicit) { + const cliOverride: CliPoliciesOverride = { ...(config.cli?.[integId] ?? {}) }; + const globalEnabled = new Set(config.enabledPolicies); + const notEnabled: string[] = []; + + for (const policyName of policyNames) { + const inCliEnabled = (cliOverride.enabledPolicies ?? []).includes(policyName); + const inGlobal = globalEnabled.has(policyName); + + if (inCliEnabled) { + // Was a CLI-specific addition — remove it from there + cliOverride.enabledPolicies = (cliOverride.enabledPolicies ?? []).filter((p) => p !== policyName); + } else if (inGlobal) { + // In global — suppress for this CLI only, leave global unchanged + const alreadySuppressed = (cliOverride.disabledPolicies ?? []).includes(policyName); + if (!alreadySuppressed) { + cliOverride.disabledPolicies = [...(cliOverride.disabledPolicies ?? 
[]), policyName]; + } + } else { + notEnabled.push(policyName); + } + } + + if (notEnabled.length > 0) { + console.log(`Warning: policy(ies) not enabled globally or for ${integ.displayName}: ${notEnabled.join(", ")}`); + } + + // Clean up empty arrays + if (cliOverride.enabledPolicies?.length === 0) delete cliOverride.enabledPolicies; + if (cliOverride.disabledPolicies?.length === 0) delete cliOverride.disabledPolicies; + + // Build updated cli map + const updatedCli: Partial> = { + ...(config.cli ?? {}), + [integId]: cliOverride, + }; + // Remove empty CLI entry + if (!Object.keys(cliOverride).length) delete updatedCli[integId]; + + const updatedConfig: HooksConfig = { + ...config, + ...(Object.keys(updatedCli).length > 0 ? { cli: updatedCli } : { cli: undefined }), + }; + + writeScopedHooksConfig(updatedConfig, configScope, cwd); + + // Telemetry + try { + const distinctId = getInstanceId(); + const actuallyDisabled = policyNames.filter((p) => !notEnabled.includes(p)); + await trackHookEvent(distinctId, "hooks_removed", { + scope, + integration: integ.id, + removal_mode: "cli-scoped", + beta_only: false, + policies_removed: actuallyDisabled, + removed_count: actuallyDisabled.length, + ...(opts?.source ? { source: opts.source } : {}), + platform: platform(), + arch: arch(), + os_release: release(), + hostname_hash: hashToId(hostname()), + }); + } catch { /* best effort */ } + + const actualCount = policyNames.length - notEnabled.length; + console.log(`Disabled ${actualCount} policy(ies) for ${integ.displayName}. Other CLIs unaffected.`); + console.log(`Global list unchanged: ${config.enabledPolicies.length > 0 ? 
config.enabledPolicies.join(", ") : "(none)"}`); + continue; + } + + // Global removal path (no --cli flag) const removeSet = new Set(policyNames); const remaining = config.enabledPolicies.filter((p) => !removeSet.has(p)); const notEnabled = policyNames.filter((p) => !config.enabledPolicies.includes(p)); @@ -435,12 +521,12 @@ export async function removeHooks( for (const s of integ.scopes) { if (s === "repo") continue; const existing = readScopedHooksConfig(s as HookScope, cwd); - if (existing.enabledPolicies.length > 0) { - writeScopedHooksConfig({ ...existing, enabledPolicies: [] }, s as HookScope, cwd); + if (existing.enabledPolicies.length > 0 || existing.cli) { + writeScopedHooksConfig({ ...existing, enabledPolicies: [], cli: undefined }, s as HookScope, cwd); } } } else if (!integ.scopes.some((s) => integ.hooksInstalledInSettings(s as any, cwd))) { - writeScopedHooksConfig({ ...configBeforeRemoval, enabledPolicies: [] }, configScope, cwd); + writeScopedHooksConfig({ ...configBeforeRemoval, enabledPolicies: [], cli: undefined }, configScope, cwd); } } } @@ -450,7 +536,7 @@ export async function listHooks( integration: IntegrationType = "claude-code", ): Promise { const integ = getIntegration(integration); - // Multi-scope config is merged for listing + // Multi-scope config is merged for listing (no CLI filter \u2014 show global view) const config = readMergedHooksConfig(cwd); const enabledSet = new Set(config.enabledPolicies); @@ -463,6 +549,36 @@ export async function listHooks( const nameColWidth = Math.max(...BUILTIN_POLICIES.map((p) => p.name.length)) + 2; const builtinPolicyNames = new Set(BUILTIN_POLICIES.map((p) => p.name)); + // Build per-CLI annotation map from all three scoped configs + const cliAnnotations = new Map(); + for (const scopeConfig of [ + readScopedHooksConfig("project", cwd), + readScopedHooksConfig("local", cwd), + readScopedHooksConfig("user", cwd), + ]) { + for (const [cliId, cliOvr] of Object.entries(scopeConfig.cli ?? 
{})) { + for (const p of cliOvr.disabledPolicies ?? []) { + const entry = cliAnnotations.get(p) ?? { disabled: [], enabledOnly: [] }; + if (!entry.disabled.includes(cliId)) entry.disabled.push(cliId); + cliAnnotations.set(p, entry); + } + for (const p of cliOvr.enabledPolicies ?? []) { + if (!enabledSet.has(p)) { + const entry = cliAnnotations.get(p) ?? { disabled: [], enabledOnly: [] }; + if (!entry.enabledOnly.includes(cliId)) entry.enabledOnly.push(cliId); + cliAnnotations.set(p, entry); + } + } + } + } + + const getCliSuffix = (policyName: string): string => { + const ann = cliAnnotations.get(policyName); + if (ann?.disabled.length) return ` \x1B[2m[disabled for: ${ann.disabled.join(", ")}]\x1B[0m`; + if (ann?.enabledOnly.length) return ` \x1B[2m[enabled for: ${ann.enabledOnly.join(", ")} only]\x1B[0m`; + return ""; + }; + const printParamsSummary = (policyName: string, indent: string) => { const params = config.policyParams?.[policyName]; if (!params) return; @@ -471,6 +587,11 @@ export async function listHooks( } }; + const printPolicyLine = (p: { name: string; description: string }, mark: string) => { + console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}${getCliSuffix(p.name)}`); + printParamsSummary(p.name, " "); + }; + const statusCol = installedScopes.length > 1 ? installedScopes.length * 9 : 8; if (installedScopes.length === 0) { @@ -479,16 +600,14 @@ export async function listHooks( for (const p of regularPolicies) { const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; - console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); - printParamsSummary(p.name, " "); + printPolicyLine(p, mark); } if (betaPolicies.length > 0) { console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); for (const p of betaPolicies) { const mark = enabledSet.has(p.name) ? 
`\x1B[32m\u2713\x1B[0m` : " "; - console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); - printParamsSummary(p.name, " "); + printPolicyLine(p, mark); } } console.log(`\n Run \`failproofai policies --install --cli ${integration}\` to activate hooks for ${integ.displayName}.`); @@ -499,15 +618,13 @@ export async function listHooks( for (const p of regularPolicies) { const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; - console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); - printParamsSummary(p.name, " "); + printPolicyLine(p, mark); } if (betaPolicies.length > 0) { console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); for (const p of betaPolicies) { const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; - console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); - printParamsSummary(p.name, " "); + printPolicyLine(p, mark); } } } else { @@ -517,15 +634,13 @@ export async function listHooks( for (const p of regularPolicies) { const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; - console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); - printParamsSummary(p.name, " "); + printPolicyLine(p, mark); } if (betaPolicies.length > 0) { console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); for (const p of betaPolicies) { const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; - console.log(` ${mark}${" ".repeat(7)}${p.name.padEnd(nameColWidth)}${p.description}`); - printParamsSummary(p.name, " "); + printPolicyLine(p, mark); } } console.log(`\n Hooks active in scopes: ${installedScopes.join(", ")}`); diff --git a/src/hooks/policy-types.ts b/src/hooks/policy-types.ts index ff10a0e5..8cbdce58 100644 --- a/src/hooks/policy-types.ts +++ b/src/hooks/policy-types.ts @@ -1,7 +1,7 @@ /** * Types for the hook policy system. 
*/ -import type { HookEventType, SessionMetadata } from "./types"; +import type { HookEventType, SessionMetadata, IntegrationType } from "./types"; export type PolicyDecision = "allow" | "deny" | "instruct"; @@ -69,9 +69,17 @@ export interface LlmConfig { model?: string; } +export interface CliPoliciesOverride { + enabledPolicies?: string[]; + disabledPolicies?: string[]; + policyParams?: Record>; + customPoliciesPath?: string; +} + export interface HooksConfig { enabledPolicies: string[]; llm?: LlmConfig; policyParams?: Record>; customPoliciesPath?: string; + cli?: Partial>; } diff --git a/src/hooks/types.ts b/src/hooks/types.ts index 3ce948a3..709de43c 100644 --- a/src/hooks/types.ts +++ b/src/hooks/types.ts @@ -221,13 +221,14 @@ export const OPENCODE_HOOK_EVENT_TYPES = [ "tool.execute.before", "tool.execute.after", "chat.message", + "stop", ] as const; export type OpencodeHookEventType = (typeof OPENCODE_HOOK_EVENT_TYPES)[number]; export const OPENCODE_EVENT_MAP: Record = { "session.created": "SessionStart", - "session.idle": "SessionEnd", + "session.idle": "Stop", "tool.execute.before": "PreToolUse", "tool.execute.after": "PostToolUse", "chat.message": "UserPromptSubmit", @@ -240,6 +241,7 @@ export const PI_HOOK_EVENT_TYPES = [ "tool_call", "tool_result", "input", + "stop", ] as const; export type PiHookEventType = (typeof PI_HOOK_EVENT_TYPES)[number]; @@ -249,4 +251,5 @@ export const PI_EVENT_MAP: Record = { "tool_call": "PreToolUse", "tool_result": "PostToolUse", "input": "UserPromptSubmit", + "stop": "Stop", }; From f3be35b6e05ab85e0b73a7aad452637114c8ae48 Mon Sep 17 00:00:00 2001 From: Yash Upadhyay Date: Sun, 26 Apr 2026 06:04:58 +0000 Subject: [PATCH 43/47] fix: address all CodeRabbit review comments on PR #185 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix tool_input/toolInput shapes in Gemini and Copilot payloads (object, not raw string) - Isolate Cursor and Gemini e2e tests with per-test isoHome 
(mkdtempSync + HOME override) - Remove real-home DEDUP_DIR deletion from OpenCode e2e beforeEach (redundant with isoHome) - Restore process.env.HOME in log-entries test afterEach to prevent cross-test leakage - Fix DetailPanel colSpan 10 → 11 to span all activity table columns - Fix isForcedOn to show param-edit button when CLI is in inherit mode and global policy is enabled - Fix isOpencodeSessionMerged to compare encodedCwd against virtual folder names instead of session IDs - Add cwd field to ProjectFolder and pass it from readOpencodeDbEntries - Fix resolveAnyProjectPath to use existsSync instead of unreachable double-try pattern - Remove double-emit of SessionEnd on OpenCode session.idle events - Remove debug logging of Pi session ID sources from integrations.ts - Log warning in getInteg before falling back to claude-code for unknown integration types - Fix getFilePath to use findNestedStringByKeys for nested payload lookup - Fix scripts/codex-trace.mjs shebang from node to bun Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 13 ++++ __tests__/e2e/helpers/payloads.ts | 4 +- .../e2e/hooks/cursor-integration.e2e.test.ts | 67 ++++++++++--------- .../e2e/hooks/gemini-integration.e2e.test.ts | 51 +++++++------- .../hooks/opencode-integration.e2e.test.ts | 4 +- __tests__/hooks/builtin-policies.test.ts | 3 +- __tests__/hooks/manager.test.ts | 7 +- __tests__/lib/log-entries.test.ts | 4 ++ __tests__/lib/projects.test.ts | 19 +++++- app/policies/hooks-client.tsx | 10 +-- lib/log-entries.ts | 2 - lib/projects.ts | 26 +++---- scripts/codex-trace.mjs | 2 +- src/hooks/builtin-policies.ts | 7 +- src/hooks/handler.ts | 7 +- src/hooks/integrations.ts | 13 +--- 16 files changed, 130 insertions(+), 109 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e667eb6a..70a52d45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,19 @@ - Add native transcript/session support across non-Claude CLIs in dashboard parsing: OpenCode sessions now load from 
`~/.local/share/opencode/opencode.db`, Gemini/Pi native transcript discovery is expanded, and Gemini chat discovery now targets real chat files (`.jsonl`/`.json`) while ignoring tool-call sidecar artifacts. ### Fixes +- Fix Cursor/Gemini e2e tests to use isolated temp HOME (isoHome) per test, preventing real `~/.failproofai` mutation and parallel flakes +- Fix OpenCode e2e test to stop deleting real-home `DEDUP_DIR` (now redundant since `HOME: isoHome` is set for all invocations) +- Fix `process.env.HOME` leak across log-entries unit tests: capture and restore in `afterEach` +- Fix `GeminiPayloads.beforeTool.bash` and `CopilotPayloads.preToolUse.bash` to use object `tool_input`/`toolInput` shapes instead of raw strings +- Fix `DetailPanel` `colSpan` in activity dashboard from 10 to 11 after the Integration column was added +- Fix `isForcedOn` in policy list: inherit-mode + globally-enabled policies now show the parameter-edit button +- Fix `isOpencodeSessionMerged` to compare session CWD against virtual folder names (encoded CWD) instead of always returning true +- Fix `resolveAnyProjectPath` unreachable `"virtual"` branch: now uses `existsSync` to distinguish real Claude project directories from activity-store-only virtual projects +- Fix `session.idle` in OpenCode plugin to not double-emit `SessionEnd` (idle is not a session close) +- Remove debug logging of Pi session ID sources from `integrations.ts` +- Fix `getFilePath` in builtin policies to use `findNestedStringByKeys` for nested payload lookup, matching `getCommand` behaviour +- Fix silent fallback in `getInteg` handler to log a warning before falling back to `claude-code` +- Fix `scripts/codex-trace.mjs` shebang from `node` to `bun` (file imports TypeScript sources directly) - Fix `block-sudo` and `block-read-outside-cwd` bypassed on Gemini when tool name is `run_shell_command` or `sh` — both policies now use `SHELL_TOOL_NAMES` so all shell tool variants are covered - Fix `block-failproofai-commands` now 
also blocks agents from reading `.failproofai/policies-config.json` via `Read`/`ReadFile` tools or shell commands, preventing policy config scouting - Fix cross-CLI dedup collision: integration type is now always the first component of both the firing-lock hash and the dedup key, so Cursor and Claude Code firing the same event concurrently no longer drop each other's entries diff --git a/__tests__/e2e/helpers/payloads.ts b/__tests__/e2e/helpers/payloads.ts index cc3c0f6b..489682d8 100644 --- a/__tests__/e2e/helpers/payloads.ts +++ b/__tests__/e2e/helpers/payloads.ts @@ -159,7 +159,7 @@ export const GeminiPayloads = { cwd, hook_event_name: "BeforeTool", tool_name: "bash", - tool_input: command, + tool_input: { command }, }; }, }, @@ -208,7 +208,7 @@ export const CopilotPayloads = { cwd, hookEventName: "preToolUse", toolName: "bash", - toolInput: command, + toolInput: { command }, }; }, diff --git a/__tests__/e2e/hooks/cursor-integration.e2e.test.ts b/__tests__/e2e/hooks/cursor-integration.e2e.test.ts index 472bed89..b5ad3bd7 100644 --- a/__tests__/e2e/hooks/cursor-integration.e2e.test.ts +++ b/__tests__/e2e/hooks/cursor-integration.e2e.test.ts @@ -1,36 +1,39 @@ import { describe, it, expect, beforeEach, afterEach } from "vitest"; import { execSync, spawnSync } from "node:child_process"; -import { writeFileSync, readFileSync, existsSync, mkdirSync, rmSync } from "node:fs"; -import { resolve } from "node:path"; -import { homedir } from "node:os"; +import { writeFileSync, readFileSync, existsSync, mkdirSync, mkdtempSync, rmSync } from "node:fs"; +import { resolve, join } from "node:path"; +import { tmpdir } from "node:os"; import { CursorPayloads } from "../helpers/payloads"; const BINARY_PATH = resolve(__dirname, "../../../bin/failproofai.mjs"); -const PROJECT_DIR = resolve(__dirname, "../../fixtures/cursor-project"); -const CURSOR_HOOKS_PATH = resolve(PROJECT_DIR, ".cursor", "hooks.json"); -const CONFIG_PATH = resolve(PROJECT_DIR, ".failproofai", 
"policies-config.json"); -// Firing-lock files can persist across test cases. Clear them. -const DEDUP_DIR = resolve(require("node:os").homedir(), ".failproofai", "cache", "dedup"); describe("E2E: Cursor Integration", () => { + let PROJECT_DIR: string; + let CURSOR_HOOKS_PATH: string; + let CONFIG_PATH: string; + let isoHome: string; + beforeEach(() => { - if (existsSync(PROJECT_DIR)) rmSync(PROJECT_DIR, { recursive: true, force: true }); - if (existsSync(DEDUP_DIR)) rmSync(DEDUP_DIR, { recursive: true, force: true }); - mkdirSync(PROJECT_DIR, { recursive: true }); - // Initialize empty cursor hooks + PROJECT_DIR = mkdtempSync(join(tmpdir(), "fp-e2e-cursor-")); + isoHome = mkdtempSync(join(tmpdir(), "fp-e2e-cursor-home-")); + CURSOR_HOOKS_PATH = resolve(PROJECT_DIR, ".cursor", "hooks.json"); + CONFIG_PATH = resolve(PROJECT_DIR, ".failproofai", "policies-config.json"); mkdirSync(resolve(PROJECT_DIR, ".cursor"), { recursive: true }); writeFileSync(CURSOR_HOOKS_PATH, JSON.stringify({ version: 1, hooks: {} })); }); afterEach(() => { if (existsSync(PROJECT_DIR)) rmSync(PROJECT_DIR, { recursive: true, force: true }); + if (existsSync(isoHome)) rmSync(isoHome, { recursive: true, force: true }); }); + const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), HOME: isoHome }); + it("denies sudo command via Cursor preToolUse hook", () => { // 1. Install block-sudo for Cursor project scope execSync(`bun ${BINARY_PATH} policies --install block-sudo --cli cursor --scope project`, { cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + env: baseEnv(), }); // 2. Verify hooks.json was written correctly @@ -40,14 +43,14 @@ describe("E2E: Cursor Integration", () => { // 3. 
Trigger the hook with a sudo payload const payload = CursorPayloads.preToolUse.bash("sudo rm -rf /", PROJECT_DIR); - + const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "PreToolUse"], { input: JSON.stringify(payload), cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, - encoding: "utf8" + env: { ...baseEnv(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8", }); - + // Cursor expects Exit 0 for a protocol-compliant JSON denial. expect(status).toBe(0); const parsed = JSON.parse(stdout.trim()); @@ -61,7 +64,7 @@ describe("E2E: Cursor Integration", () => { // 1. Install block-sudo execSync(`bun ${BINARY_PATH} policies --install block-sudo --cli cursor --scope project`, { cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + env: baseEnv(), }); // 2. Trigger hook with ONLY workspace_roots (no cwd) @@ -71,8 +74,8 @@ describe("E2E: Cursor Integration", () => { const output = spawnSync("bun", [BINARY_PATH, "--hook", "PreToolUse"], { input: JSON.stringify(payload), cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, - encoding: "utf8" + env: { ...baseEnv(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8", }); expect(output.status).toBe(0); const parsedDeny = JSON.parse(output.stdout.trim()); @@ -83,16 +86,16 @@ describe("E2E: Cursor Integration", () => { it("allows benign commands", () => { execSync(`bun ${BINARY_PATH} policies --install block-sudo --cli cursor --scope project`, { cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + env: baseEnv(), }); const payload = CursorPayloads.preToolUse.bash("ls -la", PROJECT_DIR); - + const { status, stdout } = spawnSync("bun", [BINARY_PATH, "--hook", "PreToolUse"], { input: JSON.stringify(payload), cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, - 
encoding: "utf8" + env: { ...baseEnv(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8", }); expect(status).toBe(0); @@ -102,7 +105,7 @@ describe("E2E: Cursor Integration", () => { it("blocks sudo via beforeShellExecution event (tool_name normalization)", () => { execSync(`bun ${BINARY_PATH} policies --install block-sudo --cli cursor --scope project`, { cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + env: baseEnv(), }); // beforeShellExecution events don't include tool_name — normalizePayload must map to run_terminal_command @@ -117,8 +120,8 @@ describe("E2E: Cursor Integration", () => { const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "PreToolUse"], { input: JSON.stringify(payload), cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, - encoding: "utf8" + env: { ...baseEnv(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8", }); expect(status).toBe(0); @@ -132,7 +135,7 @@ describe("E2E: Cursor Integration", () => { it("blocks env file read via beforeReadFile event (file_path normalization)", () => { execSync(`bun ${BINARY_PATH} policies --install block-env-files --cli cursor --scope project`, { cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + env: baseEnv(), }); // beforeReadFile events send file_path at the top level — normalizePayload must wrap it @@ -147,8 +150,8 @@ describe("E2E: Cursor Integration", () => { const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "PreToolUse"], { input: JSON.stringify(payload), cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, - encoding: "utf8" + env: { ...baseEnv(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8", }); expect(status).toBe(0); @@ -162,14 +165,14 @@ describe("E2E: Cursor Integration", () => { // Install execSync(`bun ${BINARY_PATH} policies --install block-sudo 
--cli cursor --scope project`, { cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + env: baseEnv(), }); expect(JSON.parse(readFileSync(CURSOR_HOOKS_PATH, "utf8")).hooks.beforeShellExecution).toBeDefined(); // Uninstall execSync(`bun ${BINARY_PATH} policies --uninstall --cli cursor --scope project`, { cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + env: baseEnv(), }); const hooks = JSON.parse(readFileSync(CURSOR_HOOKS_PATH, "utf8")); diff --git a/__tests__/e2e/hooks/gemini-integration.e2e.test.ts b/__tests__/e2e/hooks/gemini-integration.e2e.test.ts index f57043d0..0905eb10 100644 --- a/__tests__/e2e/hooks/gemini-integration.e2e.test.ts +++ b/__tests__/e2e/hooks/gemini-integration.e2e.test.ts @@ -1,51 +1,52 @@ import { describe, it, expect, beforeEach, afterEach } from "vitest"; import { execSync, spawnSync } from "node:child_process"; -import { writeFileSync, readFileSync, existsSync, mkdirSync, rmSync } from "node:fs"; -import { resolve } from "node:path"; -import { homedir } from "node:os"; +import { writeFileSync, readFileSync, existsSync, mkdirSync, mkdtempSync, rmSync } from "node:fs"; +import { resolve, join } from "node:path"; +import { tmpdir } from "node:os"; import { GeminiPayloads } from "../helpers/payloads"; const BINARY_PATH = resolve(__dirname, "../../../bin/failproofai.mjs"); -const PROJECT_DIR = resolve(__dirname, "../../fixtures/gemini-project"); -const GEMINI_SETTINGS_PATH = resolve(PROJECT_DIR, ".gemini", "settings.json"); -// Cursor and Copilot e2e tests share the same SESSION_ID + sudo fingerprint as -// Gemini, so their firing-lock file (5s bucket) can still be on disk when -// Gemini runs and block this test with "instant-catch twin". Clear it. 
-const DEDUP_DIR = resolve(homedir(), ".failproofai", "cache", "dedup"); describe("E2E: Gemini Integration", () => { + let PROJECT_DIR: string; + let GEMINI_SETTINGS_PATH: string; + let isoHome: string; + beforeEach(() => { - if (existsSync(PROJECT_DIR)) rmSync(PROJECT_DIR, { recursive: true, force: true }); - if (existsSync(DEDUP_DIR)) rmSync(DEDUP_DIR, { recursive: true, force: true }); - mkdirSync(PROJECT_DIR, { recursive: true }); + PROJECT_DIR = mkdtempSync(join(tmpdir(), "fp-e2e-gemini-")); + isoHome = mkdtempSync(join(tmpdir(), "fp-e2e-gemini-home-")); + GEMINI_SETTINGS_PATH = resolve(PROJECT_DIR, ".gemini", "settings.json"); mkdirSync(resolve(PROJECT_DIR, ".gemini"), { recursive: true }); writeFileSync(GEMINI_SETTINGS_PATH, JSON.stringify({ hooks: {} })); }); afterEach(() => { if (existsSync(PROJECT_DIR)) rmSync(PROJECT_DIR, { recursive: true, force: true }); + if (existsSync(isoHome)) rmSync(isoHome, { recursive: true, force: true }); }); + const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), HOME: isoHome }); + it("denies sudo via Gemini BeforeTool hook with deny decision", () => { // 1. Install block-sudo execSync(`bun ${BINARY_PATH} policies --install block-sudo --cli gemini --scope project`, { cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + env: baseEnv(), }); // 2. 
Trigger the hook const payload = GeminiPayloads.beforeTool.bash("sudo rm -rf /", PROJECT_DIR); - + // We pass --cli gemini to ensure it doesn't fallback to claude-code const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "BeforeTool", "--cli", "gemini"], { input: JSON.stringify(payload), cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_LOG_LEVEL: "info", FAILPROOFAI_SKIP_KILL: "true" }, - encoding: "utf8" + env: { ...baseEnv(), FAILPROOFAI_LOG_LEVEL: "info", FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8", }); console.log("Gemini STDOUT:", stdout); console.log("Gemini STDERR:", stderr); - + // Gemini expects Exit 0 for a protocol-compliant JSON denial. // If we exit with 2, it may "fail open" and proceed with the action. expect(status).toBe(0); @@ -61,14 +62,14 @@ describe("E2E: Gemini Integration", () => { it("allows benign commands with empty output", () => { execSync(`bun ${BINARY_PATH} policies --install block-sudo --cli gemini --scope project`, { cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + env: baseEnv(), }); const payload = GeminiPayloads.beforeTool.bash("ls", PROJECT_DIR); const output = execSync(`bun ${BINARY_PATH} --hook BeforeTool --cli gemini`, { input: JSON.stringify(payload), cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" } + env: { ...baseEnv(), FAILPROOFAI_SKIP_KILL: "true" }, }).toString(); expect(JSON.parse(output.trim())).toEqual({ decision: "allow" }); @@ -77,7 +78,7 @@ describe("E2E: Gemini Integration", () => { it("denies sudo from stringified Gemini toolArgs payloads", () => { execSync(`bun ${BINARY_PATH} policies --install block-sudo --cli gemini --scope project`, { cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + env: baseEnv(), }); const payload = { @@ -91,8 +92,8 @@ describe("E2E: Gemini Integration", () => { const { status, stdout, 
stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "BeforeTool", "--cli", "gemini"], { input: JSON.stringify(payload), cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, - encoding: "utf8" + env: { ...baseEnv(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8", }); expect(status).toBe(0); @@ -108,7 +109,7 @@ describe("E2E: Gemini Integration", () => { it("blocks env on Gemini Shell tool name via BeforeTool", () => { execSync(`bun ${BINARY_PATH} policies --install protect-env-vars --cli gemini --scope project`, { cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd() } + env: baseEnv(), }); const payload = { @@ -122,8 +123,8 @@ describe("E2E: Gemini Integration", () => { const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "BeforeTool", "--cli", "gemini"], { input: JSON.stringify(payload), cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }, - encoding: "utf8" + env: { ...baseEnv(), FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8", }); expect(status).toBe(0); diff --git a/__tests__/e2e/hooks/opencode-integration.e2e.test.ts b/__tests__/e2e/hooks/opencode-integration.e2e.test.ts index b4ac4446..56d93595 100644 --- a/__tests__/e2e/hooks/opencode-integration.e2e.test.ts +++ b/__tests__/e2e/hooks/opencode-integration.e2e.test.ts @@ -2,10 +2,9 @@ import { describe, it, expect, beforeEach, afterEach } from "vitest"; import { execSync, spawnSync } from "node:child_process"; import { readFileSync, existsSync, mkdtempSync, mkdirSync, rmSync } from "node:fs"; import { resolve, join } from "node:path"; -import { homedir, tmpdir } from "node:os"; +import { tmpdir } from "node:os"; const BINARY_PATH = resolve(__dirname, "../../../bin/failproofai.mjs"); -const DEDUP_DIR = resolve(homedir(), ".failproofai", "cache", "dedup"); describe("E2E: OpenCode Integration", () => { let projectDir: string; @@ -14,7 
+13,6 @@ describe("E2E: OpenCode Integration", () => { beforeEach(() => { projectDir = mkdtempSync(join(tmpdir(), "fp-e2e-opencode-")); isoHome = mkdtempSync(join(tmpdir(), "fp-e2e-opencode-home-")); - if (existsSync(DEDUP_DIR)) rmSync(DEDUP_DIR, { recursive: true, force: true }); }); afterEach(() => { diff --git a/__tests__/hooks/builtin-policies.test.ts b/__tests__/hooks/builtin-policies.test.ts index 3567f238..701ed763 100644 --- a/__tests__/hooks/builtin-policies.test.ts +++ b/__tests__/hooks/builtin-policies.test.ts @@ -508,7 +508,8 @@ describe("hooks/builtin-policies", () => { it("blocks stringified Gemini command payloads", async () => { const ctx = makeCtx({ toolName: "Shell", - toolInput: "{\"tool\":{\"args\":\"{\\\"command\\\":\\\"sudo apt-get update\\\"}\"}}", + toolInput: "{\"tool\":{\"args\":\"{\\\"command\\\":\\\"sudo apt-get update\\\"}\"}}" as unknown as Record, + }); expect((await policy.fn(ctx)).decision).toBe("deny"); }); diff --git a/__tests__/hooks/manager.test.ts b/__tests__/hooks/manager.test.ts index 8a845582..31107a43 100644 --- a/__tests__/hooks/manager.test.ts +++ b/__tests__/hooks/manager.test.ts @@ -1215,12 +1215,13 @@ describe("hooks/manager", () => { // First writeScopedHooksConfig call is from the removeCustomHooks block const firstWriteCall = vi.mocked(writeScopedHooksConfig).mock.calls[0]; - const written = firstWriteCall[0] as Record; + const written = firstWriteCall[0] as unknown as Record; // Global customPoliciesPath cleared expect(written.customPoliciesPath).toBeUndefined(); // Per-CLI customPoliciesPath cleared too - expect((written as Record).cli?.["gemini"]?.customPoliciesPath).toBeUndefined(); - expect((written as Record).cli?.["cursor"]?.customPoliciesPath).toBeUndefined(); + const cliSection = written.cli as Record | undefined; + expect(cliSection?.["gemini"]?.customPoliciesPath).toBeUndefined(); + expect(cliSection?.["cursor"]?.customPoliciesPath).toBeUndefined(); }); it("scope=all wipe clears both enabledPolicies and 
cli sections", async () => { diff --git a/__tests__/lib/log-entries.test.ts b/__tests__/lib/log-entries.test.ts index 0945d31f..a4a4e42f 100644 --- a/__tests__/lib/log-entries.test.ts +++ b/__tests__/lib/log-entries.test.ts @@ -12,14 +12,18 @@ function line(obj: Record): string { } let tempRoot = ""; +let originalHome: string | undefined; beforeEach(() => { tempRoot = mkdtempSync(join(tmpdir(), "failproofai-log-entries-")); + originalHome = process.env.HOME; }); afterEach(() => { delete process.env.CLAUDE_PROJECTS_PATH; delete process.env.COPILOT_SESSION_STATE_PATH; + if (originalHome === undefined) delete process.env.HOME; + else process.env.HOME = originalHome; resetHookStoreForTest(); if (tempRoot) rmSync(tempRoot, { recursive: true, force: true }); tempRoot = ""; diff --git a/__tests__/lib/projects.test.ts b/__tests__/lib/projects.test.ts index 33ce1903..ad582f05 100644 --- a/__tests__/lib/projects.test.ts +++ b/__tests__/lib/projects.test.ts @@ -24,13 +24,19 @@ vi.mock("../../src/hooks/hook-activity-store", () => ({ trackHookEvent: vi.fn(), })); +vi.mock("fs", () => ({ + existsSync: vi.fn(() => false), +})); + import { readdir, stat } from "fs/promises"; +import { existsSync } from "fs"; import { extractSessionId, getProjectFolders, getSessionFiles, resolveAnyProjectPath } from "@/lib/projects"; import { getAllHookActivityEntries } from "../../src/hooks/hook-activity-store"; const mockGetAllActivity = vi.mocked(getAllHookActivityEntries); const mockReaddir = vi.mocked(readdir); const mockStat = vi.mocked(stat); +const mockExistsSync = vi.mocked(existsSync); describe("extractSessionId", () => { it("extracts UUID from a valid .jsonl filename", () => { @@ -302,8 +308,9 @@ describe("resolveAnyProjectPath", () => { expect(result.path).toBe(`__fp_opencode_db__:${sessionId}`); }); - it("routes encoded CWD names (starting with -) to Claude projects", () => { + it("routes encoded CWD names (starting with -) to Claude projects when directory exists", () => { const 
projectName = "-home-user-myproject"; + mockExistsSync.mockReturnValueOnce(true); const result = resolveAnyProjectPath(projectName); expect(result.source).toBe("claude-code"); @@ -311,6 +318,16 @@ describe("resolveAnyProjectPath", () => { expect(result.path).toContain(projectName); }); + it("routes encoded CWD names to virtual when directory does not exist", () => { + const projectName = "-home-user-myproject"; + mockExistsSync.mockReturnValueOnce(false); + const result = resolveAnyProjectPath(projectName); + + expect(result.source).toBe("virtual"); + expect(result.path).toContain(".claude/projects"); + expect(result.path).toContain(projectName); + }); + it("throws RangeError for invalid project names", () => { expect(() => resolveAnyProjectPath("")).toThrow(RangeError); expect(() => resolveAnyProjectPath("../../etc/passwd")).toThrow(RangeError); diff --git a/app/policies/hooks-client.tsx b/app/policies/hooks-client.tsx index 6b67cb07..32669402 100644 --- a/app/policies/hooks-client.tsx +++ b/app/policies/hooks-client.tsx @@ -264,7 +264,7 @@ function DetailPanel({ }) { return ( - +
@@ -851,12 +851,6 @@ function IntegrationSelectModal({ new Set(integrations?.filter((i) => i.installed).map((i) => i.id) ?? []) ); - useEffect(() => { - if (integrations) { - setSelected(new Set(integrations.filter((i) => i.installed).map((i) => i.id))); - } - }, [integrations]); - useEffect(() => { const handleKey = (e: KeyboardEvent) => { if (e.key === "Escape") onClose(); }; document.addEventListener("keydown", handleKey); @@ -1315,7 +1309,7 @@ function PoliciesTab({ onHooksInstallChange }: { onHooksInstallChange?: (install ? (config.cliOverrides[selectedCliTab]?.policyParams[policy.name] ?? policy.currentParams) : policy.currentParams; - const isForcedOn = selectedCliTab ? cliMode === "enable" : policy.enabled; + const isForcedOn = selectedCliTab ? cliMode === "enable" || (cliMode === "inherit" && policy.enabled) : policy.enabled; return (
{ lastModifiedFormatted: formatDate(lastModified), source: "opencode" as const, sources: ["opencode" as const], + cwd: row.directory, } as ProjectFolder; }); } catch { @@ -182,13 +185,10 @@ async function getOpencodeDbSessionsForCwd(cwd: string): Promise } /** Internal helper to check if an opencode session has already been merged into a workspace project */ -function isOpencodeSessionMerged(sessionId: string, virtualFolders: ProjectFolder[]): boolean { - return virtualFolders.some(f => - f.sources.includes("opencode") && - // This is a heuristic: if we have activity for opencode in a CWD, - // we assume the standalone session file for that CWD is redundant. - f.name !== sessionId - ); +function isOpencodeSessionMerged(sessionCwd: string, virtualFolders: ProjectFolder[]): boolean { + if (!sessionCwd) return false; + const encoded = encodeCwd(sessionCwd); + return virtualFolders.some(f => f.sources.includes("opencode") && f.name === encoded); } export async function getProjectFolders(): Promise { @@ -210,7 +210,7 @@ export async function getProjectFolders(): Promise { for (const folder of allFolders) { // For standalone opencode session files: skip if we've already merged opencode // activity into a workspace project (unification). - if (folder.source === "opencode" && folder.name.startsWith("ses_") && isOpencodeSessionMerged(folder.name, virtualFolders)) { + if (folder.source === "opencode" && folder.name.startsWith("ses_") && isOpencodeSessionMerged(folder.cwd ?? "", virtualFolders)) { continue; } @@ -300,14 +300,10 @@ export function resolveAnyProjectPath( return { path: `${OPENCODE_DB_SESSION_PREFIX}${name}`, source: "opencode" }; } try { - return { path: resolveProjectPath(name), source: "claude-code" }; + const path = resolveProjectPath(name); + return { path, source: existsSync(path) ? 
"claude-code" : "virtual" }; } catch { - // If it's none of the above, it might be a virtual project name (encoded CWD) - try { - return { path: resolveProjectPath(name), source: "virtual" }; - } catch { - throw new RangeError(`Project "${name}" not found in Claude, Copilot, or opencode paths`); - } + throw new RangeError(`Project "${name}" not found in Claude, Copilot, or opencode paths`); } } diff --git a/scripts/codex-trace.mjs b/scripts/codex-trace.mjs index adc3d31b..aea8e006 100644 --- a/scripts/codex-trace.mjs +++ b/scripts/codex-trace.mjs @@ -1,4 +1,4 @@ -#!/usr/bin/env node +#!/usr/bin/env bun import { homedir } from "node:os"; import { resolve } from "node:path"; import { writeCodexTraceFile } from "../src/codex/trace-parser"; diff --git a/src/hooks/builtin-policies.ts b/src/hooks/builtin-policies.ts index e9c7fea5..6ef2808c 100644 --- a/src/hooks/builtin-policies.ts +++ b/src/hooks/builtin-policies.ts @@ -124,14 +124,15 @@ function getFilePath(ctx: PolicyContext): string { const input = parseJsonLikeValue(ctx.toolInput); if (!input || typeof input !== "object") return ""; const record = input as Record; - return ( + const direct = ( (record.file_path as string) ?? (record.filePath as string) ?? (record.path as string) ?? (record.relative_path as string) ?? - (record.filename as string) ?? 
- "" + (record.filename as string) ); + if (typeof direct === "string" && direct.trim().length > 0) return direct; + return findNestedStringByKeys(record, ["file_path", "filePath", "path", "relative_path", "filename"]); } /** diff --git a/src/hooks/handler.ts b/src/hooks/handler.ts index c4c95dcb..8ec25d33 100644 --- a/src/hooks/handler.ts +++ b/src/hooks/handler.ts @@ -559,7 +559,12 @@ export async function handleHookEvent(eventType: string, cliOverride?: string): // Helper for safe integration retrieval const getInteg = (type: IntegrationType) => { - try { return getIntegration(type); } catch { return getIntegration("claude-code"); } + try { + return getIntegration(type); + } catch (e) { + hookLogWarn(`unknown integration "${type}", falling back to claude-code: ${e instanceof Error ? e.message : String(e)}`); + return getIntegration("claude-code"); + } }; const integ = getInteg(integrationType); diff --git a/src/hooks/integrations.ts b/src/hooks/integrations.ts index 800b0100..eeda15ec 100644 --- a/src/hooks/integrations.ts +++ b/src/hooks/integrations.ts @@ -1356,9 +1356,8 @@ export const FailproofAIPlugin = (ctx: any) => { syncSession(input.sessionID); }, "session.idle": async (input: any) => { - try { + try { callcli("session.idle", { session_id: input.sessionID }); - callcli("SessionEnd", { session_id: input.sessionID }); } catch {} }, }; @@ -1508,16 +1507,6 @@ export default function (pi: ExtensionAPI) { } }; - // DEBUG: Log available session values - const debugInfo = { - ctx_sessionId: ctx?.sessionId, - ctx_session_id: ctx?.session?.id, - pi_session_id: pi.session?.id, - pi_sessionId: pi.sessionId, - projectName, - }; - try { (pi as any).log?.(\`[FailproofAI Debug] Session ID sources: \${JSON.stringify(debugInfo)}\`); } catch {} - const sessionId = ctx?.sessionId || ctx?.session?.id || From be35518f1dac412899c75b9cdace75571a0f1cad Mon Sep 17 00:00:00 2001 From: Yash Upadhyay Date: Sun, 26 Apr 2026 06:20:07 +0000 Subject: [PATCH 44/47] chore: integrate 
require-no-conflicts-before-stop from main (#176) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-integrate upstream additions before merge to avoid conflict markers crashing the bun hook process during git merge/rebase: - Add requireNoConflictsBeforeStop function and BUILTIN_POLICIES entry - Update test counts (31→32 policies, 4→5 workflow policies) - Add params schema test for require-no-conflicts-before-stop - Add CHANGELOG entry and bump version to 0.0.6-beta.6 Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 4 + __tests__/hooks/builtin-policies.test.ts | 23 +++-- package.json | 2 +- src/hooks/builtin-policies.ts | 117 +++++++++++++++++++++++ 4 files changed, 138 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70a52d45..12c3e290 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ - Populate `permissionMode` in activity entries for all CLIs: Codex reads `approval_policy` from its session transcript, Cursor/Copilot/Gemini walk the `/proc` ancestor tree to parse mode flags (checking both `argv[0]` and `argv[1]` to support Node.js-wrapped binaries), all CLIs fall back to `"default"` when no explicit mode is detected - Add cloud platform client: `login`, `logout`, `whoami`, `relay start|stop|status`, and `sync` subcommands. Hook events are appended to a local queue and streamed to the failproofai cloud server via a background relay daemon that lazy-starts from the hook handler and survives reboots (#132) - Add native transcript/session support across non-Claude CLIs in dashboard parsing: OpenCode sessions now load from `~/.local/share/opencode/opencode.db`, Gemini/Pi native transcript discovery is expanded, and Gemini chat discovery now targets real chat files (`.jsonl`/`.json`) while ignoring tool-call sidecar artifacts. +- Add `require-no-conflicts-before-stop` builtin workflow policy that denies Stop until the current branch merges cleanly with the base branch. 
Runs a local `git merge-tree` probe (names the conflicted files) and an optional `gh pr view --json mergeable` probe that catches conflicts a stale local `origin/` would miss (#176) + +### Docs +- Add demo GIF to README (#178) ### Fixes - Fix Cursor/Gemini e2e tests to use isolated temp HOME (isoHome) per test, preventing real `~/.failproofai` mutation and parallel flakes diff --git a/__tests__/hooks/builtin-policies.test.ts b/__tests__/hooks/builtin-policies.test.ts index 701ed763..257c83a7 100644 --- a/__tests__/hooks/builtin-policies.test.ts +++ b/__tests__/hooks/builtin-policies.test.ts @@ -39,8 +39,8 @@ describe("hooks/builtin-policies", () => { }); describe("BUILTIN_POLICIES", () => { - it("has 31 built-in policies", () => { - expect(BUILTIN_POLICIES).toHaveLength(31); + it("has 32 built-in policies", () => { + expect(BUILTIN_POLICIES).toHaveLength(32); }); it("has 11 default-enabled policies", () => { @@ -2004,12 +2004,13 @@ describe("hooks/builtin-policies", () => { describe("workflow policy metadata", () => { const workflowPolicies = BUILTIN_POLICIES.filter((p) => p.category === "Workflow"); - it("all 4 workflow policies exist", () => { - expect(workflowPolicies).toHaveLength(4); + it("all 5 workflow policies exist", () => { + expect(workflowPolicies).toHaveLength(5); const names = workflowPolicies.map((p) => p.name).sort(); expect(names).toEqual([ "require-ci-green-before-stop", "require-commit-before-stop", + "require-no-conflicts-before-stop", "require-pr-before-stop", "require-push-before-stop", ]); @@ -2033,11 +2034,15 @@ describe("hooks/builtin-policies", () => { } }); - it("require-push-before-stop and require-pr-before-stop have params schemas", () => { + it("require-push-before-stop, require-pr-before-stop, and require-no-conflicts-before-stop have params schemas", () => { const withParams = workflowPolicies.filter((p) => p.params); - expect(withParams).toHaveLength(2); + expect(withParams).toHaveLength(3); const names = withParams.map((p) => 
p.name).sort(); - expect(names).toEqual(["require-pr-before-stop", "require-push-before-stop"]); + expect(names).toEqual([ + "require-no-conflicts-before-stop", + "require-pr-before-stop", + "require-push-before-stop", + ]); const pushPolicy = withParams.find((p) => p.name === "require-push-before-stop")!; expect(pushPolicy.params!.remote).toBeDefined(); @@ -2048,6 +2053,10 @@ describe("hooks/builtin-policies", () => { const prPolicy = withParams.find((p) => p.name === "require-pr-before-stop")!; expect(prPolicy.params!.baseBranch).toBeDefined(); expect(prPolicy.params!.baseBranch.default).toBe("main"); + + const conflictsPolicy = withParams.find((p) => p.name === "require-no-conflicts-before-stop")!; + expect(conflictsPolicy.params!.baseBranch).toBeDefined(); + expect(conflictsPolicy.params!.baseBranch.default).toBe("main"); }); }); diff --git a/package.json b/package.json index 65a15755..0136991a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "failproofai", - "version": "0.0.6-beta.5", + "version": "0.0.6-beta.6", "description": "The easiest way to manage policies that keep your AI agents reliable, on-task, and running autonomously — for Claude Code & the Agents SDK", "bin": { "failproofai": "./dist/cli.mjs" diff --git a/src/hooks/builtin-policies.ts b/src/hooks/builtin-policies.ts index 6ef2808c..70686989 100644 --- a/src/hooks/builtin-policies.ts +++ b/src/hooks/builtin-policies.ts @@ -1372,6 +1372,108 @@ function requirePrBeforeStop(ctx: PolicyContext): PolicyResult { } } +function requireNoConflictsBeforeStop(ctx: PolicyContext): PolicyResult { + const cwd = ctx.session?.cwd; + if (!cwd) return allow("No working directory available, skipping conflict check."); + + const branch = getCurrentBranch(cwd); + if (!branch || branch === "HEAD") return allow("Detached HEAD, skipping conflict check."); + + const baseBranch = (ctx.params?.baseBranch as string) ?? 
"main"; + if (branch === baseBranch) { + return allow(`On base branch "${baseBranch}", skipping conflict check.`); + } + + // -- Layer 1: local git merge-tree -- + let localSkipped = false; + try { + execFileSync("git", ["rev-parse", "--verify", `origin/${baseBranch}`], { + cwd, encoding: "utf8", stdio: ["pipe", "pipe", "pipe"], timeout: 3000, + }); + + const ahead = execFileSync( + "git", ["log", `origin/${baseBranch}..HEAD`, "--oneline"], + { cwd, encoding: "utf8", stdio: ["pipe", "pipe", "pipe"], timeout: 5000 }, + ).trim(); + + if (!ahead) { + // Nothing ahead of base — Layer 1 doesn't apply, fall through to Layer 2. + localSkipped = true; + } else { + execFileSync( + "git", + ["merge-tree", "--write-tree", "--name-only", `origin/${baseBranch}`, "HEAD"], + { cwd, encoding: "utf8", stdio: ["pipe", "pipe", "pipe"], timeout: 10000 }, + ); + // exit 0 → clean merge, fall through to Layer 2 + } + } catch (err) { + const e = err as { status?: number; stdout?: string | Buffer }; + if (e.status === 1) { + // git merge-tree exit 1 = conflicts. stdout: \n\n\n\n + const out = (typeof e.stdout === "string" ? e.stdout : e.stdout?.toString("utf8") ?? "").trim(); + const lines = out.split("\n"); + const files: string[] = []; + for (let i = 1; i < lines.length; i++) { + const line = lines[i]; + if (line === "") break; + files.push(line); + } + const fileList = files.length ? files.join(", ") : "one or more files"; + return deny( + `Branch "${branch}" has merge conflicts with ${baseBranch} in: ${fileList}. ` + + `Rebase or merge origin/${baseBranch} now and resolve the conflicts.`, + ); + } + localSkipped = true; + } + + // -- Layer 2: GitHub PR mergeability -- + try { + execSync("gh --version", { cwd, encoding: "utf8", stdio: ["pipe", "pipe", "pipe"], timeout: 3000 }); + } catch { + return allow( + localSkipped + ? "Local conflict check skipped and gh CLI not installed, skipping conflict check." 
+ : `Branch "${branch}" merges cleanly with ${baseBranch} locally (gh CLI not installed, PR mergeability not verified).`, + ); + } + + let prJson: string; + try { + prJson = execSync("gh pr view --json mergeable,number,url", { + cwd, encoding: "utf8", stdio: ["pipe", "pipe", "pipe"], timeout: 15000, + }).trim(); + } catch { + return allow( + localSkipped + ? "No pull request found for branch, skipping conflict check." + : `Branch "${branch}" merges cleanly with ${baseBranch} locally (no PR to verify against).`, + ); + } + + let pr: { mergeable: string; number: number; url: string }; + try { + pr = JSON.parse(prJson); + } catch { + return allow("Could not parse gh pr view output, skipping PR mergeability check."); + } + + if (pr.mergeable === "CONFLICTING") { + return deny( + `PR #${pr.number} has merge conflicts per GitHub (${pr.url}). ` + + `Rebase or merge origin/${baseBranch} now and resolve the conflicts.`, + ); + } + if (pr.mergeable === "UNKNOWN") { + return deny( + `GitHub is still computing mergeability for PR #${pr.number} (${pr.url}). ` + + `Wait ~10 seconds, then re-check with \`gh pr view --json mergeable\` before attempting to stop again.`, + ); + } + return allow(`PR #${pr.number} merges cleanly per GitHub.`); +} + /** Returns false only when we can confirm the transcript has no tool_use blocks; fails open otherwise. 
*/ function sessionHadToolUse(ctx: PolicyContext): boolean { const transcriptPath = ctx.session?.transcriptPath; @@ -1805,6 +1907,21 @@ export const BUILTIN_POLICIES: BuiltinPolicyDefinition[] = [ }, } satisfies PolicyParamsSchema, }, + { + name: "require-no-conflicts-before-stop", + description: "Require the current branch to merge cleanly with the base branch before Claude stops", + fn: requireNoConflictsBeforeStop, + match: { events: ["Stop"] }, + defaultEnabled: false, + category: "Workflow", + params: { + baseBranch: { + type: "string", + description: "Base branch to check for conflicts against (default: main)", + default: "main", + }, + } satisfies PolicyParamsSchema, + }, { name: "require-ci-green-before-stop", description: "Require CI checks to pass on the current branch before Claude stops", From 6ac2fd13019aa8bd40054753db90bfd445b4c2b4 Mon Sep 17 00:00:00 2001 From: Yash Upadhyay Date: Sun, 26 Apr 2026 06:31:06 +0000 Subject: [PATCH 45/47] chore: remove committed debug scripts and gitignore them debug.mjs and debug2.mjs were throwaway scripts with hardcoded local paths used during Gemini integration development. Not suitable for a production repo. 
Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 2 ++ debug.mjs | 15 --------------- debug2.mjs | 24 ------------------------ 3 files changed, 2 insertions(+), 39 deletions(-) delete mode 100644 debug.mjs delete mode 100644 debug2.mjs diff --git a/.gitignore b/.gitignore index 5c7c5f27..f009c694 100644 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,5 @@ packages/*/assets/ # scratch / planning files HOOKS_MINDMAP.md +debug.mjs +debug2.mjs diff --git a/debug.mjs b/debug.mjs deleted file mode 100644 index 6615905f..00000000 --- a/debug.mjs +++ /dev/null @@ -1,15 +0,0 @@ -import { execSync } from "node:child_process"; -const BINARY_PATH = "dist/cli.mjs"; -const PROJECT_DIR = "__tests__/fixtures/gemini-project"; -try { - const output = execSync(`bun ${BINARY_PATH} --hook BeforeTool --cli gemini`, { - input: JSON.stringify({"session_id":"test-session-gemini-bash-001","cwd":"/home/yashu/fp/failproofai/__tests__/fixtures/gemini-project","hook_event_name":"BeforeTool","tool_name":"Shell","tool_input":"ls"}), - cwd: process.cwd(), - env: { ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" } - }); - console.log("STDOUT:", output.toString()); -} catch (e) { - console.log("ERROR:", e.message); - console.log("STDERR:", e.stderr?.toString()); - console.log("STDOUT:", e.stdout?.toString()); -} diff --git a/debug2.mjs b/debug2.mjs deleted file mode 100644 index 1268117e..00000000 --- a/debug2.mjs +++ /dev/null @@ -1,24 +0,0 @@ -import { execSync } from "node:child_process"; -import { mkdirSync, rmSync, existsSync } from "node:fs"; -const BINARY_PATH = "/home/yashu/fp/failproofai/dist/cli.mjs"; -const PROJECT_DIR = "/home/yashu/fp/failproofai/__tests__/fixtures/gemini-project-debug"; -if (existsSync(PROJECT_DIR)) rmSync(PROJECT_DIR, { recursive: true, force: true }); -mkdirSync(PROJECT_DIR, { recursive: true }); - -try { - execSync(`bun ${BINARY_PATH} policies --install block-sudo --cli gemini --scope project`, { - cwd: PROJECT_DIR, - env: { 
...process.env, FAILPROOFAI_DIST_PATH: "/home/yashu/fp/failproofai" } - }); - - const output = execSync(`bun ${BINARY_PATH} --hook BeforeTool --cli gemini`, { - input: JSON.stringify({"session_id":"test-session-gemini-bash-001","cwd":PROJECT_DIR,"hook_event_name":"BeforeTool","tool_name":"Shell","tool_input":"ls"}), - cwd: PROJECT_DIR, - env: { ...process.env, FAILPROOFAI_DIST_PATH: "/home/yashu/fp/failproofai", FAILPROOFAI_SKIP_KILL: "true" } - }).toString(); - console.log("STDOUT:", output); -} catch (e) { - console.log("ERROR:", e.message); - console.log("STDERR:", e.stderr?.toString()); - console.log("STDOUT:", e.stdout?.toString()); -} From 370362e10d103b3523eeff5695a4a6e4622efbbc Mon Sep 17 00:00:00 2001 From: Yash Upadhyay Date: Sun, 26 Apr 2026 12:27:30 +0000 Subject: [PATCH 46/47] feat: add tool name canonicalization, per-CLI policy scoping, and Pi session fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Canonicalize tool names across CLIs (WriteFile→Write, run_terminal_command→Bash, etc.) 
so policies work cross-agent without per-CLI string matching - Export isBashTool helper from public API - Add per-CLI policy configuration UI with 3-state overrides (Inherit/ON/OFF) and per-CLI params - Add per-CLI --uninstall scoping and policyParams/customPoliciesPath overrides - Populate permissionMode in activity entries for all CLIs (Codex, Cursor, Copilot, Gemini) - Fix Pi SessionStart session ID: defer until real UUID is available from tool_call, avoiding fallback lock-in - Fix various test isolation issues and e2e flakes Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 3 + __tests__/e2e/helpers/payloads.ts | 18 +++ .../e2e/hooks/copilot-integration.e2e.test.ts | 2 +- .../e2e/hooks/gemini-integration.e2e.test.ts | 91 ++++++----- .../hooks/canonicalize-tool-name.test.ts | 43 ++++++ __tests__/hooks/handler.test.ts | 26 +++- __tests__/hooks/manager.test.ts | 31 +++- app/components/raw-log-viewer.tsx | 2 +- app/policies/hooks-client.tsx | 40 +++-- bin/failproofai.mjs | 7 + examples/policies-basic.js | 32 ++-- lib/log-entries.ts | 52 +++++-- lib/projects.ts | 73 +++++---- src/hooks/builtin-policies.ts | 29 +--- src/hooks/custom-hooks-loader.ts | 2 +- src/hooks/handler.ts | 49 +++++- src/hooks/integrations.ts | 145 ++++++++++++++---- src/hooks/loader-utils.ts | 21 ++- src/hooks/manager.ts | 95 +++++------- src/hooks/policy-helpers.ts | 14 ++ src/hooks/types.ts | 1 + src/index.ts | 2 +- 22 files changed, 533 insertions(+), 245 deletions(-) create mode 100644 __tests__/hooks/canonicalize-tool-name.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 12c3e290..4923aac4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### Features - Add per-CLI policy configuration UI to dashboard: Switch between Global and specific CLI tabs (Claude Code, Cursor, etc.) to apply 3-state overrides (Inherit/ON/OFF) and per-CLI policy parameters. Fixes terminal TUI prompt hijacking when installing hooks from the web dashboard. 
+- Add tool name canonicalization: Multi-agent tool names (e.g., Gemini's `WriteFile` or Cursor's `run_terminal_command`) are now mapped to standard canonical names (`Write`, `Bash`, `Read`) before policies fire. This enables custom policies to work cross-CLI without agent-specific string matching logic. +- Export `isBashTool` helper: Custom policy authors can now use the same robust shell-detection logic as built-in policies via `import { isBashTool } from 'failproofai'`. - Add per-CLI policy scoping: `--uninstall --cli ` now disables only for that CLI (writes to `cli[X].disabledPolicies`), leaving all other CLIs unaffected. Per-CLI `policyParams` and `customPoliciesPath` overrides are also supported. `listHooks` shows per-CLI suppressions and CLI-only additions inline. - Populate `permissionMode` in activity entries for all CLIs: Codex reads `approval_policy` from its session transcript, Cursor/Copilot/Gemini walk the `/proc` ancestor tree to parse mode flags (checking both `argv[0]` and `argv[1]` to support Node.js-wrapped binaries), all CLIs fall back to `"default"` when no explicit mode is detected - Add cloud platform client: `login`, `logout`, `whoami`, `relay start|stop|status`, and `sync` subcommands. 
Hook events are appended to a local queue and streamed to the failproofai cloud server via a background relay daemon that lazy-starts from the hook handler and survives reboots (#132) @@ -23,6 +25,7 @@ - Fix `isOpencodeSessionMerged` to compare session CWD against virtual folder names (encoded CWD) instead of always returning true - Fix `resolveAnyProjectPath` unreachable `"virtual"` branch: now uses `existsSync` to distinguish real Claude project directories from activity-store-only virtual projects - Fix `session.idle` in OpenCode plugin to not double-emit `SessionEnd` (idle is not a session close) +- Fix Pi integration `SessionStart` session ID: defers the event until the first real session UUID is available (from tool_call or Pi's context), preventing the fallback `pi--` ID from being locked in before Pi assigns the real UUID - Remove debug logging of Pi session ID sources from `integrations.ts` - Fix `getFilePath` in builtin policies to use `findNestedStringByKeys` for nested payload lookup, matching `getCommand` behaviour - Fix silent fallback in `getInteg` handler to log a warning before falling back to `claude-code` diff --git a/__tests__/e2e/helpers/payloads.ts b/__tests__/e2e/helpers/payloads.ts index 489682d8..18fa3876 100644 --- a/__tests__/e2e/helpers/payloads.ts +++ b/__tests__/e2e/helpers/payloads.ts @@ -162,6 +162,24 @@ export const GeminiPayloads = { tool_input: { command }, }; }, + bashViaToolArgs(command: string, cwd: string): Record { + return { + session_id: SESSION_ID, + cwd, + hook_event_name: "BeforeTool", + toolName: "Shell", + toolArgs: JSON.stringify({ command, cwd }), + }; + }, + writeFile(filePath: string, cwd: string): Record { + return { + session_id: SESSION_ID, + cwd, + hook_event_name: "BeforeTool", + tool_name: "WriteFile", + tool_input: { file_path: filePath }, + }; + }, }, afterAgent(cwd: string): Record { return { diff --git a/__tests__/e2e/hooks/copilot-integration.e2e.test.ts 
b/__tests__/e2e/hooks/copilot-integration.e2e.test.ts index a47eeafd..a28745a0 100644 --- a/__tests__/e2e/hooks/copilot-integration.e2e.test.ts +++ b/__tests__/e2e/hooks/copilot-integration.e2e.test.ts @@ -182,7 +182,7 @@ describe("E2E: Copilot Integration", () => { eventType: "PreToolUse", integration: "copilot", sessionId: COPILOT_SESSION_ID, - toolName: "bash", + toolName: "Bash", transcriptPath: join(HOME_DIR, ".copilot", "session-state", COPILOT_SESSION_ID, "events.jsonl"), }), ]), diff --git a/__tests__/e2e/hooks/gemini-integration.e2e.test.ts b/__tests__/e2e/hooks/gemini-integration.e2e.test.ts index 0905eb10..bb030b31 100644 --- a/__tests__/e2e/hooks/gemini-integration.e2e.test.ts +++ b/__tests__/e2e/hooks/gemini-integration.e2e.test.ts @@ -27,6 +27,15 @@ describe("E2E: Gemini Integration", () => { const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), HOME: isoHome }); + const runHook = (eventName: string, payload: Record) => { + return spawnSync("bun", [BINARY_PATH, "--hook", eventName, "--cli", "gemini"], { + input: JSON.stringify(payload), + cwd: PROJECT_DIR, + env: { ...baseEnv(), FAILPROOFAI_LOG_LEVEL: "info", FAILPROOFAI_SKIP_KILL: "true" }, + encoding: "utf8", + }); + }; + it("denies sudo via Gemini BeforeTool hook with deny decision", () => { // 1. Install block-sudo execSync(`bun ${BINARY_PATH} policies --install block-sudo --cli gemini --scope project`, { @@ -37,18 +46,11 @@ describe("E2E: Gemini Integration", () => { // 2. 
Trigger the hook const payload = GeminiPayloads.beforeTool.bash("sudo rm -rf /", PROJECT_DIR); - // We pass --cli gemini to ensure it doesn't fallback to claude-code - const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "BeforeTool", "--cli", "gemini"], { - input: JSON.stringify(payload), - cwd: PROJECT_DIR, - env: { ...baseEnv(), FAILPROOFAI_LOG_LEVEL: "info", FAILPROOFAI_SKIP_KILL: "true" }, - encoding: "utf8", - }); + const { status, stdout, stderr } = runHook("BeforeTool", payload); console.log("Gemini STDOUT:", stdout); console.log("Gemini STDERR:", stderr); // Gemini expects Exit 0 for a protocol-compliant JSON denial. - // If we exit with 2, it may "fail open" and proceed with the action. expect(status).toBe(0); const parsed = JSON.parse(stdout.trim()); expect(parsed.decision).toBe("deny"); @@ -66,13 +68,9 @@ describe("E2E: Gemini Integration", () => { }); const payload = GeminiPayloads.beforeTool.bash("ls", PROJECT_DIR); - const output = execSync(`bun ${BINARY_PATH} --hook BeforeTool --cli gemini`, { - input: JSON.stringify(payload), - cwd: PROJECT_DIR, - env: { ...baseEnv(), FAILPROOFAI_SKIP_KILL: "true" }, - }).toString(); + const { stdout } = runHook("BeforeTool", payload); - expect(JSON.parse(output.trim())).toEqual({ decision: "allow" }); + expect(JSON.parse(stdout.trim())).toEqual({ decision: "allow" }); }); it("denies sudo from stringified Gemini toolArgs payloads", () => { @@ -81,20 +79,9 @@ describe("E2E: Gemini Integration", () => { env: baseEnv(), }); - const payload = { - session_id: "test-session-gemini-json-001", - cwd: PROJECT_DIR, - hook_event_name: "BeforeTool", - toolName: "Shell", - toolArgs: "{\"command\":\"sudo apt-get update\",\"cwd\":\"" + PROJECT_DIR.replace(/\\/g, "\\\\") + "\"}", - }; + const payload = GeminiPayloads.beforeTool.bashViaToolArgs("sudo apt-get update", PROJECT_DIR); - const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "BeforeTool", "--cli", "gemini"], { - input: 
JSON.stringify(payload), - cwd: PROJECT_DIR, - env: { ...baseEnv(), FAILPROOFAI_SKIP_KILL: "true" }, - encoding: "utf8", - }); + const { status, stdout, stderr } = runHook("BeforeTool", payload); expect(status).toBe(0); const parsed = JSON.parse(stdout.trim()); @@ -112,20 +99,9 @@ describe("E2E: Gemini Integration", () => { env: baseEnv(), }); - const payload = { - session_id: "test-session-e2e-001", - cwd: PROJECT_DIR, - hook_event_name: "BeforeTool", - tool_name: "Shell", - tool_input: "env", - }; + const payload = GeminiPayloads.beforeTool.bash("env", PROJECT_DIR); - const { status, stdout, stderr } = spawnSync("bun", [BINARY_PATH, "--hook", "BeforeTool", "--cli", "gemini"], { - input: JSON.stringify(payload), - cwd: PROJECT_DIR, - env: { ...baseEnv(), FAILPROOFAI_SKIP_KILL: "true" }, - encoding: "utf8", - }); + const { status, stdout, stderr } = runHook("BeforeTool", payload); expect(status).toBe(0); const parsed = JSON.parse(stdout.trim()); @@ -136,4 +112,39 @@ describe("E2E: Gemini Integration", () => { expect(stderr).toContain("MANDATORY ACTION REQUIRED"); expect(stderr).toContain("environment variables"); }); + + it("denies production writes on Gemini WriteFile tool via canonicalization", () => { + // 1. Create a custom policy file + const policyPath = resolve(PROJECT_DIR, "prod-policy.js"); + writeFileSync(policyPath, ` + import { customPolicies, allow, deny, isBashTool } from "failproofai"; + customPolicies.add({ + name: "block-production-writes", + match: { events: ["PreToolUse"] }, + fn: async (ctx) => { + if (ctx.toolName === "Write") { + if (ctx.toolInput?.file_path?.includes("production")) { + return deny("Production write blocked"); + } + } + return allow(); + } + }); + `); + + // 2. Install with custom path + execSync(`bun ${BINARY_PATH} policies --install --custom ${policyPath} --cli gemini --scope project`, { + cwd: PROJECT_DIR, + env: baseEnv(), + }); + + // 3. 
Trigger with WriteFile (should be canonicalized to Write) + const payload = GeminiPayloads.beforeTool.writeFile("/etc/production.conf", PROJECT_DIR); + const { status, stdout } = runHook("BeforeTool", payload); + + expect(status).toBe(0); + const parsed = JSON.parse(stdout.trim()); + expect(parsed.decision).toBe("deny"); + expect(parsed.reason).toContain("Production write blocked"); + }); }); diff --git a/__tests__/hooks/canonicalize-tool-name.test.ts b/__tests__/hooks/canonicalize-tool-name.test.ts new file mode 100644 index 00000000..f00d0fe9 --- /dev/null +++ b/__tests__/hooks/canonicalize-tool-name.test.ts @@ -0,0 +1,43 @@ +import { describe, it, expect } from "vitest"; +import { canonicalizeToolName } from "../../src/hooks/integrations"; + +describe("canonicalizeToolName", () => { + it("normalizes file write tools to 'Write'", () => { + expect(canonicalizeToolName("WriteFile")).toBe("Write"); + expect(canonicalizeToolName("write_file")).toBe("Write"); + expect(canonicalizeToolName("save_file")).toBe("Write"); + expect(canonicalizeToolName("createfile")).toBe("Write"); + }); + + it("normalizes file read tools to 'Read'", () => { + expect(canonicalizeToolName("ReadFile")).toBe("Read"); + expect(canonicalizeToolName("read_file")).toBe("Read"); + expect(canonicalizeToolName("get_file_content")).toBe("Read"); + }); + + it("normalizes shell tools to 'Bash'", () => { + expect(canonicalizeToolName("Shell")).toBe("Bash"); + expect(canonicalizeToolName("terminal")).toBe("Bash"); + expect(canonicalizeToolName("console")).toBe("Bash"); + expect(canonicalizeToolName("sh")).toBe("Bash"); + expect(canonicalizeToolName("bash_login_shell")).toBe("Bash"); + expect(canonicalizeToolName("run_terminal_command")).toBe("Bash"); + expect(canonicalizeToolName("run_shell_command")).toBe("Bash"); + expect(canonicalizeToolName("execute_command")).toBe("Bash"); + }); + + it("passes through already canonical names", () => { + expect(canonicalizeToolName("Write")).toBe("Write"); + 
expect(canonicalizeToolName("Read")).toBe("Read"); + expect(canonicalizeToolName("Bash")).toBe("Bash"); + }); + + it("passes through unknown tool names", () => { + expect(canonicalizeToolName("Glob")).toBe("Glob"); + expect(canonicalizeToolName("Search")).toBe("Search"); + }); + + it("handles undefined/null", () => { + expect(canonicalizeToolName(undefined)).toBeUndefined(); + }); +}); diff --git a/__tests__/hooks/handler.test.ts b/__tests__/hooks/handler.test.ts index 364a813d..593103ed 100644 --- a/__tests__/hooks/handler.test.ts +++ b/__tests__/hooks/handler.test.ts @@ -200,7 +200,7 @@ describe("hooks/handler", () => { expect(evaluatePolicies).toHaveBeenCalledWith( "PreToolUse", expect.objectContaining({ - tool_name: "Shell", + tool_name: "Bash", tool_input: { command: "sudo apt-get update", cwd: "/repo/subdir" }, cwd: "/repo/subdir", }), @@ -672,7 +672,7 @@ describe("hooks/handler", () => { "PreToolUse", expect.objectContaining({ session_id: "cop-toolargs-1", - tool_name: "bash", + tool_name: "Bash", tool_input: { command: "sudo ls", cwd: "/repo/copilot-app/subdir" }, }), expect.objectContaining({ @@ -1068,11 +1068,31 @@ describe("hooks/handler", () => { it("skips Stop and other non-conversation events", () => { writeVirtualLogEntry(logPath, "Stop", {}); - writeVirtualLogEntry(logPath, "SessionStart", {}); expect(fs.existsSync(logPath)).toBe(false); }); + it("logs SessionStart as a system message", () => { + writeVirtualLogEntry(logPath, "SessionStart", {}); + + expect(fs.existsSync(logPath)).toBe(true); + const entry = JSON.parse(fs.readFileSync(logPath, "utf-8")); + expect(entry.type).toBe("system"); + expect(entry.message.content).toBe("Session started"); + }); + + it("logs AssistantResponse as an assistant message", () => { + writeVirtualLogEntry(logPath, "AssistantResponse", { + assistant_response: "Hello user!", + }); + + expect(fs.existsSync(logPath)).toBe(true); + const entry = JSON.parse(fs.readFileSync(logPath, "utf-8")); + 
expect(entry.type).toBe("assistant"); + expect(entry.message.role).toBe("assistant"); + expect(entry.message.content[0].text).toBe("Hello user!"); + }); + it("skips PostToolUse with no matching PreToolUse", () => { writeVirtualLogEntry(logPath, "PostToolUse", { tool_name: "Bash", diff --git a/__tests__/hooks/manager.test.ts b/__tests__/hooks/manager.test.ts index 31107a43..4f02c479 100644 --- a/__tests__/hooks/manager.test.ts +++ b/__tests__/hooks/manager.test.ts @@ -82,7 +82,7 @@ describe("hooks/manager", () => { }); describe("installHooks", () => { - it("installs hooks for all 26 event types into empty settings", async () => { + it("installs hooks for all 27 event types into empty settings", async () => { vi.mocked(existsSync).mockReturnValue(true); vi.mocked(readFileSync).mockReturnValue("{}"); @@ -94,7 +94,7 @@ describe("hooks/manager", () => { expect(path).toBe(USER_SETTINGS_PATH); const written = JSON.parse(content as string); - expect(Object.keys(written.hooks)).toHaveLength(26); + expect(Object.keys(written.hooks)).toHaveLength(27); for (const [eventType, matchers] of Object.entries(written.hooks)) { expect(matchers).toHaveLength(1); @@ -227,7 +227,13 @@ describe("hooks/manager", () => { ], }, }; - vi.mocked(readFileSync).mockReturnValue(JSON.stringify(existingSettings)); + vi.mocked(readFileSync).mockImplementation((p) => { + if (p === USER_SETTINGS_PATH) return JSON.stringify(existingSettings); + return "{}"; + }); + vi.mocked(existsSync).mockImplementation((p) => { + return p === USER_SETTINGS_PATH || String(p).includes("failproofai.mjs"); + }); const { installHooks } = await import("../../src/hooks/manager"); await installHooks(); @@ -250,7 +256,7 @@ describe("hooks/manager", () => { expect(writeFileSync).toHaveBeenCalledOnce(); const [, content] = vi.mocked(writeFileSync).mock.calls[0]; const written = JSON.parse(content as string); - expect(Object.keys(written.hooks)).toHaveLength(26); + expect(Object.keys(written.hooks)).toHaveLength(27); }); 
it("resolves binary from FAILPROOFAI_DIST_PATH", async () => { @@ -350,7 +356,13 @@ describe("hooks/manager", () => { ], }, }; - vi.mocked(readFileSync).mockReturnValue(JSON.stringify(existingSettings)); + vi.mocked(readFileSync).mockImplementation((p) => { + if (p === PROJECT_SETTINGS_PATH) return JSON.stringify(existingSettings); + return "{}"; + }); + vi.mocked(existsSync).mockImplementation((p) => { + return p === PROJECT_SETTINGS_PATH || String(p).includes("failproofai.mjs"); + }); const { installHooks } = await import("../../src/hooks/manager"); await installHooks(["all"], "project"); @@ -439,7 +451,7 @@ describe("hooks/manager", () => { expect(path).toBe(USER_SETTINGS_PATH); }); - it("warns when hooks exist in another scope", async () => { + it("skips installation when hooks exist in another scope to avoid double execution", async () => { // Mock: project scope has existing hooks, installing to user scope vi.mocked(existsSync).mockImplementation((p) => { return p === PROJECT_SETTINGS_PATH || p === USER_SETTINGS_PATH; @@ -460,8 +472,13 @@ describe("hooks/manager", () => { await installHooks(["all"], "user"); expect(console.log).toHaveBeenCalledWith( - expect.stringContaining("Warning: Failproof AI hooks are also installed"), + expect.stringContaining("Notice: Failproof AI hooks are already active"), ); + expect(console.log).toHaveBeenCalledWith( + expect.stringContaining("Skipping installation in user scope"), + ); + // writeHookEntries should NOT have been called for user scope + expect(writeFileSync).not.toHaveBeenCalled(); }); it("fires hooks_installed telemetry with correct properties", async () => { diff --git a/app/components/raw-log-viewer.tsx b/app/components/raw-log-viewer.tsx index a1c5e0c6..3db18c0d 100644 --- a/app/components/raw-log-viewer.tsx +++ b/app/components/raw-log-viewer.tsx @@ -350,7 +350,7 @@ function VirtualizedEntryList({ entries, entriesBySource, projectName, sessionId const isTarget = !!highlightedUuid && entry.uuid === 
(parentUuidForSubagent ?? highlightedUuid); return (
- - - - - - - + {INTEGRATION_TYPES.map((id) => ( + + ))}
- + { + if (!integrations) return; + const allIds = integrations.map(i => i.id); + if (selected.size === allIds.length) { + setSelected(new Set()); + } else { + setSelected(new Set(allIds)); + } + }; + return (
-
+
+ Integrations + +
+
{integrations === null ? (

Loading…

) : ( @@ -1105,7 +1123,9 @@ function PoliciesTab({ onHooksInstallChange }: { onHooksInstallChange?: (install const handleInstall = () => { setIntegrationsList(null); setShowIntegrationModal(true); - getIntegrationsStatusAction().then(setIntegrationsList); + getIntegrationsStatusAction() + .then(setIntegrationsList) + .catch((e) => setActionError(e instanceof Error ? e.message : "Failed to load integrations status.")); }; const handleInstallWithIntegrations = (integrations: string[]) => { diff --git a/bin/failproofai.mjs b/bin/failproofai.mjs index 637b1f41..f39ed127 100755 --- a/bin/failproofai.mjs +++ b/bin/failproofai.mjs @@ -365,6 +365,13 @@ EXAMPLES const consumedIdxs = new Set(); if (scopeIdx >= 0) consumedIdxs.add(scopeIdx + 1); + const customIdx = subArgs.includes("--custom") ? subArgs.indexOf("--custom") + : subArgs.includes("-c") ? subArgs.indexOf("-c") + : -1; + if (customIdx >= 0 && subArgs[customIdx + 1] && !subArgs[customIdx + 1].startsWith("-")) { + consumedIdxs.add(customIdx + 1); + } + const cliIdx = subArgs.indexOf("--cli"); if (cliIdx >= 0) { for (let i = cliIdx + 1; i < subArgs.length && !subArgs[i].startsWith("-"); i++) { diff --git a/examples/policies-basic.js b/examples/policies-basic.js index 05d978aa..cec2d395 100644 --- a/examples/policies-basic.js +++ b/examples/policies-basic.js @@ -11,19 +11,33 @@ * - Run `curl ... | bash` → deny * - Anything else → allow */ -import { customPolicies, allow, deny, instruct } from "failproofai"; +import { customPolicies, allow, deny, instruct, isBashTool } from "failproofai"; // 1. Block writes to production config files customPolicies.add({ name: "block-production-writes", - description: "Prevent writes to files with 'production' or 'prod.' in their path", + description: "Prevent writes to files with 'production' or 'prod.' in their path or command", match: { events: ["PreToolUse"] }, fn: async (ctx) => { - if (ctx.toolName !== "Write") return allow(); - const path = String(ctx.toolInput?.file_path ?? 
""); - if (/production|prod\./i.test(path)) { - return deny(`Writing to production config is blocked: ${path}`); + const PROD_RE = /production|prod\./i; + + // Write tool (Claude Code, Gemini WriteFile→Write, etc.) + if (ctx.toolName === "Write") { + const path = String(ctx.toolInput?.file_path ?? ""); + if (PROD_RE.test(path)) { + return deny(`Writing to production file is blocked: ${path}`); + } + return allow(); + } + + // Bash-based file writes: echo >> file, tee, cat >, cp to production paths + if (isBashTool(ctx.toolName)) { + const cmd = String(ctx.toolInput?.command ?? ""); + if (PROD_RE.test(cmd)) { + return deny(`Shell command targeting a production path is blocked: ${cmd}`); + } } + return allow(); }, }); @@ -34,7 +48,7 @@ customPolicies.add({ description: "Block git push --force with a team-specific message", match: { events: ["PreToolUse"] }, fn: async (ctx) => { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = String(ctx.toolInput?.command ?? ""); if (/git\s+push\b.*\s(-f|--force)\b/.test(cmd)) { return deny("Force-push is prohibited — open a PR and request a branch reset instead"); @@ -49,7 +63,7 @@ customPolicies.add({ description: "Remind Claude to verify lockfile consistency before npm install", match: { events: ["PreToolUse"] }, fn: async (ctx) => { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = String(ctx.toolInput?.command ?? ""); if (/\bnpm\s+install\b/.test(cmd) && !/\bnpm\s+install\s+\S/.test(cmd)) { return instruct( @@ -67,7 +81,7 @@ customPolicies.add({ description: "Block curl|sh and wget|bash remote code execution patterns", match: { events: ["PreToolUse"] }, fn: async (ctx) => { - if (ctx.toolName !== "Bash") return allow(); + if (!isBashTool(ctx.toolName)) return allow(); const cmd = String(ctx.toolInput?.command ?? 
""); if (/\bcurl\b.*\|\s*(ba)?sh\b/.test(cmd) || /\bwget\b.*\|\s*(ba)?sh\b/.test(cmd)) { return deny("Piping remote content into a shell is blocked — download the script first and inspect it"); diff --git a/lib/log-entries.ts b/lib/log-entries.ts index 5f6ef18a..be155f1a 100644 --- a/lib/log-entries.ts +++ b/lib/log-entries.ts @@ -176,7 +176,7 @@ function mapActivityEntryToLogEntry(e: any): LogEntry { const baseDetails = { _source: source, - uuid: (e.sessionId || "") + (e.timestamp || ""), + uuid: `${e.sessionId || ""}-${e.timestamp || ""}-${e.eventType || ""}-${e.toolName || ""}`, parentUuid: null, timestamp, timestampMs: e.timestamp, @@ -186,12 +186,8 @@ function mapActivityEntryToLogEntry(e: any): LogEntry { const lowEvent = (e.eventType || "").toLowerCase(); // 1. User Prompts (Claude, Cursor, Gemini, Copilot, Codex, OpenCode, Pi) - const isUserEvent = - lowEvent.includes("prompt") || - lowEvent.includes("submit") || - lowEvent.includes("message") || - lowEvent.includes("chat") || - lowEvent.includes("input"); + const userEvents = ["userpromptsubmit", "userchat", "userinput", "message_sent", "chat_message", "promptsuggestionselected"]; + const isUserEvent = userEvents.includes(lowEvent); if (isUserEvent) { const ti = e.toolInput as Record | string | undefined; @@ -446,7 +442,9 @@ function mergeMirroredAndActivityEntries( const nonDuplicateActivityEntries = activityEntries.filter((entry) => { const sig = getEntrySignature(entry); const bucket = Math.floor(entry.timestampMs / BUCKET_MS); - return !seen.has(`${sig}|${bucket}`); + if (seen.has(`${sig}|${bucket}`)) return false; + markSeen(entry); // Mark it so subsequent activity entries with same sig are dropped + return true; }); const allEntries = [...mirroredEntries, ...nonDuplicateActivityEntries]; @@ -847,6 +845,7 @@ function parseCopilotEventsFile(content: string, source: LogSource): ParseFileRe } const entries: LogEntry[] = []; + const seenPrompts = new Set(); // Added to deduplicate user.message vs 
hook.start for (const obj of allObjects) { const type = obj.type as string; @@ -865,7 +864,12 @@ function parseCopilotEventsFile(content: string, source: LogSource): ParseFileRe const data = obj.data as Record | undefined; const text = stringifyStructured(data?.content ?? data?.message ?? data?.text ?? "") ?? ""; if (text) { - entries.push({ type: "user", ...base, message: { role: "user", content: text } } as UserEntry); + // Use bucketed timestamp to catch simultaneous events + const promptKey = `${Math.floor(date.getTime() / 2000)}:${text}`; + if (!seenPrompts.has(promptKey)) { + entries.push({ type: "user", ...base, message: { role: "user", content: text } } as UserEntry); + seenPrompts.add(promptKey); + } } continue; } @@ -876,7 +880,11 @@ function parseCopilotEventsFile(content: string, source: LogSource): ParseFileRe const input = data.input as Record | undefined; const text = stringifyStructured(input?.prompt ?? input?.content ?? "") ?? ""; if (text) { - entries.push({ type: "user", ...base, message: { role: "user", content: text } } as UserEntry); + const promptKey = `${Math.floor(date.getTime() / 2000)}:${text}`; + if (!seenPrompts.has(promptKey)) { + entries.push({ type: "user", ...base, message: { role: "user", content: text } } as UserEntry); + seenPrompts.add(promptKey); + } } } continue; @@ -1723,7 +1731,10 @@ export async function parseSessionLog( // Try mirrored JSONL (has proper pre/post tool pairing via writeVirtualLogEntry sidecar) let mirroredEntries: LogEntry[] = []; let usedMirrored = false; - const mirroredPath = join(resolveProjectPath(sid), `${sessionId}.jsonl`); + const firstActivity = matching[0]; + const { encodeCwd } = await import("./paths"); + const encodedCwd = firstActivity.cwd ? 
encodeCwd(firstActivity.cwd) : sid; + const mirroredPath = join(getClaudeProjectsPath(), encodedCwd, `${sessionId}.jsonl`); try { const mirroredContent = await readFile(mirroredPath, "utf-8"); const parsed = await parseFileContent(mirroredContent, "session"); @@ -1759,7 +1770,7 @@ export async function parseSessionLog( const cwd = decodeFolderName(projectName); void cwd; // used implicitly through tryKnownNativeTranscriptPaths above const allActivity = getAllHookActivityEntries(); - const VIRTUAL_INTEGRATIONS = INTEGRATION_TYPES as unknown as string[]; + const VIRTUAL_INTEGRATIONS = INTEGRATION_TYPES.filter((t) => t !== "claude-code") as unknown as string[]; const matchingEntries = allActivity.filter( (entry) => entry.sessionId === sessionId && @@ -1776,7 +1787,10 @@ export async function parseSessionLog( // Try mirrored JSONL (has proper pre/post tool pairing via writeVirtualLogEntry sidecar) let mirroredEntries: LogEntry[] = []; let usedMirrored = false; - const mirroredPath = join(resolveProjectPath(projectName), `${sessionId}.jsonl`); + const firstActivity = matchingEntries[0]; + const { encodeCwd } = await import("./paths"); + const encodedCwd = firstActivity.cwd ? 
encodeCwd(firstActivity.cwd) : projectName; + const mirroredPath = join(getClaudeProjectsPath(), encodedCwd, `${sessionId}.jsonl`); try { const mirroredContent = await readFile(mirroredPath, "utf-8"); const parsed = await parseFileContent(mirroredContent, "session"); @@ -1865,7 +1879,17 @@ export async function parseSessionLog( // Sort combined entries by timestamp allEntries.sort((a, b) => a.timestampMs - b.timestampMs); - return { entries: allEntries, rawLines: allRawLines, subagentIds, sourceMode: "native", sourceDetail: sourcePathUsed }; + // Final deduplication by UUID to prevent React key collisions + const uniqueEntries: LogEntry[] = []; + const seenUuids = new Set(); + for (const entry of allEntries) { + if (!entry.uuid || !seenUuids.has(entry.uuid)) { + uniqueEntries.push(entry); + if (entry.uuid) seenUuids.add(entry.uuid); + } + } + + return { entries: uniqueEntries, rawLines: allRawLines, subagentIds, sourceMode: "native", sourceDetail: sourcePathUsed }; } export const getCachedSessionLog = runtimeCache( diff --git a/lib/projects.ts b/lib/projects.ts index 294989f8..2b3dc655 100644 --- a/lib/projects.ts +++ b/lib/projects.ts @@ -14,7 +14,7 @@ import { runtimeCache } from "./runtime-cache"; import { batchAll } from "./concurrency"; import { logWarn, logError } from "./logger"; import { formatDate } from "./utils"; -import { IntegrationType } from "@/src/hooks/types"; +import { type IntegrationType, INTEGRATION_TYPES } from "@/src/hooks/types"; export const UUID_RE = /^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$/; export const PATH_TRAVERSAL_RE = /(^|[\\/])\.\.($|[\\/])/; @@ -100,29 +100,31 @@ async function readOpencodeDbEntries(): Promise { const bunSqliteSpecifier = `bun${":sqlite"}`; const mod = await import(bunSqliteSpecifier as unknown as string) as any; const db = new mod.Database(OPENCODE_DB_PATH, { readonly: true }); - const rows: Array<{ id: string; directory: string; time_created: number }> = db.query( - "SELECT id, directory, 
time_created FROM session ORDER BY time_created DESC" - ).all(); - db.close(); - return rows.map((row) => { - const lastModified = new Date(row.time_created); - return { - name: row.id, - path: `${OPENCODE_DB_SESSION_PREFIX}${row.id}`, - isDirectory: false, - lastModified, - lastModifiedFormatted: formatDate(lastModified), - source: "opencode" as const, - sources: ["opencode" as const], - cwd: row.directory, - } as ProjectFolder; - }); + try { + const rows: Array<{ id: string; directory: string; time_created: number }> = db.query( + "SELECT id, directory, time_created FROM session ORDER BY time_created DESC" + ).all(); + return rows.map((row) => { + const lastModified = new Date(row.time_created); + return { + name: row.id, + path: `${OPENCODE_DB_SESSION_PREFIX}${row.id}`, + isDirectory: false, + lastModified, + lastModifiedFormatted: formatDate(lastModified), + source: "opencode" as const, + sources: ["opencode" as const], + cwd: row.directory, + } as ProjectFolder; + }); + } finally { + db.close(); + } } catch { return []; } } -import { INTEGRATION_TYPES } from "@/src/hooks/types"; const VIRTUAL_INTEGRATIONS = INTEGRATION_TYPES as unknown as string[]; async function getVirtualProjectsFromActivityStore(): Promise { @@ -165,20 +167,23 @@ async function getOpencodeDbSessionsForCwd(cwd: string): Promise const bunSqliteSpecifier = `bun${":sqlite"}`; const mod = await import(bunSqliteSpecifier as unknown as string) as any; const db = new mod.Database(OPENCODE_DB_PATH, { readonly: true }); - const rows: Array<{ id: string; time_created: number }> = db.query( - "SELECT id, time_created FROM session WHERE directory = ? 
ORDER BY time_created DESC" - ).all(cwd); - db.close(); - return rows.map((row) => { - const lastModified = new Date(row.time_created); - return { - name: row.id, - path: `${OPENCODE_DB_SESSION_PREFIX}${row.id}`, - lastModified, - lastModifiedFormatted: formatDate(lastModified), - sessionId: row.id, - } as SessionFile; - }); + try { + const rows: Array<{ id: string; time_created: number }> = db.query( + "SELECT id, time_created FROM session WHERE directory = ? ORDER BY time_created DESC" + ).all(cwd); + return rows.map((row) => { + const lastModified = new Date(row.time_created); + return { + name: row.id, + path: `${OPENCODE_DB_SESSION_PREFIX}${row.id}`, + lastModified, + lastModifiedFormatted: formatDate(lastModified), + sessionId: row.id, + } as SessionFile; + }); + } finally { + db.close(); + } } catch { return []; } @@ -228,7 +233,7 @@ export async function getProjectFolders(): Promise { } else { projectMap.set(folder.name, { ...folder, - sources: folder.source ? [folder.source] : [], + sources: folder.sources ? [...folder.sources] : (folder.source ? 
[folder.source] : []), }); } } diff --git a/src/hooks/builtin-policies.ts b/src/hooks/builtin-policies.ts index 70686989..29bc4ba4 100644 --- a/src/hooks/builtin-policies.ts +++ b/src/hooks/builtin-policies.ts @@ -7,7 +7,7 @@ import { readFileSync } from "node:fs"; import { execSync, execFileSync } from "node:child_process"; import { homedir } from "node:os"; import type { BuiltinPolicyDefinition, PolicyContext, PolicyResult, PolicyParamsSchema } from "./policy-types"; -import { allow, deny, instruct } from "./policy-helpers"; +import { allow, deny, instruct, isBashTool } from "./policy-helpers"; import { registerPolicy } from "./policy-registry"; import { hookLogWarn } from "./hook-logger"; @@ -20,19 +20,6 @@ function isClaudeSettingsFile(resolved: string): boolean { return /[\\/]\.claude[\\/]settings(?:\.[^/\\]+)?\.json$/.test(resolved); } -function isBashTool(toolName: string | undefined): boolean { - if (!toolName) return true; // Assume shell if tool name is missing - const lower = toolName.toLowerCase(); - return ( - lower === "bash" || - lower === "shell" || - lower === "terminal" || - lower === "console" || - lower.includes("command") || - lower === "run_terminal_command" || - lower === "sh" - ); -} const SHELL_TOOL_NAMES = [ "Bash", @@ -203,7 +190,7 @@ const CMD_ECHO_ENV_RE = /echo\s+%[A-Za-z_]/i; // blockEnvFiles const ENV_FILE_PATH_RE = /(?:^|[\\/])(?:\.env(?!\w)|env_\w*)/i; -const ENV_CMD_RE = /(?:\.env(?!\w)|env_\w*)/i; +const ENV_CMD_RE = /(?:^|[\s/\\\\])\.env(?!rc\b)[a-z0-9._-]*\b/i; // blockSudo const SUDO_RE = /(?:^|;|&&|\|\|)\s*sudo\s/; @@ -211,6 +198,7 @@ const PS_ELEVATION_RE = /Start-Process\s+.*-Verb\s+RunAs/i; const RUNAS_RE = /(?:^|;|&&|\|\|)\s*runas\s/i; // blockCurlPipeSh +const REMOTE_DOWNLOAD_RE = /\b(curl|wget)\b.*?\s-(O|o|L|f|S|s|J|g|-output|-remote-name)\b/i; const CURL_PIPE_SH_RE = /(?:curl|wget)\s.*\|\s*(?:sh|bash|zsh|dash|ksh|csh|tcsh|fish|ash)\b/; const REMOTE_SCRIPT_DOWNLOAD_RE = 
/(?:^|;|&&|\|\|)\s*(?:curl|wget)\b[^\n]*https?:\/\/[^\s"'`]+\.sh(?:[?#][^\s"'`]*)?(?:\s|$)/i; const PS_WEB_PIPE_RE = /(?:Invoke-WebRequest|iwr|Invoke-RestMethod|irm)\s+.*\|\s*(?:Invoke-Expression|iex)/i; @@ -494,8 +482,8 @@ function getToolOutputText(ctx: PolicyContext): string { } function looksLikeFailproofStopMessage(text: string): boolean { - return text.includes("MANDATORY ACTION REQUIRED from FailproofAI (policy:") - || text.includes("[FailproofAI Security Stop] Policy:"); + return /\[failproofai security stop\] policy:/i.test(text) + || /\b(stop|denied):/i.test(text); } function safeStringify(value: unknown): string { @@ -1485,10 +1473,9 @@ function sessionHadToolUse(ctx: PolicyContext): boolean { try { const entry = JSON.parse(line) as Record; const content = (entry.message as Record | undefined)?.content; - if (Array.isArray(content)) { - for (const block of content as Record[]) { - if (block.type === "tool_use") return true; - } + const blocks = Array.isArray(content) ? content : (typeof entry.content === "object" ? [entry.content] : []); + for (const block of blocks as Record[]) { + if (block?.type === "tool_use" || block?.tool_use) return true; } } catch { /* skip malformed lines */ } } diff --git a/src/hooks/custom-hooks-loader.ts b/src/hooks/custom-hooks-loader.ts index 8d14df1d..859f4632 100644 --- a/src/hooks/custom-hooks-loader.ts +++ b/src/hooks/custom-hooks-loader.ts @@ -57,7 +57,7 @@ async function loadSingleFile(absPath: string, opts?: { strict?: boolean }): Pro tmpFiles = await rewriteFileTree(absPath, distUrl, distIndex); const entryTmp = absPath + TMP_SUFFIX; - const fileUrl = pathToFileURL(entryTmp).href; + const fileUrl = pathToFileURL(entryTmp).href + `?t=${Date.now()}`; await import(/* webpackIgnore: true */ fileUrl); } catch (err) { const msg = err instanceof Error ? 
err.message : String(err); diff --git a/src/hooks/handler.ts b/src/hooks/handler.ts index 8ec25d33..f920e5ed 100644 --- a/src/hooks/handler.ts +++ b/src/hooks/handler.ts @@ -19,7 +19,12 @@ import { trackHookEvent } from "./hook-telemetry"; import { getInstanceId } from "../../lib/telemetry-id"; import { hookLogInfo, hookLogWarn } from "./hook-logger"; import { resolvePermissionMode } from "./resolve-permission-mode"; -import { getIntegration, INTEGRATIONS } from "./integrations"; +import { + getIntegration, + INTEGRATIONS, + canonicalizeToolName, + type Integration, +} from "./integrations"; import { getClaudeProjectsPath, encodeCwd } from "../../lib/paths"; import type { HookActivityEntry } from "./hook-activity-store"; @@ -352,7 +357,7 @@ function tryAcquireFiringLock(integration: string | undefined, eventType: string return true; } catch (e) { if ((e as any).code === "EEXIST") return false; - return false; + throw e; } } @@ -387,7 +392,7 @@ export function writeVirtualLogEntry( const toolInput = parsed.tool_input as Record | string | undefined; const prompt = ( typeof toolInput === "string" ? toolInput - : ((toolInput?.user_prompt ?? toolInput?.prompt ?? parsed.prompt ?? "") as string) + : ((toolInput?.user_prompt ?? toolInput?.prompt ?? parsed.user_prompt ?? parsed.prompt ?? "") as string) ).trim(); if (!prompt) return; @@ -443,6 +448,32 @@ export function writeVirtualLogEntry( }, }); state.lastUuid = newUuid; + + } else if (eventType === "AssistantResponse") { + const content = (parsed.assistant_response ?? parsed.content ?? 
"") as string; + if (!content) return; + + logLine = JSON.stringify({ + type: "assistant", + uuid: newUuid, + parentUuid: state.lastUuid, + timestamp, + message: { + role: "assistant", + content: [{ type: "text", text: content }], + }, + }); + state.lastUuid = newUuid; + + } else if (eventType === "SessionStart") { + logLine = JSON.stringify({ + type: "system", + uuid: newUuid, + parentUuid: state.lastUuid, + timestamp, + message: { role: "system", content: "Session started" }, + }); + state.lastUuid = newUuid; } if (logLine) { @@ -485,10 +516,10 @@ export async function handleHookEvent(eventType: string, cliOverride?: string): }); process.stdin.on("end", () => resolve(chunks.join(""))); - // Handle the case where stdin is not a pipe or is empty - setTimeout(() => { - if (chunks.length === 0) resolve(""); - }, 500); // 500ms timeout for slow pipes + if (process.stdin.isTTY) { + resolve(""); + return; + } process.stdin.on("error", reject); if (process.stdin.readableEnded) resolve(""); @@ -569,6 +600,10 @@ export async function handleHookEvent(eventType: string, cliOverride?: string): const integ = getInteg(integrationType); integ.normalizePayload(parsed); + // Canonicalize tool name so custom and builtin policies work cross-CLI. + if (typeof parsed.tool_name === "string") { + parsed.tool_name = canonicalizeToolName(parsed.tool_name); + } const canonicalEventName = integ.getCanonicalEventName(parsed, rawEventType); // Gemini BeforeToolSelection is advisory-only per spec (no deny/continue/systemMessage). 
diff --git a/src/hooks/integrations.ts b/src/hooks/integrations.ts index eeda15ec..46ef9775 100644 --- a/src/hooks/integrations.ts +++ b/src/hooks/integrations.ts @@ -36,6 +36,33 @@ import { PI_EVENT_MAP, } from "./types"; +export const TOOL_NAME_CANONICAL_MAP: Record = { + // File write variants + "writefile": "Write", + "write_file": "Write", + "save_file": "Write", + "createfile": "Write", + // File read variants + "readfile": "Read", + "read_file": "Read", + "get_file_content": "Read", + // Shell variants → canonical "Bash" + "shell": "Bash", + "terminal": "Bash", + "console": "Bash", + "sh": "Bash", + "bash": "Bash", + "bash_login_shell": "Bash", + "run_terminal_command": "Bash", + "run_shell_command": "Bash", + "execute_command": "Bash", +}; + +export function canonicalizeToolName(name: string | undefined): string | undefined { + if (!name) return name; + return TOOL_NAME_CANONICAL_MAP[name.toLowerCase()] ?? name; +} + // ── Integration interface ─────────────────────────────────────────────────── export interface Integration { @@ -1296,7 +1323,9 @@ const opencode: Integration = { const template = `/** * FailproofAI Integration for OpenCode * Generated by failproofai + * // failproofai-hook */ +// @ts-nocheck import { spawnSync } from "node:child_process"; export const FailproofAIPlugin = (ctx: any) => { @@ -1383,7 +1412,14 @@ export default FailproofAIPlugin; }, hooksInstalledInSettings(scope: string, cwd?: string): boolean { - return existsSync(this.getSettingsPath(scope, cwd)); + const path = this.getSettingsPath(scope, cwd); + if (!existsSync(path)) return false; + try { + const content = readFileSync(path, "utf8"); + return content.includes(this.hookMarker); + } catch { + return false; + } }, detectInstalled(): boolean { @@ -1465,7 +1501,9 @@ const pi: Integration = { const template = `/** * FailproofAI Integration for Pi Coding Agent * Generated by failproofai + * // failproofai-hook */ +// @ts-nocheck import { spawnSync } from "node:child_process"; 
import { existsSync, readdirSync, statSync } from "node:fs"; import { join } from "node:path"; @@ -1473,15 +1511,13 @@ import { homedir } from "node:os"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; export default function (pi: ExtensionAPI) { + let currentSessionId: string | undefined = undefined; + let pendingSessionStart: { ctx: any } | undefined = undefined; const reportedSessions = new Set(); + const projectName = process.cwd().split("/").pop() || "failproofai"; + const fallbackSessionId = \`pi-\${projectName}-\${Date.now()}\`; const callcli = (event: string, args: any, ctx?: any) => { - // Session ID priority: - // 1. ctx.sessionId or ctx.session.id (SDK scoped) - // 2. pi.session.id (Global scoped) - // 3. Fallback to Project Name - const projectName = process.cwd().split("/").pop() || "failproofai"; - const getSessionIdFromFile = (): string | undefined => { try { const cwd = process.cwd(); @@ -1495,7 +1531,9 @@ export default function (pi: ExtensionAPI) { .map((f: string) => ({ name: f, mtime: statSync(join(sessionsDir, f)).mtimeMs })) .sort((a: any, b: any) => b.mtime - a.mtime); if (files.length === 0) return undefined; - const filename = files[0].name; + + const latest = files[0]; + const filename = latest.name; const underscore = filename.lastIndexOf('_'); const dot = filename.lastIndexOf('.'); if (underscore > 0 && dot > underscore) { @@ -1507,21 +1545,34 @@ export default function (pi: ExtensionAPI) { } }; - const sessionId = - ctx?.sessionId || - ctx?.session?.id || - pi.session?.id || - pi.sessionId || - process.env.PI_SESSION_ID || - getSessionIdFromFile() || - \`pi-\${projectName}-\${Date.now()}\`; + let foundId = + args?.session_id || + args?.sessionId || + ctx?.sessionId || + ctx?.session?.id || + pi.session?.id || + pi.sessionId || + process.env.PI_SESSION_ID || + getSessionIdFromFile(); + + if (foundId && foundId !== fallbackSessionId) { + const wasOnFallback = !currentSessionId || currentSessionId === fallbackSessionId; 
+ currentSessionId = foundId; + if (wasOnFallback && pendingSessionStart) { + const ps = pendingSessionStart; + pendingSessionStart = undefined; + callcli("SessionStart", {}, ps.ctx); + } + } else if (!currentSessionId) { + currentSessionId = fallbackSessionId; + } - const payloadWithCwd = { - ...args, - integration: "pi", - cwd: process.cwd(), - session_id: sessionId, - }; + const payloadWithCwd = { + ...args, + integration: "pi", + cwd: process.cwd(), + session_id: currentSessionId, + }; const cmd = '${cliInvocation} --hook ' + event + ' --cli pi --stdin'; @@ -1547,7 +1598,14 @@ export default function (pi: ExtensionAPI) { }; pi.on("session_start", (event, ctx) => { - try { callcli("SessionStart", {}, ctx); } catch {} + try { + const id = event?.sessionId || event?.session_id || event?.session?.id || ctx?.sessionId || ctx?.session?.id || pi.session?.id || pi.sessionId; + if (id) { + callcli("SessionStart", { session_id: id }, ctx); + } else { + pendingSessionStart = { ctx }; + } + } catch {} }); pi.on("tool_call", (event, ctx) => { @@ -1556,7 +1614,8 @@ export default function (pi: ExtensionAPI) { const toolInput = event.input || event.args || event.arguments; const res = callcli("PreToolUse", { tool_name: toolName, - tool_input: toolInput + tool_input: toolInput, + session_id: event?.sessionId || event?.session_id || event?.session?.id }, ctx); if (res?.block) return { block: true, reason: res.reason }; } catch {} @@ -1566,11 +1625,15 @@ export default function (pi: ExtensionAPI) { try { const toolName = event.toolName || event.name; const toolInput = event.input || event.args || event.arguments; - const toolOutput = event.result?.content || event.output || event.result; + let toolOutput = event.result?.content || event.output || event.result; + if (toolOutput && typeof toolOutput !== "string") { + toolOutput = JSON.stringify(toolOutput); + } callcli("PostToolUse", { tool_name: toolName, tool_input: toolInput, - tool_output: toolOutput + tool_output: toolOutput, 
+ session_id: event?.sessionId || event?.session_id || event?.session?.id }, ctx); } catch {} }); @@ -1579,18 +1642,29 @@ export default function (pi: ExtensionAPI) { try { const text = event.text || event.input || event.content || (typeof event === "string" ? event : ""); if (text) { - // Isolation guard: ensure we don't handle messages that might be recursive if (text === "/failproofai-status") return; - callcli("UserPromptSubmit", { tool_input: text }, ctx); + callcli("UserPromptSubmit", { + user_prompt: text, + session_id: event?.sessionId || event?.session_id || event?.session?.id + }, ctx); } } catch {} }); pi.on("message", (event, ctx) => { try { - // Trigger Stop when the assistant finishes its turn - if (event.role === "assistant" && (event.stopReason === "stop" || event.stopReason === "end_turn")) { - callcli("stop", {}, ctx); + if (event.role === "assistant") { + const content = event.content || event.text || (typeof event.content === "object" ? JSON.stringify(event.content) : ""); + if (content) { + callcli("AssistantResponse", { + assistant_response: content, + session_id: event?.sessionId || event?.session_id || event?.session?.id + }, ctx); + } + + if (event.stopReason === "stop" || event.stopReason === "end_turn") { + callcli("stop", { session_id: event?.sessionId || event?.session_id || event?.session?.id }, ctx); + } } } catch {} }); @@ -1614,7 +1688,14 @@ export default function (pi: ExtensionAPI) { }, hooksInstalledInSettings(scope: string, cwd?: string): boolean { - return existsSync(this.getSettingsPath(scope, cwd)); + const path = this.getSettingsPath(scope, cwd); + if (!existsSync(path)) return false; + try { + const content = readFileSync(path, "utf8"); + return content.includes(this.hookMarker); + } catch { + return false; + } }, detectInstalled(): boolean { diff --git a/src/hooks/loader-utils.ts b/src/hooks/loader-utils.ts index 09d03ff8..775e8e8c 100644 --- a/src/hooks/loader-utils.ts +++ b/src/hooks/loader-utils.ts @@ -35,19 +35,26 @@ 
export async function fileExists(path: string): Promise { } export async function findDistIndex(): Promise { - // Env var set by scripts/dev.ts, scripts/start.ts, bin/failproofai.mjs const distPath = process.env.FAILPROOFAI_DIST_PATH; if (distPath) { - const candidate = resolve(distPath, "index.js"); - if (await fileExists(candidate)) return candidate; + // Check both distPath and distPath/dist + const candidates = [ + resolve(distPath, "index.js"), + resolve(distPath, "dist", "index.js"), + ]; + for (const c of candidates) { + if (await fileExists(c)) return c; + } } // Fallback: check common locations const candidates = [ - // Packaged binary: dist is bundled at {binaryDir}/../assets/dist/ - resolve(dirname(process.execPath), "..", "assets", "dist", "index.js"), + // Repo root dist (if process.cwd is repo root) resolve(process.cwd(), "dist", "index.js"), + // In node_modules resolve(process.cwd(), "node_modules", "failproofai", "dist", "index.js"), + // Packaged binary: dist is bundled at {binaryDir}/../assets/dist/ + resolve(dirname(process.execPath), "..", "assets", "dist", "index.js"), ]; for (const c of candidates) { if (await fileExists(c)) return c; @@ -84,13 +91,15 @@ export async function createEsmShim( // multiple hook subprocesses load custom policies in parallel. const shimPath = `${distIndex}.__failproofai_esm_shim__.${process.pid}.${randomUUID()}.mjs`; const shimCode = [ - `import _cjs from '${distUrl}';`, + `import _m from '${distUrl}';`, + `const _cjs = (_m && _m.__esModule && _m.default) ? 
_m.default : (_m.default || _m);`, `export const customPolicies = _cjs.customPolicies;`, `export const getCustomHooks = _cjs.getCustomHooks;`, `export const clearCustomHooks = _cjs.clearCustomHooks;`, `export const allow = _cjs.allow;`, `export const deny = _cjs.deny;`, `export const instruct = _cjs.instruct;`, + `export const isBashTool = _cjs.isBashTool;`, `export default _cjs;`, ].join("\n"); await writeFile(shimPath, shimCode, "utf-8"); diff --git a/src/hooks/manager.ts b/src/hooks/manager.ts index accf9d50..1c53c079 100644 --- a/src/hooks/manager.ts +++ b/src/hooks/manager.ts @@ -262,6 +262,17 @@ export async function installHooks( continue; } + // Prevent duplicate-scope installations to avoid double execution + const otherScopes = deduplicateScopes(integ, integ.scopes, cwd).filter((s) => s !== scope); + const duplicates = otherScopes.filter((s) => integ.hooksInstalledInSettings(s as any, cwd)); + if (duplicates.length > 0) { + const scopeList = duplicates.map((s) => `${s} (${scopeLabel(integ, s, cwd)})`).join(", "); + console.log(`\n\x1B[33mNotice: Failproof AI hooks are already active at ${scopeList} for ${integ.displayName}.\x1B[0m`); + console.log(`Skipping installation in ${scope} scope to prevent duplicate policy evaluation.`); + console.log(`If you want to move the installation to ${scope}, uninstall from the other scope(s) first.`); + continue; + } + const settingsPath = integ.getSettingsPath(scope as any, cwd); const settings = integ.readSettings(settingsPath) as ClaudeSettings; integ.writeHookEntries(settings, binaryPath, scope); @@ -310,17 +321,6 @@ export async function installHooks( } else { console.log(`Binary: ${binaryPath}`); } - - // Warn about duplicate-scope installations - const otherScopes = deduplicateScopes(integ, integ.scopes, cwd).filter((s) => s !== scope); - const duplicates = otherScopes.filter((s) => integ.hooksInstalledInSettings(s as any, cwd)); - if (duplicates.length > 0) { - const scopeList = duplicates.map((s) => `${s} 
(${scopeLabel(integ, s, cwd)})`).join(", "); - console.log(); - console.log(`\x1B[33mWarning: Failproof AI hooks are also installed at ${scopeList} for ${integ.displayName}.\x1B[0m`); - console.log(`Having hooks in multiple scopes may cause duplicate policy evaluation.`); - console.log(`Use \`failproofai policies --uninstall --scope ${duplicates[0]} --cli ${integId}\` to remove the other installation.`); - } } } @@ -536,8 +536,7 @@ export async function listHooks( integration: IntegrationType = "claude-code", ): Promise { const integ = getIntegration(integration); - // Multi-scope config is merged for listing (no CLI filter \u2014 show global view) - const config = readMergedHooksConfig(cwd); + const config = readMergedHooksConfig(cwd, integration); const enabledSet = new Set(config.enabledPolicies); const uniqueScopes = deduplicateScopes(integ, integ.scopes, cwd); @@ -594,55 +593,26 @@ export async function listHooks( const statusCol = installedScopes.length > 1 ? installedScopes.length * 9 : 8; - if (installedScopes.length === 0) { - console.log(` ${"Status".padEnd(8)}${"Name".padEnd(nameColWidth)}Description`); - console.log(` ${"\u2500".repeat(6)} ${"\u2500".repeat(nameColWidth - 2)} ${"\u2500".repeat(38)}`); + console.log(`\nFailproof AI Hook Policies \u2014 Profile: ${integ.displayName}\n`); + console.log(` ${"Status".padEnd(8)}${"Name".padEnd(nameColWidth)}Description`); + console.log(` ${"\u2500".repeat(6)} ${"\u2500".repeat(nameColWidth - 2)} ${"\u2500".repeat(38)}`); - for (const p of regularPolicies) { + for (const p of regularPolicies) { + const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; + printPolicyLine(p, mark); + } + + if (betaPolicies.length > 0) { + console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); + for (const p of betaPolicies) { const mark = enabledSet.has(p.name) ? 
`\x1B[32m\u2713\x1B[0m` : " "; printPolicyLine(p, mark); } + } - if (betaPolicies.length > 0) { - console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); - for (const p of betaPolicies) { - const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; - printPolicyLine(p, mark); - } - } + if (installedScopes.length === 0) { console.log(`\n Run \`failproofai policies --install --cli ${integration}\` to activate hooks for ${integ.displayName}.`); - } else if (installedScopes.length === 1) { - console.log(`\nFailproof AI Hook Policies\n`); - console.log(` ${"Status".padEnd(8)}${"Name".padEnd(nameColWidth)}Description`); - console.log(` ${"\u2500".repeat(6)} ${"\u2500".repeat(nameColWidth - 2)} ${"\u2500".repeat(38)}`); - - for (const p of regularPolicies) { - const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; - printPolicyLine(p, mark); - } - if (betaPolicies.length > 0) { - console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); - for (const p of betaPolicies) { - const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; - printPolicyLine(p, mark); - } - } } else { - console.log(`\nFailproof AI Hook Policies\n`); - console.log(` ${"Status".padEnd(8)}${"Name".padEnd(nameColWidth)}Description`); - console.log(` ${"\u2500".repeat(6)} ${"\u2500".repeat(nameColWidth - 2)} ${"\u2500".repeat(38)}`); - - for (const p of regularPolicies) { - const mark = enabledSet.has(p.name) ? `\x1B[32m\u2713\x1B[0m` : " "; - printPolicyLine(p, mark); - } - if (betaPolicies.length > 0) { - console.log(`\n \x1B[2m\u2500\u2500 Beta \u2500\u2500\x1B[0m`); - for (const p of betaPolicies) { - const mark = enabledSet.has(p.name) ? 
`\x1B[32m\u2713\x1B[0m` : " "; - printPolicyLine(p, mark); - } - } console.log(`\n Hooks active in scopes: ${installedScopes.join(", ")}`); } @@ -718,10 +688,19 @@ export async function listHooks( const hooksInstalled = unique.some((s) => i.hooksInstalledInSettings(s as any, cwd)); const binaryInstalled = i.detectInstalled(); + // Get policy count for this specific CLI + const cliConfig = readMergedHooksConfig(cwd, integId); + const pCount = cliConfig.enabledPolicies.length; + const hasCustom = !!cliConfig.customPoliciesPath; + let status = ""; - if (hooksInstalled) status = "\x1B[32mhooks active\x1B[0m"; - else if (binaryInstalled) status = "\x1B[33mCLI detected; hooks inactive\x1B[0m"; - else status = "\x1B[2mCLI not detected\x1B[0m"; + if (hooksInstalled) { + status = `\x1B[32mhooks active\x1B[0m (${pCount} policies${hasCustom ? " + custom" : ""})`; + } else if (binaryInstalled) { + status = "\x1B[33mCLI detected; hooks inactive\x1B[0m"; + } else { + status = "\x1B[2mCLI not detected\x1B[0m"; + } const mark = hooksInstalled ? `\x1B[32m\u2713\x1B[0m` : (binaryInstalled ? 
`\x1B[33m?\x1B[0m` : `\x1B[31m\u2717\x1B[0m`); console.log(` ${mark} ${i.displayName.padEnd(15)} (${status})`); diff --git a/src/hooks/policy-helpers.ts b/src/hooks/policy-helpers.ts index 878b446f..1a1fade4 100644 --- a/src/hooks/policy-helpers.ts +++ b/src/hooks/policy-helpers.ts @@ -14,3 +14,17 @@ export function deny(reason: string): PolicyResult { export function instruct(reason: string): PolicyResult { return { decision: "instruct", reason }; } + +export function isBashTool(toolName: string | undefined): boolean { + if (!toolName) return true; // Assume shell if tool name is missing + const lower = toolName.toLowerCase(); + return ( + lower === "bash" || + lower === "shell" || + lower === "terminal" || + lower === "console" || + lower.includes("command") || + lower === "run_terminal_command" || + lower === "sh" + ); +} diff --git a/src/hooks/types.ts b/src/hooks/types.ts index 709de43c..9f443d8b 100644 --- a/src/hooks/types.ts +++ b/src/hooks/types.ts @@ -14,6 +14,7 @@ export const HOOK_EVENT_TYPES = [ "SessionStart", "SessionEnd", "UserPromptSubmit", + "AssistantResponse", "PreToolUse", "PermissionRequest", "PermissionDenied", diff --git a/src/index.ts b/src/index.ts index aed5c66b..b8c6fdb2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -9,7 +9,7 @@ export { getCustomHooks, clearCustomHooks, } from "./hooks/custom-hooks-registry"; -export { allow, deny, instruct } from "./hooks/policy-helpers"; +export { allow, deny, instruct, isBashTool } from "./hooks/policy-helpers"; export type { PolicyContext, PolicyResult, From 610ed419751fb3df5d6d36f2044a3e15354b4ea8 Mon Sep 17 00:00:00 2001 From: Yash Upadhyay Date: Mon, 27 Apr 2026 12:25:48 +0000 Subject: [PATCH 47/47] fix: tighten protect-env-vars and block-read-outside-cwd, fix Pi session attribution, suppress browser-extension hydration warnings - protect-env-vars: catch printf, compgen -v, declare -p/x, and ${!var} indirect expansion bypass patterns; extend boundary chars to include quotes/backticks so bash -c 
"env" is blocked - block-read-outside-cwd: add extractRelativeTraversals() so ../.. style paths are resolved and checked against CWD, not just absolute paths - Pi integration: reset currentSessionId/pendingSessionStart/pendingPromptSubmit at session_start to prevent stale ID carry-over across sessions; add 60s recency filter in getSessionIdFromFile(); remove stale PI_SESSION_ID env var from foundId chain; defer UserPromptSubmit until tool_call provides the real session ID - UI: add suppressHydrationWarning to Button base component and ActivityTab filter elements to silence Dashlane/password-manager attribute injection errors Co-Authored-By: Claude Sonnet 4.6 --- .../hooks/claude-code-integration.e2e.test.ts | 3 +- .../e2e/hooks/codex-integration.e2e.test.ts | 3 +- .../e2e/hooks/cursor-integration.e2e.test.ts | 5 +- .../e2e/hooks/gemini-integration.e2e.test.ts | 5 +- .../hooks/opencode-integration.e2e.test.ts | 5 +- .../e2e/hooks/pi-integration.e2e.test.ts | 3 +- .../hooks/block-read-outside-cwd.test.ts | 65 +++++++++++++++++ __tests__/hooks/builtin-policies.test.ts | 50 +++++++++++++ __tests__/hooks/manager.test.ts | 4 +- app/policies/hooks-client.tsx | 5 ++ components/ui/button.tsx | 1 + lib/log-entries.ts | 25 ++++++- src/hooks/builtin-policies.ts | 73 ++++++++++++++++++- src/hooks/hook-activity-store.ts | 2 +- src/hooks/integrations.ts | 37 ++++++++-- src/hooks/types.ts | 1 - 16 files changed, 260 insertions(+), 27 deletions(-) diff --git a/__tests__/e2e/hooks/claude-code-integration.e2e.test.ts b/__tests__/e2e/hooks/claude-code-integration.e2e.test.ts index 65dfdb17..ca01e263 100644 --- a/__tests__/e2e/hooks/claude-code-integration.e2e.test.ts +++ b/__tests__/e2e/hooks/claude-code-integration.e2e.test.ts @@ -8,6 +8,7 @@ import { Payloads } from "../helpers/payloads"; const BINARY_PATH = resolve(__dirname, "../../../bin/failproofai.mjs"); const DEDUP_DIR = resolve(homedir(), ".failproofai", "cache", "dedup"); +const REAL_ACTIVITY_STORE = resolve(homedir(), 
".failproofai", "cache", "hook-activity"); describe("E2E: Claude Code Integration", () => { let projectDir: string; @@ -26,7 +27,7 @@ describe("E2E: Claude Code Integration", () => { if (existsSync(isoHome)) rmSync(isoHome, { recursive: true, force: true }); }); - const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }); + const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true", FAILPROOFAI_ACTIVITY_STORE_DIR: REAL_ACTIVITY_STORE }); it("denies sudo via PreToolUse hook (exit 2 + stderr message)", () => { execSync(`bun ${BINARY_PATH} policies --install block-sudo --cli claude-code --scope project`, { diff --git a/__tests__/e2e/hooks/codex-integration.e2e.test.ts b/__tests__/e2e/hooks/codex-integration.e2e.test.ts index 419abdbf..bc6e7dcb 100644 --- a/__tests__/e2e/hooks/codex-integration.e2e.test.ts +++ b/__tests__/e2e/hooks/codex-integration.e2e.test.ts @@ -6,6 +6,7 @@ import { homedir, tmpdir } from "node:os"; const BINARY_PATH = resolve(__dirname, "../../../bin/failproofai.mjs"); const DEDUP_DIR = resolve(homedir(), ".failproofai", "cache", "dedup"); +const REAL_ACTIVITY_STORE = resolve(homedir(), ".failproofai", "cache", "hook-activity"); describe("E2E: OpenAI Codex Integration", () => { let projectDir: string; @@ -24,7 +25,7 @@ describe("E2E: OpenAI Codex Integration", () => { if (existsSync(isoHome)) rmSync(isoHome, { recursive: true, force: true }); }); - const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }); + const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true", FAILPROOFAI_ACTIVITY_STORE_DIR: REAL_ACTIVITY_STORE }); it("denies sudo via pre_tool_use (snake_case) event with exit 2", () => { execSync(`bun ${BINARY_PATH} policies --install block-sudo --cli codex --scope project`, { diff --git 
a/__tests__/e2e/hooks/cursor-integration.e2e.test.ts b/__tests__/e2e/hooks/cursor-integration.e2e.test.ts index b5ad3bd7..7172dbbd 100644 --- a/__tests__/e2e/hooks/cursor-integration.e2e.test.ts +++ b/__tests__/e2e/hooks/cursor-integration.e2e.test.ts @@ -2,10 +2,11 @@ import { describe, it, expect, beforeEach, afterEach } from "vitest"; import { execSync, spawnSync } from "node:child_process"; import { writeFileSync, readFileSync, existsSync, mkdirSync, mkdtempSync, rmSync } from "node:fs"; import { resolve, join } from "node:path"; -import { tmpdir } from "node:os"; +import { homedir, tmpdir } from "node:os"; import { CursorPayloads } from "../helpers/payloads"; const BINARY_PATH = resolve(__dirname, "../../../bin/failproofai.mjs"); +const REAL_ACTIVITY_STORE = join(homedir(), ".failproofai", "cache", "hook-activity"); describe("E2E: Cursor Integration", () => { let PROJECT_DIR: string; @@ -27,7 +28,7 @@ describe("E2E: Cursor Integration", () => { if (existsSync(isoHome)) rmSync(isoHome, { recursive: true, force: true }); }); - const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), HOME: isoHome }); + const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), HOME: isoHome, FAILPROOFAI_ACTIVITY_STORE_DIR: REAL_ACTIVITY_STORE }); it("denies sudo command via Cursor preToolUse hook", () => { // 1. 
Install block-sudo for Cursor project scope diff --git a/__tests__/e2e/hooks/gemini-integration.e2e.test.ts b/__tests__/e2e/hooks/gemini-integration.e2e.test.ts index bb030b31..752c9ab1 100644 --- a/__tests__/e2e/hooks/gemini-integration.e2e.test.ts +++ b/__tests__/e2e/hooks/gemini-integration.e2e.test.ts @@ -2,10 +2,11 @@ import { describe, it, expect, beforeEach, afterEach } from "vitest"; import { execSync, spawnSync } from "node:child_process"; import { writeFileSync, readFileSync, existsSync, mkdirSync, mkdtempSync, rmSync } from "node:fs"; import { resolve, join } from "node:path"; -import { tmpdir } from "node:os"; +import { homedir, tmpdir } from "node:os"; import { GeminiPayloads } from "../helpers/payloads"; const BINARY_PATH = resolve(__dirname, "../../../bin/failproofai.mjs"); +const REAL_ACTIVITY_STORE = join(homedir(), ".failproofai", "cache", "hook-activity"); describe("E2E: Gemini Integration", () => { let PROJECT_DIR: string; @@ -25,7 +26,7 @@ describe("E2E: Gemini Integration", () => { if (existsSync(isoHome)) rmSync(isoHome, { recursive: true, force: true }); }); - const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), HOME: isoHome }); + const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), HOME: isoHome, FAILPROOFAI_ACTIVITY_STORE_DIR: REAL_ACTIVITY_STORE }); const runHook = (eventName: string, payload: Record) => { return spawnSync("bun", [BINARY_PATH, "--hook", eventName, "--cli", "gemini"], { diff --git a/__tests__/e2e/hooks/opencode-integration.e2e.test.ts b/__tests__/e2e/hooks/opencode-integration.e2e.test.ts index 56d93595..10980f6c 100644 --- a/__tests__/e2e/hooks/opencode-integration.e2e.test.ts +++ b/__tests__/e2e/hooks/opencode-integration.e2e.test.ts @@ -2,9 +2,10 @@ import { describe, it, expect, beforeEach, afterEach } from "vitest"; import { execSync, spawnSync } from "node:child_process"; import { readFileSync, existsSync, mkdtempSync, mkdirSync, rmSync } from "node:fs"; import { 
resolve, join } from "node:path"; -import { tmpdir } from "node:os"; +import { homedir, tmpdir } from "node:os"; const BINARY_PATH = resolve(__dirname, "../../../bin/failproofai.mjs"); +const REAL_ACTIVITY_STORE = join(homedir(), ".failproofai", "cache", "hook-activity"); describe("E2E: OpenCode Integration", () => { let projectDir: string; @@ -20,7 +21,7 @@ describe("E2E: OpenCode Integration", () => { if (existsSync(isoHome)) rmSync(isoHome, { recursive: true, force: true }); }); - const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }); + const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true", FAILPROOFAI_ACTIVITY_STORE_DIR: REAL_ACTIVITY_STORE }); it("denies sudo via tool.execute.before event (exit 2 + stderr message)", () => { execSync(`bun ${BINARY_PATH} policies --install block-sudo --cli opencode --scope project`, { diff --git a/__tests__/e2e/hooks/pi-integration.e2e.test.ts b/__tests__/e2e/hooks/pi-integration.e2e.test.ts index 95222d89..ad9ed5a3 100644 --- a/__tests__/e2e/hooks/pi-integration.e2e.test.ts +++ b/__tests__/e2e/hooks/pi-integration.e2e.test.ts @@ -6,6 +6,7 @@ import { homedir, tmpdir } from "node:os"; const BINARY_PATH = resolve(__dirname, "../../../bin/failproofai.mjs"); const DEDUP_DIR = resolve(homedir(), ".failproofai", "cache", "dedup"); +const REAL_ACTIVITY_STORE = resolve(homedir(), ".failproofai", "cache", "hook-activity"); describe("E2E: Pi Coding Agent Integration", () => { let projectDir: string; @@ -22,7 +23,7 @@ describe("E2E: Pi Coding Agent Integration", () => { if (existsSync(isoHome)) rmSync(isoHome, { recursive: true, force: true }); }); - const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true" }); + const baseEnv = () => ({ ...process.env, FAILPROOFAI_DIST_PATH: process.cwd(), FAILPROOFAI_SKIP_KILL: "true", FAILPROOFAI_ACTIVITY_STORE_DIR: REAL_ACTIVITY_STORE }); 
it("denies sudo via tool_call event (exit 2 + stderr message)", () => { execSync(`bun ${BINARY_PATH} policies --install block-sudo --cli pi --scope project`, { diff --git a/__tests__/hooks/block-read-outside-cwd.test.ts b/__tests__/hooks/block-read-outside-cwd.test.ts index d7d85824..088c85c4 100644 --- a/__tests__/hooks/block-read-outside-cwd.test.ts +++ b/__tests__/hooks/block-read-outside-cwd.test.ts @@ -555,4 +555,69 @@ describe("block-read-outside-cwd policy", () => { const result = await policy.fn(ctx); expect(result.decision).toBe("allow"); }); + + describe("relative path traversal via Bash", () => { + it("blocks ls ../.. (relative traversal two levels up)", async () => { + const ctx = makeCtx({ + toolName: "Bash", + toolInput: { command: "ls ../.." }, + session: { cwd: "/home/user/project" }, + }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + expect(result.reason).toContain("/home"); + }); + + it("blocks ls ../../fp/failproofai (deep relative traversal)", async () => { + const ctx = makeCtx({ + toolName: "Bash", + toolInput: { command: "ls ../../fp/failproofai" }, + session: { cwd: "/home/user/project/subdir" }, + }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + }); + + it("blocks cat ../../secrets.txt", async () => { + const ctx = makeCtx({ + toolName: "Bash", + toolInput: { command: "cat ../../secrets.txt" }, + session: { cwd: "/home/user/project" }, + }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + }); + + it("allows ls .. when .. 
still resolves inside cwd parent that is cwd (single level stays within tree)", async () => { + // ../subdir from /home/user/project/subdir resolves to /home/user/project — still inside project + const ctx = makeCtx({ + toolName: "Bash", + toolInput: { command: "ls ../sibling" }, + session: { cwd: "/home/user/project" }, + }); + const result = await policy.fn(ctx); + // /home/user/project/../sibling = /home/user/sibling — outside project, must be denied + expect(result.decision).toBe("deny"); + }); + + it("allows ls ./subdir (relative path that stays inside cwd)", async () => { + const ctx = makeCtx({ + toolName: "Bash", + toolInput: { command: "ls ./src" }, + session: { cwd: "/home/user/project" }, + }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("allow"); + }); + + it("blocks quoted relative traversal: ls \"../..\"", async () => { + const ctx = makeCtx({ + toolName: "Bash", + toolInput: { command: 'ls "../.."' }, + session: { cwd: "/home/user/project" }, + }); + const result = await policy.fn(ctx); + expect(result.decision).toBe("deny"); + }); + }); }); diff --git a/__tests__/hooks/builtin-policies.test.ts b/__tests__/hooks/builtin-policies.test.ts index d117caa3..be12d24f 100644 --- a/__tests__/hooks/builtin-policies.test.ts +++ b/__tests__/hooks/builtin-policies.test.ts @@ -452,6 +452,56 @@ describe("hooks/builtin-policies", () => { const ctx = makeCtx({ toolName: "Bash", toolInput: { command: "echo ${HOME}/bin" } }); expect((await policy.fn(ctx)).decision).toBe("deny"); }); + + it("blocks bash -c \"env\" (env inside quoted -c argument)", async () => { + const ctx = makeCtx({ toolName: "Bash", toolInput: { command: 'bash -c "env"' } }); + expect((await policy.fn(ctx)).decision).toBe("deny"); + }); + + it("blocks sh -c 'printenv' (printenv inside quoted -c argument)", async () => { + const ctx = makeCtx({ toolName: "Bash", toolInput: { command: "sh -c 'printenv'" } }); + expect((await policy.fn(ctx)).decision).toBe("deny"); + }); + + 
it("blocks printf \"%s\\n\" \"$HOME\" (printf with env var)", async () => { + const ctx = makeCtx({ toolName: "Bash", toolInput: { command: 'printf "%s\\n" "$HOME"' } }); + expect((await policy.fn(ctx)).decision).toBe("deny"); + }); + + it("blocks printf \"%s=%s\\n\" \"$v\" \"${!v}\" (printf with indirect expansion)", async () => { + const ctx = makeCtx({ toolName: "Bash", toolInput: { command: 'printf "%s=%s\\n" "$v" "${!v}"' } }); + expect((await policy.fn(ctx)).decision).toBe("deny"); + }); + + it("blocks compgen -v (enumerates all variable names)", async () => { + const ctx = makeCtx({ toolName: "Bash", toolInput: { command: "for v in $(compgen -v); do echo $v; done" } }); + expect((await policy.fn(ctx)).decision).toBe("deny"); + }); + + it("blocks compgen -v standalone (isolated from other patterns)", async () => { + const ctx = makeCtx({ toolName: "Bash", toolInput: { command: "compgen -v | sort" } }); + expect((await policy.fn(ctx)).decision).toBe("deny"); + }); + + it("blocks declare -p (dumps all variable values)", async () => { + const ctx = makeCtx({ toolName: "Bash", toolInput: { command: "declare -p" } }); + expect((await policy.fn(ctx)).decision).toBe("deny"); + }); + + it("blocks declare -x (dumps exported variables)", async () => { + const ctx = makeCtx({ toolName: "Bash", toolInput: { command: "declare -x" } }); + expect((await policy.fn(ctx)).decision).toBe("deny"); + }); + + it("blocks ${!var} indirect expansion", async () => { + const ctx = makeCtx({ toolName: "Bash", toolInput: { command: 'echo "${!API_KEY}"' } }); + expect((await policy.fn(ctx)).decision).toBe("deny"); + }); + + it("allows printf without env vars (e.g. 
printf 'hello')", async () => { + const ctx = makeCtx({ toolName: "Bash", toolInput: { command: "printf 'hello world\\n'" } }); + expect((await policy.fn(ctx)).decision).toBe("allow"); + }); }); describe("block-env-files", () => { diff --git a/__tests__/hooks/manager.test.ts b/__tests__/hooks/manager.test.ts index 4f02c479..09e69131 100644 --- a/__tests__/hooks/manager.test.ts +++ b/__tests__/hooks/manager.test.ts @@ -94,7 +94,7 @@ describe("hooks/manager", () => { expect(path).toBe(USER_SETTINGS_PATH); const written = JSON.parse(content as string); - expect(Object.keys(written.hooks)).toHaveLength(27); + expect(Object.keys(written.hooks)).toHaveLength(26); for (const [eventType, matchers] of Object.entries(written.hooks)) { expect(matchers).toHaveLength(1); @@ -256,7 +256,7 @@ describe("hooks/manager", () => { expect(writeFileSync).toHaveBeenCalledOnce(); const [, content] = vi.mocked(writeFileSync).mock.calls[0]; const written = JSON.parse(content as string); - expect(Object.keys(written.hooks)).toHaveLength(27); + expect(Object.keys(written.hooks)).toHaveLength(26); }); it("resolves binary from FAILPROOFAI_DIST_PATH", async () => { diff --git a/app/policies/hooks-client.tsx b/app/policies/hooks-client.tsx index ca057d64..554fdf6f 100644 --- a/app/policies/hooks-client.tsx +++ b/app/policies/hooks-client.tsx @@ -213,6 +213,7 @@ function DecisionPills({
@@ -455,6 +459,7 @@ function ActivityTab({ value={filterSessionId} onChange={(e) => setFilterSessionId(e.target.value)} placeholder="Filter by session…" + suppressHydrationWarning className="h-7 rounded-md border border-border bg-background px-2.5 text-xs text-foreground placeholder:text-muted-foreground w-44 focus:outline-none focus:ring-2 focus:ring-primary/40 focus:border-primary/40 transition-shadow" />
diff --git a/components/ui/button.tsx b/components/ui/button.tsx index e0fc8cb3..2ef1b190 100644 --- a/components/ui/button.tsx +++ b/components/ui/button.tsx @@ -31,6 +31,7 @@ const Button = React.forwardRef( className )} ref={ref} + suppressHydrationWarning {...props} /> ); diff --git a/lib/log-entries.ts b/lib/log-entries.ts index be155f1a..3061192a 100644 --- a/lib/log-entries.ts +++ b/lib/log-entries.ts @@ -742,9 +742,10 @@ function parseCursorFile(content: string, source: LogSource, fileDate: Date): Pa .map((b) => b.text as string) .join("\n") .trim(); - // Strip wrapper tags that Cursor sometimes injects - if (textContent.startsWith("")) { - textContent = textContent.replace(/^\s*/, "").replace(/\s*<\/user_query>$/, ""); + // Strip wrapper (Cursor sometimes prepends ... before it) + const userQueryMatch = textContent.match(/([\s\S]*?)<\/user_query>/); + if (userQueryMatch) { + textContent = userQueryMatch[1].trim(); } if (textContent) { entries.push({ type: "user", ...base, message: { role: "user", content: textContent } } as UserEntry); @@ -1082,6 +1083,22 @@ function parseCodexFile(content: string, source: LogSource): ParseFileResult { toolResultMap.set(callId, { content: resultText, timestamp: date.toISOString(), timestampMs: date.getTime() }); } + // Pre-scan: collect all response_item/assistant texts so event_msg/agent_message duplicates can be + // skipped regardless of whether they appear before or after the response_item in the JSONL. + const responseItemTexts = new Set(); + for (const obj of allObjects) { + if (obj.type !== "response_item") continue; + const p = obj.payload as Record | undefined; + if (!p || p.type !== "message" || p.role !== "assistant") continue; + const contentArr = Array.isArray(p.content) ? 
(p.content as Array>) : []; + const text = contentArr + .filter((c) => c.type === "output_text" || c.type === "text") + .map((c) => c.text as string) + .join("\n") + .trim(); + if (text) responseItemTexts.add(text); + } + const entries: LogEntry[] = []; let counter = 0; @@ -1173,7 +1190,7 @@ function parseCodexFile(content: string, source: LogSource): ParseFileResult { if (type === "event_msg" && (payload.type as string) === "agent_message") { const text = (payload.message as string) || ""; - if (text.trim()) { + if (text.trim() && !responseItemTexts.has(text.trim())) { entries.push({ type: "assistant", ...base, message: { role: "assistant", content: [{ type: "text", text }] } } as AssistantEntry); } continue; diff --git a/src/hooks/builtin-policies.ts b/src/hooks/builtin-policies.ts index 29bc4ba4..afa72248 100644 --- a/src/hooks/builtin-policies.ts +++ b/src/hooks/builtin-policies.ts @@ -180,13 +180,21 @@ const SCHEMA_ALTER_RE = /\bALTER\s+TABLE\b[\s\S]*\b(?:DROP\s+COLUMN|ADD\s+COLUMN const PUBLISH_CMD_RE = /(?:npm\s+publish|bun\s+publish|pnpm\s+publish|yarn\s+npm\s+publish|twine\s+upload|poetry\s+publish|cargo\s+publish|gem\s+push)\b/; // protectEnvVars -const ENV_PRINTENV_RE = /(?:^|[;\|&!])\s*(?:env|printenv)(?:\s|$|[;\|&!])/; -const ECHO_ENV_RE = /echo\s+.*\$\{?[A-Za-z_]/; +// Include quotes/backtick as boundaries so `bash -c "env"` is also caught +const ENV_PRINTENV_RE = /(?:^|[;|&!"'`])\s*(?:env|printenv)(?:\s|$|[;|&!"'`])/; +// Cover printf as well as echo (printf "%s\n" "$HOME" is a common bypass) +const ECHO_ENV_RE = /(?:echo|printf)\s+.*\$\{?[A-Za-z_]/; const EXPORT_RE = /(?:^|\s|;|&&|\|\|)export\s+\w+/; const PS_ENV_VAR_RE = /\$env:[A-Za-z_]/i; const PS_CHILDITEM_ENV_RE = /(?:Get-ChildItem|dir|gci|ls)\s+Env:/i; const DOTNET_GETENV_RE = /\[Environment\]::GetEnvironment/i; const CMD_ECHO_ENV_RE = /echo\s+%[A-Za-z_]/i; +// compgen -v enumerates all shell variable names (used to loop over env) +const COMPGEN_V_RE = /\bcompgen\s+-[a-zA-Z]*v/; +// declare -p 
/ declare -x prints variable values / exported variables +const DECLARE_ENV_RE = /\bdeclare\s+-[a-zA-Z]*[px]/; +// ${!var} indirect expansion dereferences a variable whose name is in another variable +const INDIRECT_EXPANSION_RE = /\$\{![A-Za-z_]/; // blockEnvFiles const ENV_FILE_PATH_RE = /(?:^|[\\/])(?:\.env(?!\w)|env_\w*)/i; @@ -570,6 +578,18 @@ function protectEnvVars(ctx: PolicyContext): PolicyResult { if (CMD_ECHO_ENV_RE.test(cmd)) { return deny("Action blocked: command attempted to echo environment variables via cmd."); } + // compgen -v enumerates variable names, enabling full env dump via loops + if (COMPGEN_V_RE.test(cmd)) { + return deny("Action blocked: command attempted to enumerate shell variables via compgen."); + } + // declare -p / declare -x prints variable values + if (DECLARE_ENV_RE.test(cmd)) { + return deny("Action blocked: command attempted to dump environment variables via declare."); + } + // ${!var} indirect expansion reads any variable by name — often used after compgen -v + if (INDIRECT_EXPANSION_RE.test(cmd)) { + return deny("Action blocked: command attempted to read environment variables via indirect expansion."); + } return allow(); } @@ -816,6 +836,53 @@ function extractAbsolutePaths(command: string): string[] { return paths; } +/** + * Extract relative traversal paths (starting with ..) from a Bash command. + * Catches patterns like `../..`, `../../foo`, or quoted `"../bar"`. + * These are resolved against CWD in the caller to detect out-of-tree access. + */ +function extractRelativeTraversals(command: string): string[] { + const paths: string[] = []; + // Matches tokens that start with .. — optionally followed by /more/segments + const traversalRe = /(? 
" ".repeat(m.length)) + .replace(/'[^']*'/g, (m) => " ".repeat(m.length)); + addTraversals(stripped); + + return paths; +} + function blockReadOutsideCwd(ctx: PolicyContext): PolicyResult { // Prefer $CLAUDE_PROJECT_DIR (stable project root) over ctx.session.cwd, // which tracks the live shell CWD and drifts when Claude `cd`s into a subdir. @@ -829,7 +896,7 @@ function blockReadOutsideCwd(ctx: PolicyContext): PolicyResult { const cmd = getCommand(ctx); if (!READ_LIKE_CMDS.test(cmd)) return allow(); - const paths = extractAbsolutePaths(cmd); + const paths = [...extractAbsolutePaths(cmd), ...extractRelativeTraversals(cmd)]; const cwdWithSep = cwd.endsWith("/") ? cwd : cwd + "/"; for (const p of paths) { const resolved = resolve(cwd, p); diff --git a/src/hooks/hook-activity-store.ts b/src/hooks/hook-activity-store.ts index fdb662c9..4ed33ec9 100644 --- a/src/hooks/hook-activity-store.ts +++ b/src/hooks/hook-activity-store.ts @@ -36,7 +36,7 @@ const STATS_FILE = "stats.json"; const LOCK_FILE = "current.lock"; // advisory lock for concurrent hook processes const LOCK_STALE_MS = 2000; // steal lock if older than 2 s (covers crashed processes) -let storeDir = DEFAULT_STORE_DIR; +let storeDir = process.env.FAILPROOFAI_ACTIVITY_STORE_DIR ?? 
DEFAULT_STORE_DIR; let rotateSeq = 0; // ── Types ── diff --git a/src/hooks/integrations.ts b/src/hooks/integrations.ts index 46ef9775..6685adf9 100644 --- a/src/hooks/integrations.ts +++ b/src/hooks/integrations.ts @@ -1513,6 +1513,7 @@ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; export default function (pi: ExtensionAPI) { let currentSessionId: string | undefined = undefined; let pendingSessionStart: { ctx: any } | undefined = undefined; + let pendingPromptSubmit: { text: string; ctx: any } | undefined = undefined; const reportedSessions = new Set(); const projectName = process.cwd().split("/").pop() || "failproofai"; const fallbackSessionId = \`pi-\${projectName}-\${Date.now()}\`; @@ -1526,9 +1527,11 @@ export default function (pi: ExtensionAPI) { const workspaceName = '--' + encodedPath + '--'; const sessionsDir = join(homedir(), '.pi', 'agent', 'sessions', workspaceName); if (!existsSync(sessionsDir)) return undefined; + const now = Date.now(); const files = readdirSync(sessionsDir) .filter((f: string) => f.endsWith('.jsonl')) .map((f: string) => ({ name: f, mtime: statSync(join(sessionsDir, f)).mtimeMs })) + .filter((f: any) => now - f.mtime < 60_000) .sort((a: any, b: any) => b.mtime - a.mtime); if (files.length === 0) return undefined; @@ -1552,16 +1555,24 @@ export default function (pi: ExtensionAPI) { ctx?.session?.id || pi.session?.id || pi.sessionId || - process.env.PI_SESSION_ID || + // PI_SESSION_ID env var is intentionally omitted: it holds the previous session's + // stale ID at the time session_start/input fire, causing wrong attribution. 
getSessionIdFromFile(); if (foundId && foundId !== fallbackSessionId) { const wasOnFallback = !currentSessionId || currentSessionId === fallbackSessionId; currentSessionId = foundId; - if (wasOnFallback && pendingSessionStart) { - const ps = pendingSessionStart; - pendingSessionStart = undefined; - callcli("SessionStart", {}, ps.ctx); + if (wasOnFallback) { + if (pendingSessionStart) { + const ps = pendingSessionStart; + pendingSessionStart = undefined; + callcli("SessionStart", {}, ps.ctx); + } + if (pendingPromptSubmit) { + const pp = pendingPromptSubmit; + pendingPromptSubmit = undefined; + callcli("UserPromptSubmit", { user_prompt: pp.text }, pp.ctx); + } } } else if (!currentSessionId) { currentSessionId = fallbackSessionId; @@ -1599,6 +1610,12 @@ export default function (pi: ExtensionAPI) { pi.on("session_start", (event, ctx) => { try { + // Reset state for the new session — currentSessionId from a previous session + // must not carry over into the new one (Pi reuses the same process across sessions). + currentSessionId = undefined; + pendingSessionStart = undefined; + pendingPromptSubmit = undefined; + const id = event?.sessionId || event?.session_id || event?.session?.id || ctx?.sessionId || ctx?.session?.id || pi.session?.id || pi.sessionId; if (id) { callcli("SessionStart", { session_id: id }, ctx); @@ -1643,9 +1660,15 @@ export default function (pi: ExtensionAPI) { const text = event.text || event.input || event.content || (typeof event === "string" ? 
event : ""); if (text) { if (text === "/failproofai-status") return; - callcli("UserPromptSubmit", { + const eventSessionId = event?.sessionId || event?.session_id || event?.session?.id; + if (!eventSessionId && (!currentSessionId || currentSessionId === fallbackSessionId)) { + // Pi doesn't include session ID in input events; defer until tool_call provides it + pendingPromptSubmit = { text, ctx }; + return; + } + callcli("UserPromptSubmit", { user_prompt: text, - session_id: event?.sessionId || event?.session_id || event?.session?.id + session_id: eventSessionId }, ctx); } } catch {} diff --git a/src/hooks/types.ts b/src/hooks/types.ts index 9f443d8b..709de43c 100644 --- a/src/hooks/types.ts +++ b/src/hooks/types.ts @@ -14,7 +14,6 @@ export const HOOK_EVENT_TYPES = [ "SessionStart", "SessionEnd", "UserPromptSubmit", - "AssistantResponse", "PreToolUse", "PermissionRequest", "PermissionDenied",