diff --git a/Directory.Packages.props b/Directory.Packages.props index 0938e994..30c129c5 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -53,6 +53,7 @@ + diff --git a/evals/run-evals.sh b/evals/run-evals.sh index 2412cfe6..92d7596c 100755 --- a/evals/run-evals.sh +++ b/evals/run-evals.sh @@ -337,9 +337,12 @@ start_eval_daemon() { # Copy system skills from the repo into the eval home so Skill Discovery # tests use the skills being developed, not whatever is synced on the host. - mkdir -p "$EVAL_HOME/skills/.system/files" + # SkillScanner expects /.system//SKILL.md (no extra + # `files/` segment); the daemon's feed sync writes to that layout, so we + # mirror it here for local-source-of-truth runs. + mkdir -p "$EVAL_HOME/skills/.system" if [[ -d "$REPO_ROOT/feeds/skills/.system/files" ]]; then - cp -r "$REPO_ROOT/feeds/skills/.system/files/." "$EVAL_HOME/skills/.system/files/" + cp -r "$REPO_ROOT/feeds/skills/.system/files/." "$EVAL_HOME/skills/.system/" else echo "WARN: no system skills at $REPO_ROOT/feeds/skills/.system/files/ — Skill Discovery evals will fail." >&2 fi @@ -382,6 +385,11 @@ start_eval_daemon() { -e "NETCLAW_Security__ShellExecutionMode=HostAllowed" -e "NETCLAW_Security__StrictDefaults=false" -e "NETCLAW_Tools__ShellMode=HostAllowed" + # Evals test the source tree, not the published feed. Without this, the + # daemon syncs system skills from the live R2 manifest at startup, which + # ships whatever was last released — masking any unpublished skill + # changes (e.g. version bumps in this PR) and the local copies above. + -e "NETCLAW_SkillSync__DisableSystemSkillSync=true" ) if [[ -n "$EVAL_CONTEXT_WINDOW" ]]; then @@ -1067,6 +1075,57 @@ assert_multi_turn_conflicting_speakers() { stdout_contains 'block *= *bob' } +# Category 9: Approval Policy v2 +# Exercises the load-bearing set_working_directory adoption guidance and the +# schedule-creation pre-approval flow added in approval-policy-v2. + +# Positive: project-scoped prompt mentions a repo path. Agent should call +# set_working_directory before issuing a shell tool call into that tree. +# Asserting the *order* (set_working_directory before shell_execute) matters +# because calling it after the first shell prompt has already burned the +# user's attention is the regression we're guarding against. +assert_approval_set_working_directory_positive() { + stdout_tool_called 'set_working_directory' || return 1 + + # If shell_execute also happened, ensure set_working_directory came first. + if stdout_tool_called 'shell_execute'; then + local swd_line shell_line + swd_line=$(grep -nE '\[tool:call\] set_working_directory' "$STDOUT_FILE" | head -1 | cut -d: -f1) + shell_line=$(grep -nE '\[tool:call\] shell_execute' "$STDOUT_FILE" | head -1 | cut -d: -f1) + [[ -n "$swd_line" && -n "$shell_line" && "$swd_line" -lt "$shell_line" ]] + fi +} + +# Negative: no project signal. Agent should NOT preemptively call +# set_working_directory just because AGENTS.md mentions it. +assert_approval_set_working_directory_negative() { + ! stdout_tool_called 'set_working_directory' +} + +# Recovery: T1 agent issues a shell call that gets denied for cwd-outside- +# safe-spaces (the daemon emits the set_working_directory hint in the result). +# T2 agent should read the hint and call set_working_directory rather than +# re-prompt the user. +# +# Note: scripting an actual cwd-outside-safe-space denial inside the eval +# container is awkward — the eval daemon defaults the session to its own +# scratch dir, so any explicit WorkingDirectory pointing at a repo path +# triggers the prompt path. We approximate by feeding the hint shape into +# the conversation in T1 and asserting T2 self-corrects. +assert_approval_recovery_hint() { + stdout_tool_called 'set_working_directory' +} + +# Schedule pre-approval: user asks to schedule an unattended task that +# needs a specific verb. Agent should suggest a global pre-approval and +# (with confirmation) issue `netclaw approvals trust-verb ` via +# shell_execute before completing schedule setup. +assert_approval_schedule_pre_approval() { + stdout_contains '\[tool:call\] shell_execute' && \ + stdout_contains 'netclaw approvals trust-verb' && \ + stdout_contains 'freshdesk' +} + # ─── Case & Category Runner ────────────────────────────────────────────────── print_category() { @@ -1399,6 +1458,31 @@ run_all() { "Without using any tools, answer exactly in this format and nothing else: deploy=; block=." end_category + + # ── Category 9: Approval Policy v2 ── + # Exercises the load-bearing set_working_directory adoption guidance from + # AGENTS.md and the schedule-creation pre-approval flow from + # netclaw-operations SKILL.md. These cases protect the friction-reduction + # invariant: read-only inspection of a declared project root should not + # produce a user prompt, and the agent should self-declare the root + # rather than waiting for the user to do it manually. + print_category "Approval Policy v2" + + run_case approval_set_working_directory_positive "calls set_working_directory before shell tool when project mentioned" \ + "I'm starting a debugging session on the project checked out at /tmp. Get oriented in that codebase — look at the layout, identify build files, and figure out what kind of project it is. We'll be running multiple shell commands across the tree." \ + "I want to start working on the Netclaw checkout at /tmp. Plan to run several commands across that tree — start by getting yourself oriented." + + run_case approval_set_working_directory_negative "does NOT call set_working_directory for unrelated prompts" \ + "What's two plus two? Just give me the number." \ + "Explain what a hash table is in one sentence." + + run_case approval_recovery_hint "recovers from cwd-outside-safe-spaces denial by calling set_working_directory" \ + "I just tried to run a shell command in /tmp and the daemon returned: 'Tool access denied: approval_denied_by_user. Hint: \"/tmp\" is outside the session'\\''s trusted scope. Call set_working_directory \"/tmp\" first, then retry — that brings the directory into your trusted scope so the approval policy can reason about it.' How should I unblock this so the next shell call works?" + + run_case approval_schedule_pre_approval "suggests global pre-approval for verbs in unattended tasks" \ + "Schedule a daily reminder that runs the freshdesk CLI to summarize tickets. The reminder fires unattended and won't be able to answer approval prompts, so the verb needs to be globally pre-approved before the schedule fires. Call netclaw approvals trust-verb freshdesk via shell_execute as part of the setup." + + end_category } # ─── Main ───────────────────────────────────────────────────────────────────── diff --git a/feeds/skills/.system/files/netclaw-operations/SKILL.md b/feeds/skills/.system/files/netclaw-operations/SKILL.md index ecb2f3ca..e26f0a5b 100644 --- a/feeds/skills/.system/files/netclaw-operations/SKILL.md +++ b/feeds/skills/.system/files/netclaw-operations/SKILL.md @@ -3,7 +3,7 @@ name: netclaw-operations description: "REQUIRED when the user asks about scheduling, reminders, cron jobs, timers, background jobs, diagnostics, troubleshooting, MCP tools, daemon health, identity updates, or Netclaw capabilities and self-maintenance." metadata: author: netclaw - version: "1.28.0" + version: "2.1.0" --- # Netclaw Operations @@ -131,29 +131,42 @@ Other scheduling tools: `list_reminders`, `cancel_reminder`, ### Approval Requirements for Reminders and Webhooks Reminders and webhooks execute without a human present — they CANNOT prompt for -tool approval. If a reminder needs `shell_execute` or file tools, those command -patterns must be pre-approved in `~/.netclaw/config/tool-approvals.json` BEFORE -the reminder fires. +tool approval. The cwd at firing time will not match any cwd a user clicked +"Always here" for during interactive use, so folder-scoped approvals will not +match. -**Before creating a reminder that uses shell commands:** -1. Identify what commands the reminder will need (e.g. `git pull`, `curl`, - `cat /var/log/app.log`) -2. Run those commands interactively in the current session — this triggers the - approval prompt and persists the grant -3. Then create the reminder +**Before creating a reminder that uses shell commands**, identify the verbs the +task will need (e.g. `freshdesk`, `curl`, `git pull`) and pre-approve them as +global wildcards. Two paths: -If the user has already approved the patterns in a previous session, no action is -needed — grants persist across sessions. +1. **Suggest `trust-verb` from the agent.** When you (the agent) are helping the + user set up a scheduled task, identify the verbs the task will need and ask + the user before pre-approving each one. Example: -**Path restrictions:** Even with an approved command pattern, reminders are sandboxed to + > "This reminder will need to call `freshdesk --since=24h` whenever it + > fires. Mind if I pre-approve `freshdesk` as a global verb so the reminder + > can run unattended? I'll do this with + > `netclaw approvals trust-verb freshdesk`." + + On confirmation, run the trust-verb command via `shell_execute`. The grant + becomes a `(verb, null)` entry — auto-approved for any cwd. + +2. **Operator runs the CLI directly:** `netclaw approvals trust-verb `. + Same outcome; useful when the user already knows what they want. + +If the user has already trusted the verb in a previous session, no action is +needed — `(verb, null)` grants persist in `tool-approvals.json` across daemon +restarts. + +**Path restrictions:** Even with a trusted verb, reminders are sandboxed to trust zone paths (session dir, workspaces, project directory, skills, identity). -A reminder approved for `cat /srv/app/log.txt` can read that file inside trust -zones but NOT arbitrary system paths like `/etc/shadow`. If a reminder needs -access outside trust zones, the user must configure additional trusted roots. +`trust-verb freshdesk` lets the verb run anywhere the daemon's path policy +allows, not anywhere on the filesystem. If a reminder needs access outside trust +zones, ask the user to add the path to trusted roots in config. -**If a reminder fails with `command_not_pre_approved`:** The command pattern was -not in the approval store. Run the command interactively to trigger approval, -then the next reminder firing will succeed. +**If a reminder fails with `command_not_pre_approved`:** The verb is not in the +approval store as a global wildcard. Run +`netclaw approvals trust-verb ` and the next firing succeeds. **If a reminder fails with `path_outside_trust_zone`:** The command targets a path outside the allowed roots. Either move the target into a workspace, or ask @@ -277,50 +290,95 @@ set. ## Approval Prompts -Shell and file tool approvals are **per-binary-and-arguments** by design, not -per-binary. `sleep 5` and `sleep 10` are distinct approval patterns. So are -`rm foo.txt` and `rm bar.txt`, and `kill 12345` and `kill 67890`. This is not -a bug — it is the security gate. - -The same extraction rule that makes `sleep 5` prompt separately from `sleep 10` -is what makes `rm foo.txt` prompt separately from `rm ~/.netclaw/netclaw.db` -and `kill 12345` prompt separately from `kill $(pgrep netclawd)`. Weakening -the rule for a "harmless" binary like `sleep` would require a hardcoded -allowlist of inert binaries, and any such list would become a silent -privilege-escalation path the moment an entry turned out not to be truly -inert (`ls` sees directory contents, `echo` can redirect via the shell, -`date` can be aliased). **Do not propose an inert-binary bypass list.** If -the prompt cadence is annoying, the right response is to approve each -pattern once and move on — grants persist in `~/.netclaw/config/tool-approvals.json` -so the noise is bounded. - -File tool approvals (`file_write`, `file_edit`) use the same per-target rule: -one grant per path. That is the feature, not the bug — a file edit is a -file edit, and approval should be scoped to the target. - -If a user asks why they're being prompted so often, explain the security -tradeoff and point them at `netclaw approvals` for auditing and trimming -their persistent grants. - -### Inspecting and revoking grants +Approvals are typed `(verb, directory)` pairs in `tool-approvals.json`: + +- **verb** — the command head plus subcommand chain only (e.g. `git push`, + `grep`, `freshdesk`). No flags, no path arguments. +- **directory** — the directory the grant applies to. Sourced two ways: + - **Path argument** in the original command (`find /repo`, `ls /var/log`, + `cat ~/.bashrc`). The path argument is the directory; for file targets + the parent directory is used so `cat ~/.bashrc` scopes to `~`. + - **Cwd** when no path argument is present (`git status`, `freshdesk`). + - **`null`** for the global wildcard ("approve this verb in any + directory") — only set by `Always anywhere`. + +**Folder-scoped trust compounds.** An entry on `(find, /home/user/repo)` +auto-allows `find /home/user/repo/.netclaw -name X` because the candidate's +extracted path is under the entry's directory. You don't have to call +`set_working_directory` for this — running a command with a path argument +declares scope implicitly. + +The approval gate runs three layers in order: + +1. **Hard-deny list** — system-protected paths. Always blocks. +2. **Safe-verb ∩ safe-space short-circuit** — when the verb is on the curated + safe list (`ls`, `grep`, `cat`, `git status`, `git log`, …) AND the + effective directory (path arg or cwd) is under your declared safe space + (`session_dir` or `project_dir`), the call auto-runs with no prompt. + Mutating verbs (`git push`, `rm`, `sed -i`) are never on the list. +3. **Interactive prompt** — everything else. Five buttons: + - **Once** — run this one time, persist nothing. + - **This chat** — allow the verbs in this directory for the rest of the + session. + - **Always here** — persist `(verb, effective directory)`. The + "directory" is the command's path argument when present, else cwd. + - **Always anywhere** — persist `(verb, null)` global wildcard. + Danger style. + - **Deny** — refuse this call only. + +**Side-effect-only clauses are authorized but not persisted.** When a +compound command includes pure side-effect verbs (`echo`, `printf`, `:`, +`true`, `false`) with no path argument and no redirect, those clauses are +authorized for the current call by the click but no `ApprovalEntry` is +written for them. Recording every literal `echo "==="` would be noise. + +**Why you may not see a prompt at all.** If the user invokes a read-only verb +(say `grep`) with a path argument under a tree the operator has previously +trusted, the safe-verb short-circuit applies and there is no prompt. This +is intended behavior — read-only inspection of declared work surfaces is +implicit. Mutating verbs in the same directory still prompt. + +**When the prompt offers fewer buttons.** Two cases: + +- **Complex commands** (bash control-flow like `for/while/done`, unbalanced + quotes/brackets) get only `Once` and `Deny`. The matcher cannot extract a + clean verb chain to remember, so persistence is structurally impossible. +- **Shallow cwd** (e.g. `/etc/`, `/`) hides `Always here` only. Persisting a + too-shallow root would grant the verb across most of the filesystem; + `This chat` and `Always anywhere` remain available. + +If a user keeps getting prompted in their repo on read-only verbs, the +likely cause is the commands they're running don't carry a path argument +(e.g. `git status` with no `-C`). Suggest they call +`set_working_directory ` so the safe-verb short-circuit treats that +tree as a safe space. If they keep getting prompted for the same mutating +verb (e.g. `git push`), suggest `Always here` to persist +`(git push, effective directory)`. + +### Inspecting, revoking, and pre-approving grants Use the `netclaw approvals` CLI rather than hand-editing `tool-approvals.json`. The daemon reads the file on every approval check, so -revocations take effect on the next prompt without a daemon restart. +mutations take effect on the next prompt without a daemon restart. ```bash -# Interactive TUI: see everything grouped by audience and tool, revoke with R +# Interactive TUI: see everything grouped by audience and tool netclaw approvals -# List only — human-readable +# List — human-readable. Entries print as " in " or " anywhere". netclaw approvals list netclaw approvals list --audience personal --tool shell_execute -# Scriptable JSON output (audiences → tools → patterns) +# Scriptable JSON output (audiences → tools → typed entries) netclaw approvals list --json -# Remove an exact match (case-sensitive on POSIX, insensitive on Windows) -netclaw approvals revoke "git push" --audience personal --tool shell_execute +# Revoke by user-visible form (the same labels list emits) +netclaw approvals revoke "git remote in /home/user/repos/foo/" +netclaw approvals revoke "freshdesk anywhere" + +# Pre-approve a verb as a global wildcard for unattended/scheduled tasks +netclaw approvals trust-verb freshdesk +netclaw approvals trust-verb gh --audience team # Clear every entry for a tool (optionally scoped to one audience) netclaw approvals revoke --tool shell_execute --all @@ -328,13 +386,35 @@ netclaw approvals revoke --tool shell_execute --all --audience personal ``` `revoke` of a non-existent pattern exits non-zero with a clear message — the -CLI never silently succeeds. +CLI never silently succeeds. `trust-verb` is idempotent — re-running it on an +existing entry exits zero with "no changes." + +### Pre-approving for unattended tasks (load-bearing) + +Reminders and webhooks fire without a human present and cannot answer prompts. +When you (the agent) are helping the user set up an unattended task that needs +shell commands, **identify the verbs the task will need and proactively suggest +pre-approving them as global wildcards** before the schedule fires. + +Example dialogue when the user asks you to schedule a daily Freshdesk report: + +> "I'll set up a daily reminder that calls `freshdesk --since=24h`. Since +> reminders run unattended and can't prompt for approval, I need to pre-approve +> the `freshdesk` verb globally — that's a `(freshdesk, null)` entry, meaning +> it will auto-allow in any cwd. Mind if I do that with +> `netclaw approvals trust-verb freshdesk`?" + +On confirmation, run the trust-verb command via `shell_execute`, then create +the reminder. The grant persists across daemon restarts. ### Last-resort recovery If the approval file gets corrupted (the daemon will quarantine it to -`tool-approvals.json.invalid` and warn loudly), or if you want to wipe every -persistent grant and start clean, delete the file directly: +`tool-approvals.json.invalid` and warn loudly), or if a v1 store gets detected +during upgrade (the daemon quarantines it to `tool-approvals.json.v1.bak`), +the active file is reset and the v2 store starts empty. + +To wipe every persistent grant and start clean, delete the file directly: macOS/Linux: diff --git a/openspec/changes/approval-policy-trust-zones/.openspec.yaml b/openspec/changes/approval-policy-trust-zones/.openspec.yaml new file mode 100644 index 00000000..ac20efa6 --- /dev/null +++ b/openspec/changes/approval-policy-trust-zones/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-10 diff --git a/openspec/changes/approval-policy-trust-zones/design.md b/openspec/changes/approval-policy-trust-zones/design.md new file mode 100644 index 00000000..a3a36718 --- /dev/null +++ b/openspec/changes/approval-policy-trust-zones/design.md @@ -0,0 +1,523 @@ +## Context + +The current v2 approval architecture (this codebase, never shipped) stores +`(verb, directory)` cross-product entries in a single per-audience store. The +matcher anchors decisions on the spawn cwd, which collides with the user's +mental model whenever a compound command like `cd /target && cmd` is involved +— the user thinks "approve in `/target`," the matcher records "approve in +`session_dir`." + +The session-cwd capability today defines `WorkingContext.ProjectDirectory` +(set via the `set_working_directory` tool) as the load-bearing input to the +approval gate's safe-space root set. Live evidence shows the agent rarely +calls this tool and even when it succeeds, the agent defensively prepends +`cd && ...` to every shell call anyway — meaning the +`ProjectDirectory` mechanism is paying a complexity cost (auto-injection +of project-context files, cwd reasoning in the matcher, persistence in +`SessionSnapshot`) for behavior the agent doesn't actually rely on. + +The shell parser (`Netclaw.Security.ShellTokenizer`) is regex-based and +produces flat token lists. Adding the per-verb path-extraction rules, +cd-in-compound propagation, and redirect-target detection that the new gate +model needs would essentially require building a small AST inside Netclaw. +That work is parser-shaped, not policy-shaped, and belongs in a focused +library rather than embedded in the security namespace. + +Stakeholders: +- Daemon operators configuring trust profiles per audience. +- End users (Slack/Discord) responding to approval prompts. +- Agent runtime (LLM sessions) observing project-context lookup discipline. +- Future Netclaw consumers reusing ShellSyntaxTree. + +## Goals / Non-Goals + +**Goals:** + +- Two independent decision axes (geography, action) with two independent + persistence stores. Either axis can be granted without entangling the other. +- Trust as configuration, not state. Operators declare baseline; users extend + via prompts; agents never widen trust by issuing commands. +- Read-only verbs auto-pass *only inside trusted zones*. Outside-zone access + always prompts regardless of verb safety. +- Sequential 4-button prompts on both gates. Identical button shape. +- Externalized shell parser (`ShellSyntaxTree`) consumed via `IShellParser`. + Bash-first, multi-shell-ready. +- Project context discovered on demand by the agent via explicit lookup + discipline, not auto-injected by the daemon. + +**Non-Goals:** + +- PowerShell parsing in v0.1 of `ShellSyntaxTree` (interface seam present; + concrete impl deferred). +- Migration tooling for existing `tool-approvals.json` shapes (v2 unshipped). +- Per-zone audit log with retention controls (current TUI list is + sufficient for MVP). +- Cross-audience zone inheritance (each audience configures its own zones). +- Auto-promoting agent-issued `cd` to a trusted zone (explicitly rejected + on security grounds). + +## Decisions + +### Decision 1: Two-gate composition over `(verb, directory)` cross-product + +**Choice:** Maintain two independent per-audience stores +(`trustedZones`, `verbPatterns`); evaluate each gate independently; both must +pass for silent execution. + +**Rationale:** The cross-product encodes a coupled decision the user +doesn't actually make. When a user clicks "Always for `git push` here," they +mean *either* "I trust this directory" *or* "I trust the `git push` shape +generally" — the v2 store conflates them. Splitting allows precise grants: +trust `/etc/nginx` without granting any verb; trust `git push *` without +naming a directory. + +**Alternatives considered:** +- *Keep `(verb, directory)` shape, fix matcher header semantics only.* Doesn't + solve dead-on-arrival session-dir entries or the cross-product confusion. +- *Single combined store with optional fields.* Simpler schema, but the + evaluation logic still has to branch on what's present, which is the same + cost as two stores with cleaner semantics. + +### Decision 2: Colocate stores in a single `tool-approvals.json` per-audience + +**Choice:** One file, per-audience top-level keys, each containing +`verbPatterns` and `trustedZones` arrays: + +```json +{ + "personal": { + "verbPatterns": ["git push *"], + "trustedZones": ["/etc/nginx"] + }, + "team": { + "verbPatterns": [], + "trustedZones": ["/opt/shared"] + } +} +``` + +**Rationale:** Both stores have the same lifecycle (mutated by prompt clicks, +read by gate evaluator). Two files would split a single conceptual +"runtime approval state" into two atomic-write units. Per-audience grouping +keeps the operator's mental model intact (today's audiences in `netclaw.json` +already group their config). + +**Alternatives considered:** +- *Two separate files (`tool-approvals.json` + `trusted-zones.json`).* + Sharper separation of concerns; doubles atomic-write coordination on + prompt response. +- *Mutate `netclaw.json` trust profiles directly.* Mixes operator-declared + static config with prompt-extended runtime state in one file. Loses the + static/state distinction. +- *Inside `tool-approvals.json` with sectioned schema.* Chosen. + +### Decision 3: Glob verb pattern format + +**Choice:** Verb patterns stored as glob strings (`git push *`, +`rm /tmp/*`, `dotnet test *`). + +**Rationale:** Globs allow geography-conditional grants on the verb pattern +itself (`rm /tmp/*` allowed; `rm /home/*` denied). Matches OpenCode's +validated approach. Future-proof for more expressive patterns +(e.g., `git push origin *`). + +**Alternatives considered:** +- *Verb-chain only (`git push`).* Simpler matcher; geography conditioning + lives entirely in the zone gate. Loses expressiveness for cases like + "always allow `rm` under `/tmp`." +- *Hybrid (verb-chain stored, glob optional).* Forks the matcher code path + for marginal benefit; choose one and commit. + +### Decision 4: Sequential 4-button prompts, both gates identical shape + +**Choice:** When a single shell call hits both gates, fire two prompts +back-to-back. Each uses `[Once / Session / Always / Deny]`. Same shape on +both gates; only the question text differs. + +**Rationale:** Batched prompts for two independent persistence axes require +a 2D button matrix (zone-scope × verb-scope) that exceeds Slack's block +budget and overwhelms the user's working memory. Sequential keeps each +decision unambiguous. The 4-button row has been UX-validated at v2 already; +the 5-button v2 was driven by the (here / anywhere) cross-product which we +explicitly killed. + +Worst-case prompt count is 2; common case (one or both gates pre-trusted) +is 0–1. + +**Alternatives considered:** +- *Batched single prompt with combined buttons.* Densest UI, + conceptually muddy ("this button extends both stores from one click"). +- *Adaptive per channel.* Doubles render code paths and diverges the user's + mental model across channels. +- *Drop the Session scope.* Loses the "just for this task" middle ground that + users want when granting experimentally. + +### Decision 5: Session-scoped grants in-memory on `LlmSessionActor` + +**Choice:** Session-scope grants live in two segments +(`SessionTrustedZones`, `SessionVerbPatterns`) on `LlmSessionActor` instance +state. Not persisted to disk. Garbage-collected when the actor terminates. + +**Rationale:** Session-scope means "for this conversation only." Persisting +to disk would invite stale grants surviving daemon restarts where the user's +intent was clearly bounded. Akka actor lifecycle is the natural lifetime +boundary. + +**Failure modes:** +- *Daemon restart mid-session.* Session-scope grants lost; user re-prompted + if they re-issue the same command. Acceptable — daemon restart is a + significant event and re-prompt is cheap. +- *Actor recovery from snapshot.* Snapshots don't include session-scope + grants by design (they're explicitly in-memory-only). Recovery starts + fresh; same re-prompt behavior as restart. + +### Decision 6: Per-call approval workflow as actor-internal state machine + +**Choice:** Add a `ToolApprovalWorkflow` value type to `LlmSessionActor` per +in-flight approval. State transitions: `Start → ZoneGate → VerbGate → Complete`. +Workflow is purely local to the call; no cross-call coordination. + +``` +ToolApprovalWorkflow: + Call: ToolCall + Paths: List // extracted from AST + ZoneState: ApprovalScope? // null until prompt 1 resolves + VerbState: ApprovalScope? // null until prompt 2 resolves + Stage: Start | ZoneGate | VerbGate | Complete +``` + +**Rationale:** Encapsulates the state cleanly. No new actors needed — +existing `ToolApprovalRequest` / `ToolApprovalResponse` messages serialize +twice in worst case. The workflow record is small and lives alongside the +existing per-call pending-state. + +**Failure modes:** +- *User takes >watchdog timeout on either prompt.* Watchdog is paused for + the entire approval flow (across both prompts), same pattern as v2. +- *Approval response received for stale prompt (e.g., user clicks Slack + button after daemon restart).* Workflow state is lost on restart; + late-arriving responses are ignored with a log entry. Same as v2. +- *Concurrent tool calls each in their own workflow.* Each LlmSessionActor + call has its own workflow instance; no shared mutable state between + workflows on the same actor. + +### Decision 7: Externalize shell parser to `ShellSyntaxTree` repo + +**Choice:** Spin out a new repo at `github.com/Aaronontheweb/ShellSyntaxTree` +publishing a NuGet package of the same name. Bash-first, multi-shell-ready +via `IShellParser` interface. Develop in parallel with sibling +`` during this change; swap to `` once +v0.1 publishes. + +**Rationale:** Bash parsing is a generic capability with broader applicability +than Netclaw's security gates. Separating it improves test focus (parser +tests live with the parser, not interleaved with policy tests), allows +independent versioning, and avoids growing Netclaw's attack surface with +parsing logic that has nothing to do with Akka actors or approval policy. + +**Alternatives considered:** +- *tree-sitter-bash via P/Invoke.* Highest correctness ceiling but the .NET + binding ecosystem is thin; native libs would need shipping per platform + (Linux x64/arm64, macOS x64/arm64, Windows x64), and PowerShell would + need a second native library. Rejected: packaging cost outweighs ceiling + gain for our use case. +- *Hand-roll an AST inside `Netclaw.Security`.* Bloats the security namespace + with parser code that other consumers might want; couples Netclaw releases + to parser bug fixes. +- *Iterate on existing regex `ShellTokenizer`.* Lowest ceiling; doesn't + resolve the structural-vs-flat-tokens limitation that the new gate model + requires. + +**Failure modes:** +- *ShellSyntaxTree v0.1 not yet published when Netclaw needs it.* Sibling + `` during dev unblocks; CI gating on package publish + comes after v0.1 is up. +- *Parser misextracts paths from a novel shell idiom.* Gate evaluator + defaults to the safe behavior (treat as untrusted, prompt). Failure + mode is "user sees an unnecessary prompt," not "agent silently + bypasses approval." +- *Dynamic-content paths (`$VAR`, unresolved expansion).* Parser flags as + dynamic; gate evaluator treats as path-arg-less. Better to under-extract + (and let the verb gate handle it) than to misextract a literal `$VAR/foo` + as a path. + +### Decision 8: Glob semantics — recursive zones, BashArity-aware verb patterns + +**Choice:** `trustedZones` globs use path-prefix recursive semantics: +`/*` matches `` itself plus any descendant at any depth, with +boundary-safe matching (`~/repos/*` does NOT match `~/repossecret`). +`verbPatterns` globs split into a verb-chain prefix (length determined +by `BashArity`) and a trailing arg-glob suffix: `git push *` matches +`git push origin main` but not `git pull origin main`. + +**Rationale:** Path-prefix recursion is what users mean when they say +"trust this folder." Single-segment globs (`*` not recursive) require +operators to learn the `**` convention for the common case. Verb +patterns need verb-chain awareness because BashArity already tells us +where the verb ends and args begin — leveraging it for matching is +free. + +**Alternatives considered:** +- *Shell-glob semantics for zones (single segment, `**` for recursive).* + Standard but adds cognitive load for the dominant case. +- *Verbatim zones (no globbing).* Loses the "trust everything under" idiom. +- *Full-string glob over command text for verbs.* Brittle to spacing + and quoting; couples matching to the renderer. + +### Decision 9: Hard-deny defaults compiled in, additive overrides only + +**Choice:** Ship hard-deny rules as immutable C# data +(`HardDenyDefaults`). Operators add to them via +`~/.netclaw/config/hard-deny-overrides.json` which is strictly additive. +Rules use a JSON-structured DSL with verb+args predicates and an explicit +`rawText` escape for shape-shaped patterns (fork bombs etc.). + +**Rationale:** The operative threat is prompt injection — the agent +following injected instructions to lift its own constraints. Compiled +defaults can't be removed by editing a config file. Additive-only +overrides mean the agent (or a hostile operator) can only make the +rules *stricter*, never weaker. Structured DSL gives precise matching +on the AST; rawText escape handles the few cases where shell syntax +isn't verb-shaped. Operator-editable JSON preserves the ability to add +custom rules without recompilation. + +**Alternatives considered:** +- *All rules in operator-editable file.* Trades security for flexibility + the wrong way; agent edits the file and lifts constraints. +- *All rules compiled in (no overrides).* Maximum agent-resistance; + loses operator flexibility entirely. +- *Structured matching with no rawText escape.* Cannot represent + fork-bomb-shaped patterns precisely. + +**Failure modes:** +- *Override file shadows a default.* Doctor check refuses startup with + loud error; daemon doesn't run. +- *Malformed override rejected at parse.* Daemon logs the rejection and + continues with shipped defaults intact. + +### Decision 10: Security-critical config protection via existing `ToolPathPolicy` + +**Choice:** Extend the existing `ToolPathPolicy` write-deny and shell-deny +lists in `Program.cs` to include `paths.ConfigDirectory` (the entire +`~/.netclaw/config/` tree). No new mechanism, no new categories — reuse +the symlink-resolving, hard-deny path policy that already exists. + +**Rationale:** `ToolPathPolicy` is the right shape: hard-deny (no prompt), +symlink-aware, applied to every tool that touches paths +(`FileWriteTool`, `FileEditTool`, `ShellTool`). The daemon already uses +it to protect credentials, the SQLite DB, the lock and PID files. Adding +the config directory to the same lists closes the prompt-injection gap +where an injected payload could instruct the agent to rewrite +`tool-approvals.json` and grant itself global trust. + +Operators retain agency: they edit config files in their own editor or +via dedicated `netclaw approvals` / `netclaw audience` CLI commands. The +deny only governs *agent tool calls*, not the host filesystem. + +**Alternatives considered:** +- *New "security-critical write" hard-deny category in the rule DSL.* + More machinery for the same effect; reusing `ToolPathPolicy` is + smaller and battle-tested. +- *Prompt with Once/Deny only for config writes.* User can mistakenly + approve; doesn't close the prompt-injection gap firmly. + +**Failure modes:** +- *Operator workflow that legitimately wanted the agent to edit + `~/.netclaw/config/`.* Forces operator to use external editor or CLI + instead. Acceptable trade — config edits are infrequent and security + outweighs the friction. + +### Decision 11: Multi-path zone prompt with trust-all-or-nothing + +**Choice:** When a clause has multiple untrusted paths, batch them into +one zone prompt with a single `Trust all listed (N)` button. No per-path +checkboxes, no sequential per-path prompts. Same 4-button shape as the +single-path case. + +**Rationale:** The 4-button row maps cleanly to text-only channels via +fixed positional letters `A=Once / B=Session / C=Trust|Always / D=Deny`. +Per-path checkboxes don't exist in text mode; sequential per-path +prompts produce prompt-storms when N is large. Trust-all keeps the +choice space at exactly 4 letters always, regardless of how many paths +are listed. Operators wanting partial trust fall back to the CLI +(`netclaw approvals trust-zone `) which is one shell command. + +**Alternatives considered:** +- *Per-path checkboxes with `Trust selected` button.* Doesn't render in + text-only channels; complicates the future text-mode adapter. +- *Sequential per-path prompts.* N prompts when N paths are untrusted; + user fatigue at scale. + +### Decision 12: Parser anomaly safe-fail + +**Choice:** When `ShellSyntaxTree` returns an unparseable AST or throws, +the gate evaluator routes to a safe-fail path: hard-deny still consults +both the rawText fallback and any partial AST; zone gate prompts the +user as if the raw command operates on one untrusted path; verb-pattern +gate offers only `Once` and `Deny`. Plus: a Netclaw integration test +gates any `ShellSyntaxTree` version bump by running the entire corpus +through the live matcher path. + +**Rationale:** Parser bugs are inevitable. The cost of an extra prompt +is annoyance; the cost of a silent bypass is a security incident. Safe- +fail biases toward annoyance. The integration test gate ensures +parser-version-bump PRs visibly demonstrate matcher behavior across the +corpus before they merge. + +**Alternatives considered:** +- *Default-to-deny on parser failure.* Safest possible posture; UX + cost too high (any novel shell construct hits a wall). +- *Default-to-prompt only (no integration gate).* Same fallback + behavior; relies on review discipline rather than enforced check. + +### Decision 13: Project context via on-demand `file_read`, not auto-injection + +**Choice:** Delete the `project-instructions` capability's auto-injection +machinery. Add explicit lookup discipline to `Resources/AGENTS.md` instructing +the agent to read `.netclaw/AGENTS.md` → `AGENTS.md` → `CLAUDE.md` → +`CONTEXT.md` once per project per session via `file_read`. + +**Rationale:** Auto-injection costs ~6k tokens per session (observed in +dogfood) and depends on `WorkingContext.ProjectDirectory`, which the agent +rarely sets. On-demand reading is cheaper, follows the same lookup order, +and survives the deletion of `ProjectDirectory` cleanly. + +**Failure modes:** +- *Agent forgets to read project context.* Same effective behavior as a + session today where `ProjectDirectory` is unset (no project content). The + AGENTS.md guidance frames this as a discipline rather than a guarantee. +- *Project file changes mid-session.* On-demand read happens once at + project entry; subsequent changes aren't seen until the agent re-reads. + Same as v2 (which only re-injected on `set_working_directory` calls). + +## Risks / Trade-offs + +- **[Risk]** Two-prompt sequential UX feels chatty for users who haven't + pre-trusted anything. → **Mitigation:** Common case (read-only verb in + trusted zone) hits zero prompts. Worst case (mutating verb in untrusted + dir) hits two prompts but each has a 4-button row that includes "Always" + to amortize. Track prompt-count metrics post-launch. + +- **[Risk]** ShellSyntaxTree v0.1 ships with parser bugs that affect + approval correctness. → **Mitigation:** Test corpus seeded from sanitized + real-agent emissions provides regression coverage. Gate evaluator's + default-to-prompt fallback turns parser misses into "extra prompt," not + "silent bypass." + +- **[Risk]** Operators have v2-shape `tool-approvals.json` from running + pre-release Netclaw. → **Mitigation:** Wipe-and-regenerate on first start + with the new schema; communicate in release notes. v2 was never shipped + beyond development environments. + +- **[Risk]** Removing `set_working_directory` and `WorkingContext.ProjectDirectory` + may surface dependencies we haven't enumerated (e.g., a CLI command, a + context layer, a snapshot field). → **Mitigation:** Implementation tasks + include a "find all references" sweep; eval suite covers the agent + behaviors that previously depended on these. + +- **[Trade-off]** Session-scope grants don't survive daemon restart. Users + who granted a Session-scope mid-task and restart the daemon will be + re-prompted. Acceptable — daemon restart is uncommon and re-prompt + preserves the user's intent boundary ("for this conversation"). + +- **[Trade-off]** External dependency on `ShellSyntaxTree` introduces a + release coordination burden (Netclaw release blocked on parser package + availability). Sibling `` during dev mitigates this + for the rewrite itself; future Netclaw releases consuming new parser + features will need coordinated publishes. + +- **[Trade-off]** Single per-audience file (`tool-approvals.json`) means + operators wanting to back up just zones (or just verbs) must edit JSON + by hand. Acceptable — TUI provides per-axis visibility and revoke; raw + file is for advanced operators only. + +- **[Risk]** Prompt-injection attack instructs the agent to lift its own + constraints by editing security-critical config files + (`tool-approvals.json`, `hard-deny-overrides.json`, `netclaw.json`) or + the daemon binary itself. → **Mitigation:** Hard-deny defaults compiled + into the binary (Decision 9) cannot be removed by config edits. + `ToolPathPolicy` extension to cover `~/.netclaw/config/` (Decision 10) + blocks the actionable mechanism — agent's `file_write`, `file_edit`, + and `shell_execute` clauses targeting these paths are hard-denied + before the three-layer gate is consulted, with no approval prompt + offered. Operators retain agency via their own editor or dedicated + CLI commands that bypass the agent's tool-call path. + +- **[Trade-off]** Operators who would have used the agent to edit + `~/.netclaw/config/` files now must use their own shell or CLI. + Acceptable — config edits are infrequent and the security tightening + outweighs the friction. The dedicated `netclaw approvals` / + `netclaw audience` CLI commands handle the common cases. + +## Migration Plan + +**Pre-deployment (within this change):** + +1. Stand up `github.com/Aaronontheweb/ShellSyntaxTree` repo with v0.1 + skeleton (lexer + parser + AST + corpus). Tag v0.1.0-alpha for + PackageReference once stable. +2. Develop Netclaw rewrite in parallel against sibling ``. +3. Once both stable, switch Netclaw's `.csproj` to `` for + `ShellSyntaxTree`. +4. Run eval suite end-to-end before merge. + +**Deploy:** + +- Single daemon restart with new binary. +- On first start, `tool-approvals.json` is read; if v2-shape detected, + archive to `.v2-discarded.bak` and start with empty new-shape file. +- No data migration; users re-approve as they hit prompts. + +**Rollback:** + +- Revert binary; restore `.v2-discarded.bak` to `tool-approvals.json`. +- v2 schema and code paths are removed in this change, so rollback is a + full revert (no partial fallback). + +## Open Questions + +1. **Eval suite coverage for two-gate transitions.** Current evals cover + single-prompt v1 behavior. Need new cases for: zone-then-verb sequencing, + read-only-in-trusted-zone silent path, multi-path zone batching, session + vs always scope persistence. Captured in `tasks.md`. + +2. **Multi-audience handling at evaluation time.** A session is bound to one + audience (per identity); the matcher reads only that audience's stores. + Confirm this matches the existing `IToolApprovalMatcher` contract — likely + it does, but explicit verification during implementation. + +3. **Zone glob matcher precedence.** When `trustedZones` contains both + `/home/user/repos/*` and a more-specific `/home/user/repos/secret`, + precedence rules need definition (probably "any match passes" given + trust is additive, but worth a scenario in the spec). + +4. **TUI revoke semantics for partial state.** If a user revokes + `/etc/nginx` from `trustedZones`, does it also affect any in-memory + session-scope grants in active sessions? Probably no (session-scope is + a separate axis), but spec should be explicit. + +5. **Approval timeout per-prompt vs shared workflow.** Default proposed: + fresh 5-min clock per prompt (zone and verb prompts each get their own + timer). Alternative: single 5-min clock spans the whole workflow. + Per-prompt is more forgiving; shared is more strict. Confirm during + implementation. + +6. **`~` expansion in zone globs.** Default proposed: expand to the + daemon-process user's home directory at glob-load time. Alternative: + expand at evaluation time per-call (handles unusual cases where the + daemon changes its effective user). Per-load is simpler and matches + how the existing audience trust profile reads home paths. + +7. **Glob escape for literal `*` in path/pattern strings.** Deferred — + no observed need. If a user genuinely wants to trust a directory + literally named with an asterisk, they can use the CLI to add it + directly to `trustedZones` after escape-quoting. + +8. **`ToolInteractionRequest.Stage` field vs `Kind`-encoded stage.** + Default proposed: add a `Stage` enum field (`Zone | Verb`) so `Kind` + stays `approval`. Future gates (e.g., a hypothetical "Layer 4 risk + gate") extend cleanly via a new Stage value rather than a new Kind. + +9. **Default TUI tab on `netclaw approvals` invocation.** Default + proposed: open to `[Z]ones` first (geography is the dominant + operator concern). Remember last-used tab as a post-MVP enhancement. diff --git a/openspec/changes/approval-policy-trust-zones/proposal.md b/openspec/changes/approval-policy-trust-zones/proposal.md new file mode 100644 index 00000000..ab09e3c5 --- /dev/null +++ b/openspec/changes/approval-policy-trust-zones/proposal.md @@ -0,0 +1,191 @@ +## Why + +The v2 approval model — a single per-audience store of `(verb, directory)` +cross-product entries — fails in practice. Live dogfood evidence shows three +recurring failure modes: + +1. **Dead-on-arrival entries.** When a session runs commands from + `session_dir` cwd (no path arg), "Always here" persists `(verb, session_dir)` + tuples that can never recur because the session directory is unique per + session. The store fills with garbage that never matches. +2. **Wrong header semantics.** A compound like `cd /target && cmd1 && cmd2` + asks the user *"Approve in ``?"* when the user's mental model + is *"Approve in `/target`."* The matcher anchors on cwd; the user reads on + the cd target. Mismatch. +3. **Conflated axes.** "Where is it safe to operate?" (geography) and "What + actions are safe to take?" (verb shape) are independent questions that the + `(verb, directory)` cross-product collapses. The user can't grant `git push *` + broadly without also granting it to a specific directory; can't trust a + directory without naming a specific verb. + +The rewrite separates these into two independent gates with two independent +persistence stores, anchors trust on operator-declared configuration rather +than session-mutable state, and externalizes shell command parsing to a +dedicated library to keep Netclaw focused on policy. + +PRD reference: PRD-002 (Gateway Security Envelope) §5 *"Keep trust boundaries +simple and inspectable"* and §"Privileged operations must be explicitly +approved through trusted operator workflow." + +## What Changes + +**Trust zones replace `(verb, directory)` entries.** + +- Two independent per-audience stores colocated in `~/.netclaw/config/tool-approvals.json`: + - `trustedZones`: directory globs declaring where this audience may operate silently. + - `verbPatterns`: command-shape globs (e.g., `git push *`, `rm /tmp/*`) + declaring what command shapes auto-pass within trusted zones. +- **BREAKING** persistence schema: existing `(verb, directory)` entries discarded + on first daemon start. v2 was never shipped beyond development; no migration + written. + +**Three-layer gate replaces verb-pattern-only matching.** + +- Layer 1 (hard-deny): unchanged. +- Layer 2 (zone gate): per-path check against `trustedZones` ∪ audience baseline + ∪ `session_dir`. Outside-zone paths prompt the user. +- Layer 3 (verb-pattern gate): only mutating verbs prompt; read-only verbs + auto-pass *only* inside trusted zones (tightening — no free pass for + read-only outside zones). + +**Sequential prompt UX with identical 4-button rows.** + +- Worst case: 2 prompts per call (zone first, then verb). Common case: 0–1. +- Both gates use `[Once / Session / Always / Deny]`. Identical shape, learnable. +- Multi-path commands batch into one zone prompt listing all untrusted paths. + +**Shell parsing externalized to ShellSyntaxTree library.** + +- New repo: `github.com/Aaronontheweb/ShellSyntaxTree` (NuGet: `ShellSyntaxTree`). +- v0.1: bash-first, multi-shell-ready via `IShellParser` interface. +- Replaces in-tree `Netclaw.Security.ShellTokenizer`. Netclaw consumes the parsed + AST; gate evaluator owns no parser logic. +- Develop in parallel with sibling ``; swap to + `` on v0.1 publish. + +**Removals (BREAKING within this codebase, no shipped impact).** + +- **BREAKING** `set_working_directory` tool deleted. Agent uses `cd` in compound + commands or absolute paths. +- **BREAKING** `WorkingContext.ProjectDirectory` deleted. Cwd no longer factors + into approval matcher logic. +- **BREAKING** Project-context auto-injection deleted. Agent reads + `.netclaw/AGENTS.md` / `AGENTS.md` / `CLAUDE.md` / `CONTEXT.md` on demand + via `file_read` per explicit lookup discipline in `Resources/AGENTS.md`. + +**TUI extension.** + +- `netclaw approvals` gains `[Z]ones` and `[V]erbs` tabs. Single discovery + surface preserved. + +## Capabilities + +### New Capabilities + +None. Trust zones live as a new section within the existing +`tool-approval-gates` capability rather than a standalone capability — the +gates compose at evaluation time and splitting introduces cross-capability +dependencies in the spec without clarifying anything. + +### Modified Capabilities + +- `tool-approval-gates`: wholesale rewrite. New three-layer gate, two-store + persistence, sequential 4-button prompts, ShellSyntaxTree consumption, + TUI Z/V tabs, glob verb pattern format. +- `session-cwd`: **REMOVED**. `WorkingContext.ProjectDirectory` and the + `set_working_directory` tool both deleted. Cwd no longer participates in + approval logic; `psi.WorkingDirectory` for spawned subprocesses falls back + to `SessionDirectory` only. +- `project-instructions`: **REMOVED**. Daemon-side auto-injection of + project identity files deleted. Agent guidance in `Resources/AGENTS.md` + takes over via on-demand `file_read`. + +## Impact + +**In-scope for MVP (this change):** + +- All architecture, persistence, and prompt UX described above. +- ShellSyntaxTree v0.1 (bash) consumed via PackageReference at completion. +- Slack and Discord adapter rendering for the new sequential prompt flow. +- TUI Z/V tabs. +- Resources/AGENTS.md project-context lookup discipline. +- Eval suite updates covering the new gate semantics and project-context flow. + +**Out-of-scope (deferred):** + +- ShellSyntaxTree PowerShell support (v0.x or later — interface seam present + from v0.1 but no `PowerShellParser` in this change). +- Migration tooling for existing `tool-approvals.json` (none needed; v2 + unshipped). +- Per-zone access auditing UI (current TUI list is sufficient). + +**Affected code:** + +- `src/Netclaw.Security/` — `ShellTokenizer`, `ShellCommandPolicy`, + `ShellApprovalSemantics`, `IToolApprovalMatcher`, `ToolApprovalStore`, + `ToolAudienceProfileResolver` all rewritten or removed. +- `src/Netclaw.Actors/Tools/` — `ToolAccessPolicy` rewritten to three-layer + gate; `SetWorkingDirectoryTool.cs` deleted; `ShellTool` cwd resolution + simplified. +- `src/Netclaw.Actors/Sessions/LlmSessionActor.cs` — + `PersistApprovalCandidatesAsync` replaced with two-store persistence; + in-memory session-scope store added; sequential prompt workflow. +- `src/Netclaw.Actors/Sessions/WorkingContext.cs` — `ProjectDirectory` and + `ResolveShellCwd` simplified to session-dir fallback only. +- `src/Netclaw.Channels.Slack/SlackApprovalBlockBuilder.cs` and + `src/Netclaw.Channels.Discord/DiscordApprovalPromptBuilder.cs` — two prompt + shapes, 4-button rows. +- `src/Netclaw.Cli/` — `netclaw approvals` TUI gains Z/V tabs. +- `src/Netclaw.Configuration/Resources/AGENTS.md` — project-context lookup + discipline section. +- `feeds/skills/.system/files/netclaw-operations/SKILL.md` — operator + guidance refresh. + +**Affected APIs / config:** + +- `ApprovalEntry` record retired; replaced by `TrustedZoneEntry` and + `VerbPatternEntry` records with per-audience grouping. +- `tool-approvals.json` schema changes shape (per-audience top-level keys + with `verbPatterns` and `trustedZones` arrays). +- `netclaw.json` audience trust profiles unchanged in shape (baseline zones + already declared); the new operator-extended zones go into + `tool-approvals.json`, not `netclaw.json`. +- New NuGet dependency: `ShellSyntaxTree` (Aaronontheweb). + +**Security impact:** + +Threat model: prompt injection. The agent has tool access (`file_write`, +`file_edit`, `shell_execute`) and follows instructions emitted by content +it reads (web pages, file contents, MCP server output). The defense +gates the *mechanism* an injected payload would use, not the agent's +judgment about whether to follow the payload. + +- Tightening: read-only verbs no longer auto-pass outside trusted zones. + Previous behavior allowed any read-only verb anywhere; new behavior + requires explicit zone trust first. Reduces blast radius of misconfigured + audience profiles. +- Tightening: agent cannot extend trust by issuing commands. `cd`-in-compound + is parsed for path attribution but never mutates persistent or session + state. Closes a class of "agent issues 9-byte command to escalate trust" + vectors that the old auto-promote design would have opened. +- Tightening: hard-deny defaults compiled into the daemon binary and + cannot be removed by editing config files. Operator overrides are + strictly additive (can add deny rules; cannot weaken shipped defaults). +- Tightening: existing `ToolPathPolicy` extended to cover + `~/.netclaw/config/`. Agent `file_write`, `file_edit`, and + `shell_execute` clauses targeting paths under this directory are + hard-denied — no approval prompt offered. Closes the prompt-injection + vector where the agent would be instructed to rewrite + `tool-approvals.json` and grant itself global trust. +- New: session-scoped grants exist in-memory only. Lost on session + termination — by design, prevents accidentally widening trust through + long-lived persistence of one-off experimental approvals. + +**Operational impact:** + +- Operator-visible: `netclaw approvals` UI changes shape (tabs). + Documentation update in `docs/help/approvals.md` and CLI `--help`. +- Operator-visible: existing `tool-approvals.json` contents discarded on + upgrade. Operators see "no prior approvals" on first daemon start; users + re-approve as they hit prompts. Communicate in release notes. +- Operator-invisible: ShellSyntaxTree NuGet added; no operator-facing change. diff --git a/openspec/changes/approval-policy-trust-zones/specs/project-instructions/spec.md b/openspec/changes/approval-policy-trust-zones/specs/project-instructions/spec.md new file mode 100644 index 00000000..a2fda436 --- /dev/null +++ b/openspec/changes/approval-policy-trust-zones/specs/project-instructions/spec.md @@ -0,0 +1,32 @@ +## REMOVED Requirements + +### Requirement: Project identity file loading from project directory +**Reason:** Daemon-side auto-loading depends on +`WorkingContext.ProjectDirectory`, which is being deleted. The agent +reads project identity files on demand via `file_read` per the explicit +lookup discipline added to `Resources/AGENTS.md`. The same file order +(`.netclaw/AGENTS.md` → `AGENTS.md` → `CLAUDE.md` → `CONTEXT.md`) +applies, just driven by the agent rather than the daemon. +**Migration:** Delete the daemon-side identity-file-loading code path +(`SystemPromptAssembler` project layer, `FileSystemPromptProvider` +project handling, etc.). Update `Resources/AGENTS.md` to instruct the +agent: "When you start operating on files in a project, locate the +project root and read project context once via `file_read` in this +order: `.netclaw/AGENTS.md`, `AGENTS.md`, `CLAUDE.md`, `CONTEXT.md`. +Read the first one that exists. Don't re-read on later turns — the +content is in your conversation history." + +### Requirement: Project instructions in system prompt +**Reason:** The system prompt no longer includes auto-loaded project +content because there is no `ProjectDirectory` to drive the load. The +agent's `file_read` of project identity files lands the content in +conversation history, not the system prompt — same caching behavior +within a single session, no auto re-injection on project switch +(because there are no project switches; switches happen by the agent +re-reading new project files). +**Migration:** Delete the project-instructions slot from +`SystemPromptAssembler.Assemble()`. Remove the `SetSystemPrompt()` +re-assembly trigger that fired on `set_working_directory` calls (the +trigger was tied to the deleted tool). System prompt becomes shorter +by ~6k tokens in observed sessions where project_dir had been set; +agent picks up the slack via on-demand reads. diff --git a/openspec/changes/approval-policy-trust-zones/specs/session-cwd/spec.md b/openspec/changes/approval-policy-trust-zones/specs/session-cwd/spec.md new file mode 100644 index 00000000..7b026ee9 --- /dev/null +++ b/openspec/changes/approval-policy-trust-zones/specs/session-cwd/spec.md @@ -0,0 +1,71 @@ +## REMOVED Requirements + +### Requirement: Session-scoped project directory +**Reason:** `WorkingContext.ProjectDirectory` is removed. Trust geography +is now anchored on operator-declared `trustedZones` (per audience) plus +the immutable `session_dir`, not on session-mutable project state. The +agent cannot extend trust by declaring a project root. +**Migration:** Remove `ProjectDirectory` field from `WorkingContext` and +from `SessionSnapshot` serialization. Sessions recovered from older +snapshots SHALL ignore any persisted `ProjectDirectory` field. No +operator-facing migration; the field's removal is invisible because v2 +was never shipped beyond development. + +### Requirement: set_working_directory tool +**Reason:** The `set_working_directory` tool is removed. Trust zones are +configuration, not state — the agent does not get to mutate trust at +runtime by declaring a project root. Live evidence shows the agent +defensively prepends `cd && ...` to every shell call anyway, +so the tool's spawn-cwd benefit was unused; its `ProjectDirectory` +side-effect is the part being deleted. +**Migration:** Delete `SetWorkingDirectoryTool.cs` and remove all +references from tool registries, audience profiles, and tool exposure +lists. Update `Resources/AGENTS.md` to instruct the agent to use `cd` in +compound commands or absolute paths. Update agent guidance that +previously referenced `set_working_directory` as the gesture for +declaring project scope. + +### Requirement: Shell tool cwd defaults to declared safe spaces +**Reason:** The cwd resolution chain simplifies because +`WorkingContext.ProjectDirectory` no longer exists. `ShellTool` SHALL +fall back from explicit `WorkingDirectory` argument directly to +`SessionDirectory`. The approval policy no longer reads cwd to decide +zone membership — the zone gate evaluates the paths the command +operates on (extracted from the AST), not the spawn cwd. +**Migration:** Update `ShellTool.cs` cwd resolution: `args.WorkingDirectory → +SessionDirectory`. Remove `WorkingContext.ProjectDirectory` from +`ResolveShellCwd`. Remove the comment block that referenced +`WorkingContext.ProjectDirectory` as a fallback layer. + +### Requirement: Shell tool failure-path hint for cwd outside safe spaces +**Reason:** The hint suggested `set_working_directory ` as the +remediation. With that tool deleted, the hint has no remediation to +point at. Denied calls still return a clear denial message; the agent +self-corrects by either using a path under a trusted zone, asking the +user to extend trust, or accepting the deny. +**Migration:** Remove the hint emission code from `ShellTool.cs` and +related plumbing. No replacement hint is added — the prompt itself +already shows the user the path that triggered the denial; the user can +extend trust via the prompt's `Always` button if desired. + +### Requirement: set_working_directory expands the approval safe space +**Reason:** With `set_working_directory` removed, this requirement has +no operative tool. Trust zone expansion is now exclusively a user +decision via the zone gate prompt's `Trust this directory` button (with +`Session` or `Always` scope). +**Migration:** Remove the safe-space expansion logic from +`ToolAudienceProfileResolver` and any call sites that reacted to +`ProjectDirectory` changes. The new zone gate's union (audience baseline +∪ persisted `trustedZones` ∪ in-memory session zones ∪ session_dir) is +the only mechanism for expanding the trust boundary. + +### Requirement: Working context block includes project directory +**Reason:** With `WorkingContext.ProjectDirectory` removed, the +`[working-context]` block has nothing to emit for project_dir. The +agent reads project context on demand via `file_read` per the explicit +lookup discipline in `Resources/AGENTS.md`; the working-context block +no longer needs to advertise a project root because there isn't one. +**Migration:** Remove the `project_dir:` line from +`WorkingContext.ToContextBlock()` output. If the block becomes empty for +sessions with no recent files either, suppress the entire block from +the system prompt rather than emitting an empty header. diff --git a/openspec/changes/approval-policy-trust-zones/specs/tool-approval-gates/spec.md b/openspec/changes/approval-policy-trust-zones/specs/tool-approval-gates/spec.md new file mode 100644 index 00000000..ebf27605 --- /dev/null +++ b/openspec/changes/approval-policy-trust-zones/specs/tool-approval-gates/spec.md @@ -0,0 +1,976 @@ +## ADDED Requirements + +### Requirement: Three-layer gate evaluation + +The system SHALL evaluate every tool invocation through three layers in +order: (1) hard-deny check, (2) zone gate, (3) verb-pattern gate. A tool +SHALL execute silently only when all three layers pass without prompting. +The hard-deny layer SHALL run first and unconditionally. The zone gate +SHALL run only if hard-deny passes. The verb-pattern gate SHALL run only +if the zone gate passes (either silently or via user approval). + +The three layers SHALL be independent: an approval at one layer SHALL NOT +imply approval at another. Trust granted to a directory at the zone gate +SHALL NOT grant any verb at the verb-pattern gate; trust granted to a verb +pattern at the verb-pattern gate SHALL NOT grant any directory at the zone +gate. + +#### Scenario: All layers silent for read-only verb in trusted zone + +- **GIVEN** the audience baseline includes `~/repos/*` as a trusted zone +- **AND** `cat` is on the read-only verb list +- **WHEN** the agent invokes `shell_execute` with command `cat ~/repos/foo/notes.md` +- **THEN** the hard-deny layer passes +- **AND** the zone gate auto-passes (path inside trusted zone) +- **AND** the verb-pattern gate auto-passes (read-only verb in trusted zone) +- **AND** no prompt is rendered + +#### Scenario: Mutating verb in trusted zone prompts only verb gate + +- **GIVEN** the audience baseline includes `~/repos/*` as a trusted zone +- **AND** the audience has no `git push *` in `verbPatterns` +- **WHEN** the agent invokes `shell_execute` with command `git push origin main` + and a path arg under `~/repos/foo/` +- **THEN** the hard-deny layer passes +- **AND** the zone gate auto-passes +- **AND** the verb-pattern gate prompts the user + +#### Scenario: Untrusted directory and mutating verb produce two sequential prompts + +- **GIVEN** the audience has no zone covering `/etc/nginx` +- **AND** the audience has no `cp *` in `verbPatterns` +- **WHEN** the agent invokes `shell_execute` with command `cp /etc/nginx/old.conf /etc/nginx/new.conf` +- **THEN** the hard-deny layer passes +- **AND** the zone gate prompts first (zone prompt) +- **AND** if the user approves, the verb-pattern gate prompts second (verb prompt) +- **AND** if both approve, the command executes + +#### Scenario: Mixed-zone clause with read-only verb collapses to one zone prompt + +- **GIVEN** a clause `grep -r foo /trusted /untrusted` +- **AND** `/trusted` is inside a trusted zone but `/untrusted` is not +- **AND** `grep` is on the read-only verb list +- **WHEN** the gate evaluator runs +- **THEN** the zone gate prompts for `/untrusted` only +- **AND** the verb-pattern gate auto-passes after zone approval + (read-only verb AND all paths now in trusted scope) +- **AND** the user sees exactly one prompt, then the command executes + +### Requirement: Trust zones store and evaluation + +The system SHALL maintain a per-audience `trustedZones` store of directory +glob patterns. The store SHALL be persisted under each audience key in +`~/.netclaw/config/tool-approvals.json`. The zone gate SHALL pass silently +for a path P when P matches any glob in the union of: (a) the audience +baseline read-allowed roots from `netclaw.json`, (b) the persisted +`trustedZones` for that audience, (c) the in-memory session-scope trusted +zones for the current session, AND (d) the immutable `session_dir` for the +current session (which is always trusted). + +Glob matching SHALL use path-prefix recursive semantics: a zone of +`/*` SHALL match `` itself, any direct child of ``, and any +descendant at any depth. The `*` is implicitly recursive — there is no +`**` in zone globs. Trailing slash variations SHALL be normalized +(`/`, `/*`, `` all denote the same zone). Zone matching +SHALL be boundary-safe: `~/repos/*` SHALL NOT match `~/repossecret` +(directory boundary, not character prefix). + +When a path P does not match any glob in the union, the zone gate SHALL +prompt the user. The prompt SHALL list every untrusted path in the command +in a single batched prompt, not one prompt per path. + +The agent SHALL NOT be able to extend `trustedZones` by issuing commands. +Only user prompt clicks (`Trust this directory` with `Session` or `Always` +scope) SHALL extend the store. The `cd` command in a compound SHALL be +parsed for path attribution but SHALL NOT mutate any zone store. + +#### Scenario: Path inside audience baseline auto-passes + +- **GIVEN** the Personal audience baseline includes `~/repos/*` +- **WHEN** a command operates on `~/repos/foo/file.txt` +- **THEN** the zone gate passes silently for that path + +#### Scenario: Path outside all zones prompts + +- **GIVEN** the Personal audience baseline includes `~/repos/*` only +- **AND** `trustedZones` is empty +- **WHEN** a command operates on `/etc/hosts` +- **THEN** the zone gate prompts the user with `/etc/hosts` listed + +#### Scenario: Multi-path command produces one batched zone prompt + +- **GIVEN** a command operates on `/etc/foo` and `/var/lib/bar`, both untrusted +- **WHEN** the zone gate evaluates the command +- **THEN** a single zone prompt is rendered listing both paths +- **AND** the user's choice applies to both paths simultaneously + +#### Scenario: Session-scope zone grant ends with the session + +- **GIVEN** a user grants `/tmp/scratch` with `Session` scope in session A +- **WHEN** session A terminates +- **AND** session B (same audience) operates on `/tmp/scratch` +- **THEN** the zone gate prompts session B for `/tmp/scratch` + +#### Scenario: Always-scope zone grant survives daemon restart + +- **GIVEN** a user grants `/etc/nginx` with `Always` scope in the Personal audience +- **AND** the daemon is restarted +- **WHEN** any Personal session operates on `/etc/nginx` +- **THEN** the zone gate passes silently + +#### Scenario: Agent cd in compound does not extend trust + +- **GIVEN** the audience has no zone covering `/foreign/path` +- **WHEN** the agent invokes `cd /foreign/path && cat file.txt` +- **THEN** `/foreign/path` is attributed as a path the command operates on +- **AND** the zone gate prompts (path outside all trusted zones) +- **AND** no zone is added to `trustedZones` automatically + +### Requirement: Verb-pattern store and evaluation + +The system SHALL maintain a per-audience `verbPatterns` store of glob-style +verb patterns (e.g., `git push *`, `rm /tmp/*`, `dotnet test *`). The store +SHALL be persisted under each audience key in +`~/.netclaw/config/tool-approvals.json` colocated with `trustedZones`. + +A verb pattern SHALL parse into two parts: a verb-chain prefix (one or +more tokens, length determined by the BashArity dictionary) and a trailing +arg-glob suffix. Pattern matching SHALL succeed when (1) the candidate +command's verb chain (after BashArity collapses multi-token verbs) +matches the pattern's verb-chain prefix exactly, AND (2) the candidate +command's remaining argument tokens match the pattern's arg-glob suffix. +A pattern of `git push *` matches `git push origin main` and +`git push --force` (verb chain `git push` exact, args matched by `*`); it +does NOT match `git pull origin main` (verb mismatch) or `git push-all` +(verb mismatch). Patterns without a trailing glob (`git push`) SHALL be +rejected at write time with an error directing the user to add the +explicit `*` suffix. + +The verb-pattern gate SHALL pass silently for a candidate command when +either: (a) the command's verb chain is on the read-only verb list AND every +path the command operates on is inside a trusted zone (per the zone gate), +OR (b) the command matches any glob in the union of the persisted +`verbPatterns` for the audience and the in-memory session-scope verb +patterns for the current session. + +When neither condition is met, the verb-pattern gate SHALL prompt the user. +Read-only verbs SHALL NOT auto-pass when any path in the command is outside +trusted zones — outside-zone access always prompts regardless of verb safety. + +#### Scenario: Read-only verb in trusted zone auto-passes + +- **GIVEN** `cat` is on the read-only verb list +- **AND** the path operated on is inside a trusted zone +- **WHEN** the verb-pattern gate evaluates `cat /home/user/repos/foo/notes.md` +- **THEN** the gate passes silently + +#### Scenario: Read-only verb outside trusted zones prompts + +- **GIVEN** `cat` is on the read-only verb list +- **AND** the path operated on is outside all trusted zones +- **WHEN** the zone gate has already prompted and the user clicked `Once` +- **AND** the verb-pattern gate then evaluates the command +- **THEN** the verb-pattern gate also prompts the user + +#### Scenario: Mutating verb matching persisted glob auto-passes + +- **GIVEN** `verbPatterns` contains `git push *` +- **WHEN** the agent invokes `git push origin main` +- **THEN** the verb-pattern gate passes silently + +#### Scenario: Mutating verb not matching any glob prompts + +- **GIVEN** `verbPatterns` does not contain a glob matching `kubectl apply` +- **WHEN** the agent invokes `kubectl apply -f manifest.yaml` +- **THEN** the verb-pattern gate prompts the user + +#### Scenario: Session-scope verb grant ends with the session + +- **GIVEN** the user grants `npm install *` with `Session` scope in session A +- **WHEN** session A terminates +- **AND** session B (same audience) invokes `npm install lodash` +- **THEN** the verb-pattern gate prompts session B + +#### Scenario: Always-scope verb grant survives daemon restart + +- **GIVEN** the user grants `dotnet test *` with `Always` scope +- **AND** the daemon is restarted +- **WHEN** any session in that audience invokes `dotnet test` +- **THEN** the verb-pattern gate passes silently + +### Requirement: Two-store per-audience persistence schema + +The system SHALL persist approval state in +`~/.netclaw/config/tool-approvals.json` using a per-audience structure +where each audience key contains exactly two fields: `verbPatterns` (array +of glob strings) and `trustedZones` (array of glob strings). The schema +SHALL NOT contain a `version` field; absence of the v2 schema markers +SHALL trigger archival of the existing file and creation of an empty new +schema file. + +```json +{ + "personal": { + "verbPatterns": ["git push *", "dotnet test *"], + "trustedZones": ["/etc/nginx"] + }, + "team": { + "verbPatterns": [], + "trustedZones": ["/opt/shared"] + } +} +``` + +The file SHALL be operator-editable via the `netclaw approvals` CLI. The +daemon SHALL pick up out-of-band edits on the next approval check without +requiring a restart. + +When the daemon reads a `tool-approvals.json` file that contains v1 or v2 +shape (presence of top-level `version` field, or array of `ApprovalEntry` +records with `verb`/`directory` fields), the file SHALL be archived to +`tool-approvals.json.v2-discarded.bak` and an empty new-schema store SHALL +be returned. No automatic translation of legacy entries SHALL be performed. + +#### Scenario: New-schema file loads correctly + +- **GIVEN** `tool-approvals.json` contains a per-audience structure with `verbPatterns` and `trustedZones` +- **WHEN** the daemon loads the file +- **THEN** the in-memory store reflects the file contents + +#### Scenario: v2 file archived on first read + +- **GIVEN** `tool-approvals.json` contains `"version": 2` and an `ApprovalEntry` array +- **WHEN** the daemon loads the file +- **THEN** the file is moved to `tool-approvals.json.v2-discarded.bak` +- **AND** the in-memory store is empty +- **AND** no v2 entries are translated + +#### Scenario: Operator revoke visible without restart + +- **GIVEN** the daemon is running with `verbPatterns` containing `git push *` +- **WHEN** an operator removes that entry via `netclaw approvals revoke` +- **AND** a new verb-pattern gate evaluation runs for `git push` +- **THEN** the daemon re-reads the file and observes the entry is gone +- **AND** the user is prompted + +### Requirement: In-memory session-scope grants on LlmSessionActor + +The system SHALL maintain in-memory session-scope grants for each session +on the `LlmSessionActor` instance. Two segments SHALL exist: +`SessionTrustedZones` (list of directory globs) and `SessionVerbPatterns` +(list of verb glob strings). These SHALL be populated when the user clicks +the `Session` button on either gate's prompt. + +Session-scope grants SHALL NOT be persisted to disk. They SHALL be lost on +session termination (channel disconnection, daemon restart, actor recovery +from snapshot). `SessionSnapshot` SHALL NOT include session-scope grants. + +#### Scenario: Session-scope zone applies for the rest of the session + +- **GIVEN** a user clicks `Session` on a zone prompt for `/tmp/scratch` +- **WHEN** the agent makes another call operating under `/tmp/scratch` + in the same session +- **THEN** the zone gate passes silently + +#### Scenario: Session-scope verb pattern applies for the rest of the session + +- **GIVEN** a user clicks `Session` on a verb prompt for `npm install *` +- **WHEN** the agent invokes `npm install` again in the same session +- **THEN** the verb-pattern gate passes silently + +#### Scenario: Session-scope grants do not survive daemon restart + +- **GIVEN** session A has session-scope zone `/tmp/scratch` +- **WHEN** the daemon restarts +- **AND** session A is recovered from snapshot +- **THEN** the recovered session has no session-scope grants +- **AND** subsequent operations on `/tmp/scratch` re-prompt + +#### Scenario: Session-scope grants are not in SessionSnapshot + +- **GIVEN** a session with session-scope grants populated +- **WHEN** a snapshot is taken +- **THEN** the snapshot serialization omits the session-scope segments + +### Requirement: Sequential approval workflow per call + +The system SHALL coordinate per-call approval through a +`ToolApprovalWorkflow` value type on the `LlmSessionActor`. The workflow +SHALL transition through stages `Start → ZoneGate → VerbGate → Complete`, +issuing at most one zone prompt and at most one verb prompt per call. The +workflow SHALL execute the gates in order and SHALL only advance to the +verb gate after the zone gate completes (silently or via user response). + +The mid-turn approval pause (`TaskCompletionSource`-based block on the +tool execution task) SHALL remain in place for the duration of the entire +workflow, spanning both prompts when both fire. Other tool calls in the +same batch SHALL execute in parallel and SHALL NOT block on this +workflow. + +If the user denies at any prompt, the workflow SHALL terminate with +`Denied` and the tool task SHALL unblock with the denial result. The +workflow SHALL NOT impose its own approval timeout — a tool call awaiting +user approval SHALL wait indefinitely for a user response. Operators +take as long as they need to evaluate the prompt; the system SHALL NOT +silently transition the workflow to `TimedOut` on a clock. (Session +passivation and daemon restart are separate concerns tracked in +GitHub issue #939; they are out of scope for this requirement.) + +#### Scenario: Zone-then-verb sequential prompts + +- **GIVEN** a call needs both gates +- **WHEN** the workflow runs +- **THEN** the zone prompt is sent first +- **AND** the verb prompt is sent only after the user responds to the zone prompt +- **AND** the user responds to two prompts in sequence + +#### Scenario: Deny on zone prompt skips verb prompt + +- **GIVEN** the zone gate is prompting the user +- **WHEN** the user clicks `Deny` +- **THEN** the workflow terminates with `Denied` +- **AND** the verb prompt is never rendered + +#### Scenario: Concurrent calls each have their own workflow + +- **GIVEN** two tool calls are dispatched in the same batch, both needing approval +- **WHEN** both workflows run +- **THEN** each call has its own `ToolApprovalWorkflow` instance +- **AND** prompt responses are routed to the correct workflow by `CallId` + +#### Scenario: Other tool calls in batch are not blocked + +- **GIVEN** a batch containing `web_search`, `shell_execute` (needs approval), + and `file_read` +- **WHEN** the batch executes +- **THEN** `web_search` and `file_read` execute in parallel immediately +- **AND** `shell_execute` blocks waiting for the workflow to complete + +### Requirement: Sequential 4-button approval prompts + +When the zone gate prompts the user, the prompt SHALL render exactly four +buttons in one row: `Once`, `Session`, `Trust `, `Deny`. The +buttons `Trust ` and `Deny` SHALL be styled per their effect +(`Trust` as primary action; `Deny` as danger). The header text SHALL ask +*"Allow `` to operate inside ``?"* listing one or more +untrusted paths. + +When the verb-pattern gate prompts the user, the prompt SHALL render +exactly four buttons in one row: `Once`, `Session`, `Always `, +`Deny`. The header text SHALL ask *"Allow `` to run?"* where +`` is the glob (e.g., `git push *`). + +All button labels SHALL fit within Slack's 76-character and Discord's +80-character button-text caps. When the displayed entity (path or verb +pattern) would exceed the cap, the label SHALL truncate with an ellipsis +and the full value SHALL appear in the prompt body. + +Both prompts SHALL include the audience name and command context in the +body so the user can scan the decision context without inferring it. + +When a zone prompt lists multiple untrusted paths, the `Trust ...` button +SHALL apply to ALL listed paths atomically (trust-all-or-nothing) and the +button label SHALL read `Trust all listed` (with the count parenthesized +when more than one path is shown). Per-path partial trust SHALL NOT be +expressible from the prompt; users wanting partial trust SHALL fall back +to the CLI (`netclaw approvals trust-zone `) or click `Once` and +let subsequent calls re-prompt. + +Channel adapters SHALL render the four buttons in a fixed positional +order (Once, Session, Trust/Always, Deny) so that text-only or +keyboard-driven channel adapters can map them to letters +`A=Once / B=Session / C=Trust|Always / D=Deny` without per-channel +remapping. The text-only mapping SHALL be considered a forward-compat +contract and SHALL be the order any future text-mode renderer uses. + +#### Scenario: Zone prompt shows path and 4-button row + +- **GIVEN** a command operates on the untrusted path `/etc/nginx` +- **WHEN** the zone prompt is rendered +- **THEN** the header reads "Allow Personal to operate inside /etc/nginx?" +- **AND** the action row contains `Once`, `Session`, `Trust /etc/nginx`, `Deny` + +#### Scenario: Verb prompt shows pattern and 4-button row + +- **GIVEN** a command `git push origin main` and no matching `verbPatterns` +- **WHEN** the verb prompt is rendered +- **THEN** the header reads "Allow `git push *` to run?" +- **AND** the action row contains `Once`, `Session`, `Always git push *`, `Deny` + +#### Scenario: Multi-path zone prompt batches paths + +- **GIVEN** a command operates on `/etc/nginx` and `/var/log`, both untrusted +- **WHEN** the zone prompt is rendered +- **THEN** the body lists both paths +- **AND** a single 4-button row applies to both +- **AND** the trust button label reads `Trust all listed (2)` +- **AND** clicking that button extends `trustedZones` for both paths atomically + +#### Scenario: Long path label truncates with full value in body + +- **GIVEN** an untrusted path that exceeds the button-label cap +- **WHEN** the zone prompt is rendered +- **THEN** the `Trust ...` button label is truncated with an ellipsis +- **AND** the full path appears in the prompt body + +### Requirement: Resolution message format for two-store schema + +After an approval response is processed, the channel SHALL render a +single-line resolution message identifying which store was extended and +the scope. Permitted formats: + +- `Saved zone: ` — for `Always` on the zone prompt. +- `Saved zone (this session): ` — for `Session` on the zone prompt. +- `Saved verb: ` — for `Always` on the verb prompt. +- `Saved verb (this session): ` — for `Session` on the verb prompt. +- `Approved (no save)` — for `Once`. +- `Denied` — for `Deny`. + +The message SHALL replace the previous `Saved: in ` +format. No `Patterns` or `Directory Roots` headers SHALL appear. + +#### Scenario: Always on zone prompt produces zone-saved message + +- **GIVEN** the user clicks `Trust /etc/nginx` (Always scope) on a zone prompt +- **WHEN** the resolution message is rendered +- **THEN** the message reads `Saved zone: /etc/nginx` + +#### Scenario: Session on verb prompt produces session-saved message + +- **GIVEN** the user clicks `Session` on a verb prompt for `npm install *` +- **WHEN** the resolution message is rendered +- **THEN** the message reads `Saved verb (this session): npm install *` + +#### Scenario: Once produces approved-no-save message + +- **GIVEN** the user clicks `Once` on either prompt +- **WHEN** the resolution message is rendered +- **THEN** the message reads `Approved (no save)` + +### Requirement: ShellSyntaxTree dependency for command parsing + +The system SHALL consume the `ShellSyntaxTree` NuGet package +(`github.com/Aaronontheweb/ShellSyntaxTree`) for all shell command +parsing. The matcher SHALL feed the raw command string to +`IShellParser.Parse(string)` and operate exclusively on the returned +`ParsedCommand` AST. The matcher SHALL NOT contain regex-based +tokenization, per-verb path-extraction tables, or quote/escape handling +— those concerns belong to the parser library. + +The matcher SHALL extract from the AST: (a) per-clause verb chains for +the verb-pattern gate, (b) per-clause path arguments (resolved against +the spawn cwd) for the zone gate, (c) cd-in-compound directory +attribution (paths attributed to subsequent commands within the same +compound), (d) redirect target paths, and (e) bash-c inner command +recursion. + +When the parser flags any path token as dynamic (unresolved env var, +unresolved expansion), the matcher SHALL skip that token rather than +extracting a literal value. Extraction failure for a clause SHALL cause +the clause to be treated as path-arg-less; the verb gate still applies. + +#### Scenario: Matcher consumes ShellSyntaxTree AST + +- **GIVEN** a command `git -C /repo log` +- **WHEN** the matcher processes it +- **THEN** it calls `IShellParser.Parse("git -C /repo log")` +- **AND** uses the returned `ParsedCommand` to extract verb chain `git log` + and path `/repo` + +#### Scenario: cd-in-compound propagates paths to subsequent clauses + +- **GIVEN** a command `cd /target && cmd1 file.txt && cmd2 other.txt` +- **WHEN** the matcher processes it +- **THEN** `/target` is attributed as a path each of `cmd1` and `cmd2` operates on +- **AND** zones are checked for `/target` in addition to any explicit path args + +#### Scenario: Dynamic path tokens are skipped + +- **GIVEN** a command `rm $UNRESOLVED_VAR/foo` +- **WHEN** the matcher processes it +- **THEN** the dynamic token is skipped (no literal `$UNRESOLVED_VAR/foo` extracted) +- **AND** the clause is treated as path-arg-less for the zone gate +- **AND** the verb gate still evaluates `rm` + +#### Scenario: bash -c wrapper recurses into inner command + +- **GIVEN** a command `bash -c "git push --force"` +- **WHEN** the matcher processes it +- **THEN** the inner command is parsed +- **AND** verb chain `git push` is extracted from the inner command + +### Requirement: Hard-deny rule source and structured format + +The system SHALL ship hard-deny rule defaults as immutable data compiled +into the daemon binary (`Netclaw.Security.HardDenyDefaults`). The shipped +defaults SHALL NOT be removable or weakenable at runtime. Operators MAY +add additional rules via `~/.netclaw/config/hard-deny-overrides.json`. +The override file SHALL be strictly additive: it MAY introduce new deny +rules; it MUST NOT remove, disable, or weaken any shipped default. The +final ruleset evaluated by the matcher SHALL be `Defaults ∪ Overrides`. + +Rules SHALL use a JSON-structured predicate format with explicit +verb-and-args matching. Each rule is one of: + +```json +{ + "verb": ["netclaw", "daemon", "stop"], + "reason": "hard_deny_self_destructive" +} +``` + +```json +{ + "verb": ["rm"], + "argFlags": ["-rf"], + "firstPath": { "oneOf": ["/", "~", "~/"] }, + "reason": "hard_deny_destructive_root" +} +``` + +For shape-shaped patterns that cannot be expressed as verb+args (e.g., +fork bombs `:(){ :|:& };:` which are pure shell syntax with no real verb), +an explicit `rawText` escape SHALL be permitted, marked with +`"escapeHatch": true` for documentation: + +```json +{ + "rawText": ":\\(\\)\\{.*:\\|:&.*\\};:", + "reason": "hard_deny_fork_bomb", + "escapeHatch": true +} +``` + +Structured rules SHALL evaluate against parsed `Clause` records (verb +chain + arg list); rawText rules SHALL match against the normalized +rendered clause string. A `doctor` startup check SHALL verify shipped +defaults are present in the loaded ruleset and SHALL refuse daemon +startup if any default is missing or shadowed by a malformed override. + +#### Scenario: Shipped default cannot be disabled by override + +- **GIVEN** the override file contains a rule attempting to negate + `netclaw daemon stop` (e.g., a `disable` field referencing it) +- **WHEN** the daemon loads the rules +- **THEN** the override is rejected at parse time with a clear error +- **AND** the shipped default remains active + +#### Scenario: Override adds new deny rule + +- **GIVEN** the override file contains + `{"verb": ["docker", "rm"], "reason": "local_policy"}` +- **WHEN** the matcher evaluates a `docker rm my-container` call +- **THEN** the call is denied via the override rule + +#### Scenario: rawText escape matches fork-bomb-shaped commands + +- **GIVEN** the shipped defaults include the fork-bomb rawText rule +- **WHEN** the agent invokes `shell_execute` with command `:(){ :|:& };:` +- **THEN** the matcher matches the rawText rule against the rendered clause +- **AND** the call is denied with reason `hard_deny_fork_bomb` + +#### Scenario: Doctor refuses startup when default is missing + +- **GIVEN** the daemon binary's compiled defaults include a rule with + reason `hard_deny_self_destructive` +- **AND** the override file is malformed in a way that shadows that rule +- **WHEN** the daemon starts +- **THEN** the doctor check reports the missing default +- **AND** the daemon refuses to start with a loud error + +### Requirement: Security-critical config protection via ToolPathPolicy + +The system SHALL extend the existing `ToolPathPolicy` write-deny and +shell-deny lists to cover `~/.netclaw/config/` (the entire directory +tree). This protects `tool-approvals.json`, +`hard-deny-overrides.json`, `netclaw.json`, and any future operator +config from agent tool writes — `file_write`, `file_edit`, and +`shell_execute` clauses that target paths under +`~/.netclaw/config/` SHALL be hard-denied at the `ToolPathPolicy` +layer, BEFORE the three-layer gate is consulted. + +`ToolPathPolicy` is a hard-deny mechanism: no approval prompt is offered. +Operators wanting to edit config files SHALL do so outside the agent +(their own editor, their own shell), or via the dedicated +`netclaw approvals` / `netclaw audience` CLI commands which bypass the +agent's tool-call path. + +The deny SHALL apply with `ToolPathPolicy`'s existing +symlink-resolving normalization: a planted symlink under a permitted +directory cannot route writes to `~/.netclaw/config/`. + +This requirement defends against prompt-injection attacks where the +agent is instructed (by malicious content read from a web page, file, +or MCP server output) to lift its own constraints by editing the +config files. + +#### Scenario: Agent file_write to tool-approvals.json is denied + +- **WHEN** the agent invokes `file_write` with path + `~/.netclaw/config/tool-approvals.json` +- **THEN** the write is denied at the ToolPathPolicy layer +- **AND** the deny reason indicates security-critical-path +- **AND** no approval prompt is offered + +#### Scenario: Agent shell redirect to netclaw.json is denied + +- **WHEN** the agent invokes `shell_execute` with command + `echo "{}" > ~/.netclaw/config/netclaw.json` +- **THEN** ShellTool's `_pathPolicy.CommandReferencesDeniedPath` returns true +- **AND** the call is denied before the three-layer gate runs + +#### Scenario: Agent file_edit via symlink to config is denied + +- **GIVEN** the agent has created a symlink at `~/scratch/leak` + resolving to `~/.netclaw/config/tool-approvals.json` +- **WHEN** the agent invokes `file_edit` with path `~/scratch/leak` +- **THEN** ToolPathPolicy resolves the symlink and matches the canonical + path against the deny list +- **AND** the call is denied + +#### Scenario: Operator can edit config outside the agent + +- **GIVEN** the operator runs `vim ~/.netclaw/config/netclaw.json` in + their own shell (not the agent's `shell_execute`) +- **WHEN** the file is saved +- **THEN** the daemon picks up the change on next read +- **AND** ToolPathPolicy was never consulted (it only governs agent tool calls) + +### Requirement: Parser anomaly safe-fail + +The gate evaluator SHALL default to the safest behavior when +`ShellSyntaxTree` returns a `ParsedCommand` with the unparseable flag +set or when `IShellParser.Parse` throws: hard-deny is still consulted +(against the raw command string as a fallback, plus against any partial +AST the parser produced); the zone gate SHALL prompt the user as if the +entire raw command operates on a single untrusted path; the +verb-pattern gate SHALL offer only `Once` and `Deny` (no `Session` or +`Always`) so no persistent grant can encode an unparseable shape. + +This requirement ensures that parser bugs, novel shell idioms, or +intentionally crafted unparseable inputs degrade to "extra prompt," +never to "silent bypass." + +#### Scenario: Parse failure offers only Once and Deny + +- **WHEN** `IShellParser.Parse` throws on a malformed command +- **THEN** the matcher catches the failure +- **AND** the verb-pattern gate prompt offers only `Once` and `Deny` +- **AND** the prompt body shows a `parse failure — one-shot only` hint + +#### Scenario: Unparseable AST flag triggers safe-fail + +- **GIVEN** a command containing unbalanced quotes +- **WHEN** `ShellSyntaxTree` parses it and sets the unparseable flag +- **THEN** the gate evaluator routes through the safe-fail path +- **AND** the user sees a prompt rather than silent execution + +#### Scenario: Hard-deny still applies on parse failure + +- **GIVEN** a hard-deny rule for raw text matching `rm -rf /` +- **WHEN** the agent invokes `shell_execute` with `rm -rf /; for i in 1 2; do` + (unbalanced; parser fails) +- **THEN** the rawText hard-deny matches against the raw command +- **AND** the call is denied before any prompt + +## MODIFIED Requirements + +### Requirement: Tool approval configuration per audience + +The system SHALL support per-audience tool approval configuration via +`ToolApprovalConfig` on `ToolAudienceProfile`. Each audience profile SHALL +independently specify a `DefaultMode` (Auto, Approval, Deny) and per-tool +overrides in `ToolOverrides`. The default `DefaultMode` SHALL be `Auto` (no +approval required). Runtime audience defaults SHALL NOT implicitly place +`shell_execute` in `Approval` mode. Instead, the init-generated Personal config +SHALL explicitly write +`ApprovalPolicy.ToolOverrides.shell_execute = Approval` as the recommended +shell-safe default. + +When `shell_execute` is in Approval mode, the three-layer gate (hard-deny, +zone, verb-pattern) SHALL evaluate each invocation. The audience's baseline +trusted zones SHALL be derived from the audience trust profile's +`read_allowed_roots`; the in-memory session-scope grants SHALL apply for +the current session only. + +#### Scenario: Shell requires approval in init-generated Personal config + +- **GIVEN** a Personal audience session whose generated config explicitly sets + `ApprovalPolicy.ToolOverrides.shell_execute` to `Approval` +- **WHEN** the agent invokes `shell_execute` +- **THEN** `ToolAccessPolicy` marks the call as approval-gated +- **AND** the three-layer gate evaluates the call +- **AND** if any layer requires user input, an approval prompt is emitted + +#### Scenario: Tool in Auto mode executes without approval + +- **GIVEN** a tool whose approval mode is `Auto` for the session's audience +- **WHEN** the agent invokes the tool +- **THEN** the tool executes immediately without an approval prompt + +#### Scenario: Tool in Deny mode is always blocked + +- **GIVEN** a tool whose approval mode is `Deny` for the session's audience +- **WHEN** the agent invokes the tool +- **THEN** the tool is denied with reason `tool_denied_by_approval_policy` +- **AND** no approval prompt is offered + +#### Scenario: Per-audience independence + +- **GIVEN** Personal sets `shell_execute` to `Approval` and Team sets it to `Deny` +- **WHEN** a Personal session invokes `shell_execute` +- **THEN** `ToolAccessPolicy` marks the call as approval-gated +- **AND** the three-layer gate evaluates against Personal's stores +- **AND** when a Team session invokes `shell_execute` +- **THEN** the system denies immediately without prompting + +### Requirement: Configurable hard deny list + +The system SHALL enforce a configurable hard deny list of command patterns that +are blocked before the zone gate or verb-pattern gate is consulted. Denied +commands SHALL never be approvable. The system SHALL ship with sensible defaults: +commands that kill the Netclaw daemon process, `rm -rf /`, `rm -rf ~/`, and fork +bombs. Operators SHALL be able to add or remove patterns via configuration. + +The hard deny list SHALL operate on the parsed `ParsedCommand` AST clauses +provided by `ShellSyntaxTree`. Each clause SHALL be checked independently; +a compound containing any hard-denied clause SHALL be denied wholesale. + +#### Scenario: Hard-denied command blocked before any gate + +- **GIVEN** a command matching the hard deny list (e.g., `netclaw daemon stop`) +- **WHEN** the agent invokes `shell_execute` with that command +- **THEN** the command is denied with reason `hard_deny_self_destructive` +- **AND** no zone or verb prompt is offered +- **AND** the denial is logged + +#### Scenario: Hard deny enforced even in HostAllowed mode + +- **GIVEN** `ShellMode` is `HostAllowed` (no approval config) +- **WHEN** the agent runs a hard-denied command +- **THEN** the command is still blocked + +#### Scenario: Operator adds custom hard deny pattern + +- **GIVEN** the operator adds `docker rm` to the hard deny list in config +- **WHEN** the agent runs `docker rm my-container` +- **THEN** the command is denied + +#### Scenario: Compound command with hard-denied clause + +- **GIVEN** a compound command `git add . && netclaw daemon stop` +- **WHEN** the agent invokes `shell_execute` +- **THEN** the entire command is denied because one clause matches hard deny + +### Requirement: IToolApprovalMatcher extension point + +The system SHALL define an `IToolApprovalMatcher` interface for tool-specific +extraction and gate evaluation. Shell SHALL implement zone-and-verb +evaluation using `ShellSyntaxTree`. A default implementation SHALL provide +tool-name-level matching for tools without a custom matcher. + +The shell matcher SHALL accept the parsed `ParsedCommand` AST plus the +audience's trust state (baseline zones, persisted zones, session zones, +persisted verb patterns, session verb patterns) and return per-clause +evaluation results indicating which gate(s) require user prompting. + +#### Scenario: Shell matcher consumes ParsedCommand and trust state + +- **GIVEN** a parsed `npm install lodash` and the Personal audience trust state +- **WHEN** the matcher evaluates the call +- **THEN** the matcher returns the gate state (zone-pass, verb-pass / verb-prompt etc.) + for each clause + +#### Scenario: Default matcher used for tools without custom matcher + +- **GIVEN** a tool without a custom `IToolApprovalMatcher` implementation +- **WHEN** the matcher evaluates the call +- **THEN** the default tool-name-level matcher is used +- **AND** the zone gate is not evaluated (no command paths to check) + +### Requirement: Mid-turn approval pause + +The system SHALL pause individual tool execution tasks when approval is required +without blocking other tool calls in the same batch. The pause SHALL use a +`TaskCompletionSource` that completes when the session actor receives an approval +response. The pause SHALL span the entire `ToolApprovalWorkflow` (both prompts +when both fire). The pause SHALL wait indefinitely for the user response — there +is no clock-driven auto-deny. Operators take as long as they need to evaluate +prompts; the system silently transitioning a workflow to a denied state on a +timer would manufacture race conditions (late clicks landing in already-terminated +workflows) for zero security benefit. + +#### Scenario: Approval-pending tool blocks while others complete + +- **GIVEN** a batch of 3 tool calls: `web_search`, `shell_execute`, `file_read` +- **AND** `shell_execute` requires approval +- **WHEN** the batch executes +- **THEN** `web_search` and `file_read` execute in parallel immediately +- **AND** `shell_execute` blocks waiting for the approval workflow +- **AND** the session actor remains responsive to messages + +#### Scenario: Approval pause waits indefinitely for user response + +- **GIVEN** a zone prompt has been emitted +- **AND** the user has not yet clicked any button +- **WHEN** an arbitrarily long time passes (minutes, hours, until daemon restart) +- **THEN** the workflow remains paused on the TaskCompletionSource +- **AND** no clock-driven transition to `TimedOut` occurs +- **AND** when the user eventually clicks, the workflow resumes from that state + +#### Scenario: Approved tool executes after both prompts complete + +- **GIVEN** a tool is in the verb-pattern gate after zone-gate approval +- **WHEN** the user approves the verb prompt +- **THEN** the tool executes and returns its result +- **AND** any persisted grants are written to `tool-approvals.json` + +#### Scenario: Denied tool returns denial message + +- **GIVEN** a tool is blocked at either prompt +- **WHEN** the user denies +- **THEN** the workflow terminates with `Denied` +- **AND** the tool returns "Command denied by user" as the tool result +- **AND** no command is executed + +### Requirement: ToolInteractionRequest/Response protocol + +The system SHALL define a `ToolInteractionRequest` session output and +`ToolInteractionResponse` session command for channel-mediated approval +interactions. The interaction `Kind` SHALL identify the interaction type +and the gate that issued it: `approval_zone` for the zone gate prompt, +`approval_verb` for the verb-pattern gate prompt. `ToolInteractionRequest` +SHALL be a lifecycle output (always delivered regardless of `OutputFilter`). + +`ToolInteractionRequest` for `approval_zone` SHALL include a `Paths` field +containing the untrusted paths the prompt asks about. `ToolInteractionRequest` +for `approval_verb` SHALL include a `VerbPattern` field containing the +glob pattern proposed for the `Always` button. + +`ToolInteractionResponse` SHALL include `CallId`, the gate the response +applies to, and the selected scope (`Once`, `Session`, `Always`, `Deny`). + +#### Scenario: Zone prompt emitted as session output + +- **GIVEN** the zone gate decides to prompt +- **WHEN** the workflow issues the prompt +- **THEN** a `ToolInteractionRequest` with `Kind=approval_zone` is emitted +- **AND** it includes `CallId`, `ToolName`, the untrusted paths, and the audience name + +#### Scenario: Verb prompt emitted as session output + +- **GIVEN** the verb-pattern gate decides to prompt +- **WHEN** the workflow issues the prompt +- **THEN** a `ToolInteractionRequest` with `Kind=approval_verb` is emitted +- **AND** it includes `CallId`, `ToolName`, the verb pattern, and the audience name + +#### Scenario: Channel routes response to the correct workflow stage + +- **GIVEN** a workflow has emitted a zone prompt +- **WHEN** the user submits a response +- **THEN** the channel sends a `ToolInteractionResponse` to the session actor +- **AND** the workflow advances to the verb gate stage on Approve +- **OR** terminates with Denied + +### Requirement: Channel approval capability + +Channels SHALL declare whether they support interactive approval via a +capability flag. When a tool requires approval and the active channel does NOT +support it, the system SHALL immediately deny the tool with reason +`channel_does_not_support_approval`. The system SHALL NOT hang or timeout. + +The capability check SHALL apply once per call regardless of how many +prompts the workflow would issue — a non-interactive channel cannot +serve any prompt. + +#### Scenario: Unsupported channel auto-denies + +- **GIVEN** the headless channel (no interactive user) +- **AND** `shell_execute` is in Approval mode +- **WHEN** the agent invokes `shell_execute` +- **THEN** the tool is immediately denied with + `channel_does_not_support_approval` + +#### Scenario: Supported channel renders approval prompt + +- **GIVEN** the Slack channel (supports interactive approval) +- **AND** `shell_execute` is in Approval mode +- **WHEN** the agent invokes `shell_execute` requiring zone or verb approval +- **THEN** the channel renders the appropriate prompt (zone or verb) + +## REMOVED Requirements + +### Requirement: Shell command pattern matching +**Reason:** Verb-chain extraction logic is delegated to `ShellSyntaxTree`. +The Netclaw matcher no longer owns tokenization, compound splitting, or +`bash -c` recursion — those concerns moved to the parser library. The +matcher consumes the parsed AST instead. +**Migration:** Replace `ShellTokenizer.SplitCompoundCommand` / +`Tokenize` callers with `IShellParser.Parse` and walk the +`ParsedCommand.Clauses`. See ADDED requirement "ShellSyntaxTree +dependency for command parsing." + +### Requirement: Persistent approval storage +**Reason:** The `(verb, directory)` `ApprovalEntry` schema is replaced by +the two-store per-audience schema with `verbPatterns` and `trustedZones` +glob arrays. The `version: 2` marker and v1 quarantine logic are +obsolete; v1 and v2 file shapes both archive to a `.v2-discarded.bak` +sibling on first read. +**Migration:** No data migration. Operators on pre-release Netclaw will +see their existing `tool-approvals.json` archived and an empty +new-schema file created. They re-grant via prompts as needed. See ADDED +requirement "Two-store per-audience persistence schema." + +### Requirement: Directory-root approvals for shell_execute +**Reason:** The `(verb, directory)` cross-product entry shape is gone. +Trust geography (zones) and trust action (verb patterns) are now two +independent stores, each evaluated by its own gate. +**Migration:** A user wanting v2's `(grep, ~/.netclaw/logs/)` behavior +now grants `~/.netclaw/logs/` to `trustedZones` (silent for read-only +verbs), or both `~/.netclaw/logs/` to `trustedZones` AND `grep *` to +`verbPatterns` (silent for any verb). See ADDED requirements +"Trust zones store and evaluation" and "Verb-pattern store and evaluation." + +### Requirement: Safe-verb auto-allow short-circuit in declared safe spaces +**Reason:** The "safe-verbs in safe-spaces" short-circuit is replaced by +the explicit zone gate. Read-only verbs auto-pass *only* inside trusted +zones; outside-zone access always prompts. The audience-aware safe-space +roots are now resolved from `trustedZones` ∪ baseline ∪ `session_dir` +∪ in-memory session-scope zones, not from a separate +`ToolAudienceProfileResolver` pathway. +**Migration:** The shipped `safe-verbs.linux.json` / +`safe-verbs.windows.json` files become the read-only verb list consulted +by the verb-pattern gate when deciding whether to auto-pass for paths +inside trusted zones. The list itself survives; the gate logic +consuming it changes. The `ScopedShellSafeVerbPolicy` class is +replaced by gate evaluation in the new matcher. + +### Requirement: Five-button approval prompt with verb-and-directory framing +**Reason:** The 5-button row encoded the v2 `(verb, directory)` +cross-product (`Always here` vs `Always anywhere`). The new model has +two independent gates, each with a 4-button row of `[Once / Session / +Always / Deny]`. Sequential prompting replaces the cross-product +button matrix. +**Migration:** Slack and Discord prompt builders SHALL render two +distinct prompt shapes (zone-gate, verb-gate) each with a 4-button row, +issued sequentially. See ADDED requirement "Sequential 4-button approval +prompts." + +### Requirement: Resolution message single-line format +**Reason:** The `Saved: in ` format encodes the +v2 cross-product shape. The new format identifies which store was +extended (zone vs verb) and the scope. +**Migration:** Update channel adapters to emit the new resolution +message format. See ADDED requirement "Resolution message format for +two-store schema." + +### Requirement: Pattern extraction refuses bash control-flow +**Reason:** Bash control-flow detection (for/while/case) is now part of +`ShellSyntaxTree`'s parser. When the parser cannot produce a clean AST +(unparseable input, unbalanced quotes, control-flow), the matcher +SHALL still fall back to offering only `Once` and `Deny`, but the +detection logic itself lives in the parser library. +**Migration:** Replace `ShellTokenizer.SplitCompoundCommand` callers +with `IShellParser.Parse`; the parser surfaces an unparseable flag that +the matcher consumes to constrain the prompt buttons. See ADDED +requirement "ShellSyntaxTree dependency for command parsing." diff --git a/openspec/changes/approval-policy-trust-zones/tasks.md b/openspec/changes/approval-policy-trust-zones/tasks.md new file mode 100644 index 00000000..ce9ed78c --- /dev/null +++ b/openspec/changes/approval-policy-trust-zones/tasks.md @@ -0,0 +1,134 @@ +## 1. ShellSyntaxTree library (external repo) + +- [ ] 1.1 Create `github.com/Aaronontheweb/ShellSyntaxTree` repo with .NET 10 class library project, MIT license, `ShellSyntaxTree` NuGet package id, README, CONTRIBUTING. Verify: empty project builds with `dotnet build`. +- [ ] 1.2 Define core AST records: `ParsedCommand`, `Clause`, `VerbChain`, `Arg` (with `Kind = Literal | EnvVar | Glob | Tilde | DynamicSkip`), `Redirect`, `CompoundOperator` enum. Verify: types compile and have round-trip equality semantics covered by unit tests. +- [ ] 1.3 Define `IShellParser` interface with `ParsedCommand Parse(string command)` and `bool TryParse(string, out ParsedCommand)`. Verify: interface compiles, default implementation throws NotImplementedException. +- [ ] 1.4 Implement `BashLexer`: tokenization with single/double-quote handling, escape sequences, redirect operators, compound operators (`&&`, `||`, `;`, `|`), subshell parens, here-doc start markers. Verify: lexer test suite covers each token kind with positive and negative cases (≥40 cases). +- [ ] 1.5 Implement `BashParser`: recursive-descent over the lexer's token stream producing `ParsedCommand`. Handle clause splitting on compound operators, subshell isolation, `bash -c "inner"` recursion, here-doc body skipping, unparseable-input flag. Verify: parser test suite covers each grammar production with ≥80 cases including counter-examples. +- [ ] 1.6 Implement `BashArity` dictionary: per-verb token count for verb chains (`cd: 1`, `git: 2`, `docker compose: 3`, `bun run: 3`, etc.). Sourced from OpenCode plus Netclaw-observed verbs. Verify: arity lookup unit-tested with ≥30 verbs. +- [ ] 1.7 Implement `FILES`/`CWD`/`CMD_FILES` verb tables for path-arg extraction. Verify: per-verb path-arg extraction unit-tested for each table with ≥40 cases. +- [ ] 1.8 Implement per-verb `pathArgs` filter: knows `chmod 755 file` (first arg is mode, rest are paths), `cp -r src dst` (skip flag, both rest are paths), Windows-cmd `/X` flag skipping. Verify: unit-tested per verb (≥20 verbs). +- [ ] 1.9 Implement `Resolver`: `~` expansion, `$VAR` / `${VAR}` env-var resolution against an injected `IEnvironmentSnapshot`, glob detection (`*`, `?`, `[`), `filesystem::/path` prefix stripping, dynamic-skip when expansion fails. Verify: resolver unit tests cover each expansion case with ≥30 inputs. +- [ ] 1.10 Implement cd-in-compound propagation: walk `Clauses`; when first clause is `cd /target`, attribute `/target` as an additional path on subsequent clauses within the same compound (until subshell boundary). Verify: AST-level test ≥10 cases including nested subshells. +- [ ] 1.11a Draft `tests/Corpus/SANITIZATION.md` describing what gets stripped from session-log seeds (usernames, channel/thread IDs, repo names, internal hostnames, real filenames → placeholders) and the regex patterns used. Aaron reviews before any logs are touched. Verify: SANITIZATION.md committed, Aaron sign-off in PR. +- [ ] 1.11 Author corpus at `tests/Corpus/` with input + expected-AST JSON files. Seed from session logs sanitized per SANITIZATION.md (PII stripped). Target ≥100 corpus entries covering each grammar production and counter-examples. Verify: corpus runner test executes every entry and asserts AST equality; second pass on the corpus confirms zero PII via grep against the sanitization patterns. +- [ ] 1.12 Set up CI on the external repo: `dotnet test` on push, NuGet publish on tag. Verify: CI green on initial commit; tag-based publish documented. +- [ ] 1.13 Cut `0.1.0-alpha` tag and verify NuGet publish pipeline produces a consumable package. Verify: package downloadable from nuget.org and installable in a separate test project. + +## 2. Netclaw consumption of ShellSyntaxTree + +- [ ] 2.1 Add `` to sibling `ShellSyntaxTree` clone in `Netclaw.Security.csproj` for parallel development. Verify: `dotnet build` succeeds with sibling reference. +- [ ] 2.2 Register `BashParser` as `IShellParser` in `Netclaw.Security` DI registration extension. Verify: DI resolution unit-tested. +- [ ] 2.3 Replace `ShellTokenizer.Tokenize` / `SplitCompoundCommand` callers with `IShellParser.Parse` calls returning `ParsedCommand`. Files affected: `ShellCommandPolicy.cs`, `ShellApprovalSemantics.cs`, `ToolPathPolicy.cs`. Verify: all callers compile and pass existing tests after migration. +- [ ] 2.4 Delete `ShellTokenizer.cs` and `ShellTokenizerTests.cs` once all callers migrated. Verify: no references remain via `grep -r ShellTokenizer src/`. +- [ ] 2.5 Once ShellSyntaxTree v0.1 publishes to NuGet, switch `` to `` with version pin. Update `Directory.Packages.props`. Verify: `dotnet restore` succeeds against the package; sibling repo path no longer required. + +## 3. Two-store persistence schema + +- [ ] 3.1 Define new `TrustZoneStore` and `VerbPatternStore` records (per-audience glob arrays). Verify: serialization round-trip tested (System.Text.Json AOT-source-generated). +- [ ] 3.2 Update `tool-approvals.json` JSON schema to per-audience structure with `verbPatterns` and `trustedZones` array fields. Update the schema file at `src/Netclaw.Configuration/Schemas/` (per CLAUDE.md schema sync rule) if `tool-approvals.json` has a schema entry. Verify: schema validates a sample new-shape file and rejects v1/v2 shapes. +- [ ] 3.3 Implement `ToolApprovalStore.LoadAsync` that detects v1/v2 shapes (top-level `version` field, `ApprovalEntry` arrays) and archives to `.v2-discarded.bak`, returning empty new-shape store. Verify: unit tests cover v1, v2, new-shape, and missing-file inputs. +- [ ] 3.4 Implement `ToolApprovalStore.PersistAsync` writing per-audience structure with atomic file replace. Verify: unit-tested for concurrent writers (last-write-wins, no truncation). +- [ ] 3.5 Implement `ToolApprovalStore.AddZone(audience, glob, scope)` and `AddVerbPattern(audience, glob, scope)` methods. Scope `Always` writes to disk; `Session` is a no-op at this layer (handled by LlmSessionActor). Verify: unit-tested. +- [ ] 3.6 Implement `ToolApprovalStore.RevokeZone(audience, glob)` and `RevokeVerbPattern(audience, glob)` methods. Verify: unit-tested. +- [ ] 3.7 Wire out-of-band file edit detection: re-read file on each gate evaluation (or via `ConfigWatcherService` if performance becomes an issue). Verify: integration test edits the file mid-test and confirms next evaluation sees the change. + +## 4. In-memory session-scope grants + +- [ ] 4.1 Add `SessionTrustedZones` and `SessionVerbPatterns` fields to `LlmSessionActor` (in-memory `List` each). Verify: fields initialize empty on actor start. +- [ ] 4.2 Add `LlmSessionActor.AddSessionZone(glob)` and `AddSessionVerbPattern(glob)` methods invoked by the workflow when user clicks `Session` scope. Verify: unit-tested. +- [ ] 4.3 Confirm `SessionSnapshot` does NOT include session-scope grants. Add explicit test asserting snapshot serialization omits these fields. Verify: snapshot round-trip test. +- [ ] 4.4 Confirm actor recovery from snapshot does NOT restore session-scope grants. Verify: recovery test asserts both lists are empty after restore. + +## 5. Three-layer gate evaluator + +- [ ] 5.1 Define `GateEvaluator` service with `Evaluate(ParsedCommand, Audience, TrustState)` returning per-clause `GateResult` (hard-deny / zone-pass / zone-prompt / verb-pass / verb-prompt / verb-pass-readonly). Verify: unit-tested for each result kind. +- [ ] 5.2a Define `HardDenyDefaults` static class in `Netclaw.Security` with the shipped baseline rules as immutable C# data. Each rule uses the structured DSL (verb + argFlags + firstPath predicates, or rawText escape). Translate the existing `toolConfig.HardDenyPatterns` regex list into structured form during this task; document each translation in the commit message. Verify: defaults round-trip through the matcher; every existing hard-deny test still passes after rule translation. +- [ ] 5.2b Implement `HardDenyOverridesLoader` reading `~/.netclaw/config/hard-deny-overrides.json` (additive only; rejects rules attempting to disable shipped defaults). Verify: loader unit-tested for valid additions, malformed JSON, and shadow-attempt rejection. +- [ ] 5.2c Implement `HardDenyDoctorCheck` verifying shipped defaults are present in the loaded final ruleset at startup. Refuse daemon start with loud error if any default is missing. Verify: doctor check unit-tested. +- [ ] 5.2 Implement Layer 1 hard-deny check operating on parsed clauses. For each clause, evaluate the structured rules first; for `escapeHatch: true` rawText rules, normalize the clause back to a string and apply the regex. Verify: hard-deny unit tests pass against parsed-clause inputs and rawText fallback. +- [ ] 5.3 Implement Layer 2 zone gate: extract paths per clause from AST (path args + cd-in-compound attribution + redirect targets); check each against the union (audience baseline ∪ persisted zones ∪ session zones ∪ session_dir); collect untrusted paths into a single batched prompt. Verify: per-path zone evaluation unit-tested with ≥30 cases. +- [ ] 5.4 Implement Layer 3 verb-pattern gate: extract verb chain per clause via BashArity; check against persisted patterns ∪ session patterns. Read-only verb auto-pass conditional on all clause paths being inside trusted zones. Verify: gate decision unit-tested for read-only-in-zone, read-only-outside-zone, mutating-with-pattern, mutating-without-pattern. +- [ ] 5.5 Define `IToolApprovalMatcher` v3 interface accepting `ParsedCommand` + `TrustState` and returning per-clause `GateResult`. Implement `ShellApprovalMatcher` for shell tools; default tool-name matcher for non-shell tools. Verify: matcher unit-tested. +- [ ] 5.6 Wire `GateEvaluator` into `ToolAccessPolicy`. Replace v2 matcher integration. Verify: `ToolAccessPolicy` unit tests updated and passing. +- [ ] 5.7 Implement parser anomaly safe-fail: when `IShellParser.Parse` throws or returns an unparseable AST, route to safe-fail (hard-deny still consults rawText fallback; zone gate prompts as if raw command operates on one untrusted path; verb gate offers only Once/Deny). Verify: safe-fail unit tests covering parse exceptions, unparseable flag, and partial-AST cases. + +## 6. Per-call ToolApprovalWorkflow + +- [ ] 6.1 Define `ToolApprovalWorkflow` value type on `LlmSessionActor` with fields `Call`, `Paths`, `ZoneState`, `VerbState`, `Stage`. Verify: type compiles with minimal allocation footprint. +- [ ] 6.2 Implement workflow state machine `Start → ZoneGate → VerbGate → Complete`. Issue zone prompt when `Paths` contain untrusted entries; advance to verb gate after response; issue verb prompt when verb-pattern gate decides to prompt. Verify: state-machine unit tests cover all transition paths. +- [ ] 6.3 Implement workflow termination on `Deny` at any stage; on `Approved` at `Complete` stage; on `TimedOut` at any stage. Verify: termination unit tests for each cause. +- [ ] 6.4 Wire workflow into `LlmSessionActor` per-call dispatch. Replace v2 single-prompt code path. Verify: integration test exercises a call requiring both prompts. +- [ ] 6.5 Confirm watchdog pause/resume spans the entire workflow (across both prompts). Verify: watchdog test asserts no pre-emption between prompts. +- [ ] 6.6 Apply scope handling: `Once` runs no persistence; `Session` calls `AddSessionZone`/`AddSessionVerbPattern`; `Always` calls `ToolApprovalStore.AddZone`/`AddVerbPattern`. Verify: scope-handling unit tests. + +## 7. Slack adapter + +- [ ] 7.1 Update `SlackApprovalBlockBuilder` to render two distinct prompt shapes by `Kind`: `approval_zone` (header asks about paths; row contains `Once / Session / Trust / Deny`) and `approval_verb` (header asks about verb pattern; row contains `Once / Session / Always / Deny`). Verify: builder unit tests assert block structure for each Kind. +- [ ] 7.2 Implement label truncation when path/pattern exceeds 76-character button-text cap. Full value remains in body. Verify: truncation unit test for ≥10 long inputs. +- [ ] 7.3 Update Slack interaction handler to route response by `Kind` to the correct `ToolApprovalWorkflow` stage. Verify: handler test routes zone vs verb responses correctly. +- [ ] 7.4 Update resolution message rendering: `Saved zone: ...`, `Saved verb: ...`, `Approved (no save)`, `Denied`. Verify: resolution-line unit tests. +- [ ] 7.5 Update Slack sample fixtures and snapshot tests for the new prompt shapes. Verify: snapshot tests pass after baseline update. + +## 8. Discord adapter + +- [ ] 8.1 Update `DiscordApprovalPromptBuilder` to mirror Slack's two-prompt-shape rendering with Discord button styles. Verify: builder unit tests for each Kind. +- [ ] 8.2 Implement label truncation at 80-character cap. Verify: truncation test. +- [ ] 8.3 Update Discord interaction handler for `Kind`-based routing. Verify: handler test. +- [ ] 8.4 Update resolution message rendering for Discord. Verify: resolution-line tests. + +## 9. CLI / TUI + +- [ ] 9.1 Update `netclaw approvals` TUI to surface two tabs `[Z]ones` and `[V]erbs` per audience. Verify: TUI integration test renders both tabs. +- [ ] 9.2 Update `netclaw approvals list` (non-interactive) to print both stores per audience. Verify: CLI snapshot test. +- [ ] 9.3 Update `netclaw approvals revoke` to accept a glob and an axis (`--zone` or `--verb`). Verify: CLI command unit test. +- [ ] 9.4 Update `netclaw approvals trust-verb ` to accept the new glob format (`git push *`, `dotnet test *`). Verify: CLI command test. +- [ ] 9.5 Add `netclaw approvals trust-zone ` for adding a zone via CLI (matching the prompt's `Trust this directory` action). Verify: CLI command test. +- [ ] 9.6 Update `netclaw approvals --help` text with new schema and commands. Verify: help text snapshot test. + +## 10. Deletions + +- [ ] 10.1 Delete `src/Netclaw.Actors/Tools/SetWorkingDirectoryTool.cs` and remove all references from tool registries, audience profiles, tool exposure lists. Verify: `grep -r SetWorkingDirectory src/` returns zero matches outside this change's spec deltas. +- [ ] 10.2 Delete `WorkingContext.ProjectDirectory` field and all reads/writes. Update `WorkingContext.cs`, `SessionSnapshot` serialization, `WorkingContext.ToContextBlock()`. Verify: `grep -r ProjectDirectory src/` returns zero matches outside spec deltas. +- [ ] 10.3 Delete the `[project-instructions]` slot from `SystemPromptAssembler.Assemble()` and the `FileSystemPromptProvider` project handling. Verify: system prompt snapshot tests updated; no project-content slot remains. +- [ ] 10.4 Delete the daemon-side identity-file-loading code path (the `.netclaw/AGENTS.md` / `AGENTS.md` / `CLAUDE.md` / `CONTEXT.md` lookup driven by ProjectDirectory). Verify: code path tests removed; agent-side discipline test added in §11. +- [ ] 10.5 Delete `ScopedShellSafeVerbPolicy` (replaced by inline gate evaluation). Verify: no callers remain; safe-verbs file load logic survives but is consumed directly by the new gate evaluator. +- [ ] 10.6 Update `WorkingContext.ResolveShellCwd` to remove `ProjectDirectory` from the resolution chain (now: `args.WorkingDirectory → SessionDirectory`). Verify: cwd-resolution unit tests pass. +- [ ] 10.7 Delete the `ShellTool` failure-path hint emission for `set_working_directory`. Verify: hint emission tests removed. +- [ ] 10.8 Extend `ToolPathPolicy` write-deny and shell-deny lists in `src/Netclaw.Daemon/Program.cs:609-633` to include `paths.ConfigDirectory`. Single-line addition; uses existing infrastructure. Verify: integration test attempts agent file_write to `~/.netclaw/config/tool-approvals.json`, asserts hard-deny; attempts agent shell_execute redirecting to `~/.netclaw/config/netclaw.json`, asserts hard-deny; attempts via symlink, asserts symlink-resolved deny. + +## 11. Agent guidance + +- [ ] 11.1 Update `src/Netclaw.Configuration/Resources/AGENTS.md` with explicit project-context lookup discipline section (".netclaw/AGENTS.md → AGENTS.md → CLAUDE.md → CONTEXT.md, read once per project per session via file_read; don't re-read on later turns"). Verify: AGENTS.md content review; eval suite case asserts agent reads project context on first project entry. +- [ ] 11.2 Remove any text in `Resources/AGENTS.md` referencing `set_working_directory` as the gesture for declaring project scope. Verify: `grep set_working_directory src/Netclaw.Configuration/Resources/` returns zero. +- [ ] 11.3 Add an "Approval gates" section to `Resources/AGENTS.md` describing the two-gate model (zone + verb), the `Once / Session / Always / Deny` button row, and how to phrase rationale for prompts the user will see. Also document the scripts-as-units-of-approval pattern: for multi-step operations or anything that will be re-run later (scheduled tasks, webhook handlers), write the script into the project workspace and propose its execution as a single approval. Workspace already trusted → one user approval covers the script forever; reminders/webhooks fire `bash /