From 8168913b83c7c8b8ec794ffcdfa302da1a79927e Mon Sep 17 00:00:00 2001 From: Daniel Wise Date: Tue, 3 Mar 2026 05:22:02 -0800 Subject: [PATCH 1/2] feat(runtime): register AGENTS commands as policy-gated runtime actions Add AGENTS command registration, deterministic duplicate handling, policy-gated execution/approval flow, structured observability payloads, tests, docs, and archive synced OpenSpec change. --- docs/agents-runtime-actions.md | 32 +++ .../agents-md-runtime-actions/tasks.md | 24 -- .../.openspec.yaml | 0 .../design.md | 0 .../proposal.md | 0 .../specs/agents-runtime-actions/spec.md | 0 .../tasks.md | 24 ++ openspec/specs/agents-runtime-actions/spec.md | 49 ++++ src/cli/app.tsx | 86 +++++- src/cli/commands/chat.tsx | 7 +- src/cli/runtime.ts | 11 +- src/config/agents-loader.ts | 46 +++- src/observability/transcripts.ts | 1 + src/tools/exec-command.ts | 40 ++- src/tools/registry.ts | 245 +++++++++++++++++- tests/agents-loader.test.ts | 19 ++ tests/tool-registry.test.ts | 167 ++++++++++++ 17 files changed, 708 insertions(+), 43 deletions(-) create mode 100644 docs/agents-runtime-actions.md delete mode 100644 openspec/changes/agents-md-runtime-actions/tasks.md rename openspec/changes/{agents-md-runtime-actions => archive/2026-03-03-agents-md-runtime-actions}/.openspec.yaml (100%) rename openspec/changes/{agents-md-runtime-actions => archive/2026-03-03-agents-md-runtime-actions}/design.md (100%) rename openspec/changes/{agents-md-runtime-actions => archive/2026-03-03-agents-md-runtime-actions}/proposal.md (100%) rename openspec/changes/{agents-md-runtime-actions => archive/2026-03-03-agents-md-runtime-actions}/specs/agents-runtime-actions/spec.md (100%) create mode 100644 openspec/changes/archive/2026-03-03-agents-md-runtime-actions/tasks.md create mode 100644 openspec/specs/agents-runtime-actions/spec.md create mode 100644 tests/tool-registry.test.ts diff --git a/docs/agents-runtime-actions.md b/docs/agents-runtime-actions.md new file mode 100644 index 
0000000..0504145 --- /dev/null +++ b/docs/agents-runtime-actions.md @@ -0,0 +1,32 @@ +# AGENTS Runtime Actions + +`AGENTS.md` command entries are now registered as invokable runtime tools. + +## Command Registration + +- Every command in `## Commands` is registered as `agents:<name>`. +- If no built-in tool already uses the same name, an unprefixed alias (`<name>`) is also registered. +- Duplicate command names in `AGENTS.md` are resolved deterministically: first definition wins and a warning is emitted. + +## Resolution Rules + +- `agents:<name>` always resolves to AGENTS command lookup. +- Unknown `agents:<name>` references return a deterministic not-found result with no shell execution. +- If `<name>` collides with an existing built-in tool, built-in lookup wins and AGENTS command remains available as `agents:<name>`. + +## Execution and Policy + +- AGENTS commands execute through the same command executor as `exec-command`. +- Side effects are classified from the resolved shell command (`read`, `write`, `destructive`, `network`). +- Policy checks run before process execution. +- Interactive mode requires approval for mutating commands. +- Automation mode preserves write allowlist behavior. + +## Observability Payload + +Tool results and runtime observability include structured fields: + +- `actionName` +- `resolvedCommand` +- `policyOutcome` +- `executionSummary` diff --git a/openspec/changes/agents-md-runtime-actions/tasks.md b/openspec/changes/agents-md-runtime-actions/tasks.md deleted file mode 100644 index 54ae185..0000000 --- a/openspec/changes/agents-md-runtime-actions/tasks.md +++ /dev/null @@ -1,24 +0,0 @@ -## 1. Command Registration - -- [ ] 1.1 Extend AGENTS config loading/validation to detect duplicate command names and emit deterministic warnings. -- [ ] 1.2 Add runtime registration logic that converts `AGENTS.md` command entries into invokable runtime action descriptors. 
-- [ ] 1.3 Define and implement command lookup/identifier resolution behavior (including unknown-command error responses). - -## 2. Execution and Policy Integration - -- [ ] 2.1 Implement an AGENTS command action adapter that executes through the existing command execution path. -- [ ] 2.2 Ensure AGENTS command actions classify side effects and invoke policy decisions before process execution. -- [ ] 2.3 Enforce interactive approval for mutating AGENTS commands and preserve automation allowlist behavior. - -## 3. Observability and UX - -- [ ] 3.1 Extend trace/transcript payloads to include AGENTS action name, resolved command, policy outcome, and execution summary. -- [ ] 3.2 Add consistent user-facing summaries for success, denial, and not-found outcomes of AGENTS command actions. -- [ ] 3.3 Document runtime command behavior and resolution rules in user/developer docs. - -## 4. Verification - -- [ ] 4.1 Add/update unit tests for AGENTS command parsing, duplicate-name handling, and registration. -- [ ] 4.2 Add runtime/policy tests for approval-required, allowlisted automation execution, and blocked execution scenarios. -- [ ] 4.3 Add observability/result-shape tests for successful execution, policy denial, and unknown command references. -- [ ] 4.4 Run `pnpm test`, `pnpm typecheck`, and `pnpm lint` to verify implementation stability. 
diff --git a/openspec/changes/agents-md-runtime-actions/.openspec.yaml b/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/.openspec.yaml similarity index 100% rename from openspec/changes/agents-md-runtime-actions/.openspec.yaml rename to openspec/changes/archive/2026-03-03-agents-md-runtime-actions/.openspec.yaml diff --git a/openspec/changes/agents-md-runtime-actions/design.md b/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/design.md similarity index 100% rename from openspec/changes/agents-md-runtime-actions/design.md rename to openspec/changes/archive/2026-03-03-agents-md-runtime-actions/design.md diff --git a/openspec/changes/agents-md-runtime-actions/proposal.md b/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/proposal.md similarity index 100% rename from openspec/changes/agents-md-runtime-actions/proposal.md rename to openspec/changes/archive/2026-03-03-agents-md-runtime-actions/proposal.md diff --git a/openspec/changes/agents-md-runtime-actions/specs/agents-runtime-actions/spec.md b/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/specs/agents-runtime-actions/spec.md similarity index 100% rename from openspec/changes/agents-md-runtime-actions/specs/agents-runtime-actions/spec.md rename to openspec/changes/archive/2026-03-03-agents-md-runtime-actions/specs/agents-runtime-actions/spec.md diff --git a/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/tasks.md b/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/tasks.md new file mode 100644 index 0000000..a5ce74f --- /dev/null +++ b/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/tasks.md @@ -0,0 +1,24 @@ +## 1. Command Registration + +- [x] 1.1 Extend AGENTS config loading/validation to detect duplicate command names and emit deterministic warnings. +- [x] 1.2 Add runtime registration logic that converts `AGENTS.md` command entries into invokable runtime action descriptors. 
+- [x] 1.3 Define and implement command lookup/identifier resolution behavior (including unknown-command error responses). + +## 2. Execution and Policy Integration + +- [x] 2.1 Implement an AGENTS command action adapter that executes through the existing command execution path. +- [x] 2.2 Ensure AGENTS command actions classify side effects and invoke policy decisions before process execution. +- [x] 2.3 Enforce interactive approval for mutating AGENTS commands and preserve automation allowlist behavior. + +## 3. Observability and UX + +- [x] 3.1 Extend trace/transcript payloads to include AGENTS action name, resolved command, policy outcome, and execution summary. +- [x] 3.2 Add consistent user-facing summaries for success, denial, and not-found outcomes of AGENTS command actions. +- [x] 3.3 Document runtime command behavior and resolution rules in user/developer docs. + +## 4. Verification + +- [x] 4.1 Add/update unit tests for AGENTS command parsing, duplicate-name handling, and registration. +- [x] 4.2 Add runtime/policy tests for approval-required, allowlisted automation execution, and blocked execution scenarios. +- [x] 4.3 Add observability/result-shape tests for successful execution, policy denial, and unknown command references. +- [x] 4.4 Run `pnpm test`, `pnpm typecheck`, and `pnpm lint` to verify implementation stability. diff --git a/openspec/specs/agents-runtime-actions/spec.md b/openspec/specs/agents-runtime-actions/spec.md new file mode 100644 index 0000000..f5732ed --- /dev/null +++ b/openspec/specs/agents-runtime-actions/spec.md @@ -0,0 +1,49 @@ +# agents-runtime-actions Specification + +## Purpose +TBD - created by archiving change agents-md-runtime-actions. Update Purpose after archive. +## Requirements +### Requirement: Runtime SHALL Register AGENTS Commands as Invokable Actions +The runtime SHALL load command entries from `AGENTS.md` and register each valid command as an invokable runtime action before agent execution begins. 
+ +#### Scenario: Commands available after config load +- **WHEN** a workspace contains `AGENTS.md` with one or more valid command entries +- **THEN** runtime action discovery includes each command entry by its declared name + +#### Scenario: Missing AGENTS file +- **WHEN** no `AGENTS.md` file exists in the workspace +- **THEN** runtime action discovery proceeds without AGENTS command actions and without fatal error + +### Requirement: Runtime SHALL Execute AGENTS Commands Through Policy-Gated Command Execution +The system SHALL route AGENTS command actions through the standard command execution path and SHALL evaluate policy/approval before running shell commands. + +#### Scenario: Mutating command requires approval in interactive mode +- **WHEN** an AGENTS command action resolves to a mutating shell command and mode is interactive +- **THEN** the policy engine returns an approval-required decision before command execution + +#### Scenario: Automation allowlist permits safe write +- **WHEN** an AGENTS command action resolves to a write command in automation mode and policy allowlist explicitly permits it +- **THEN** the command is executed without additional interactive approval + +### Requirement: Runtime SHALL Produce Structured Results for AGENTS Command Actions +For every AGENTS command action execution attempt, the runtime SHALL produce structured result data including action name, resolved command, policy decision outcome, and execution summary. 
+ +#### Scenario: Successful execution emits structured summary +- **WHEN** an AGENTS command action executes successfully +- **THEN** trace/transcript records include the AGENTS command name, executed command string, and summarized stdout/stderr outcome + +#### Scenario: Policy-blocked execution emits structured denial +- **WHEN** policy denies an AGENTS command action +- **THEN** trace/transcript records include denial reason and no command process is started + +### Requirement: Runtime SHALL Handle Invalid or Unknown AGENTS Command References Deterministically +The runtime SHALL return a deterministic error when an AGENTS command action reference is unknown, malformed, or conflicts in ways that prevent safe execution. + +#### Scenario: Unknown command name +- **WHEN** the agent requests execution of an AGENTS command name that is not registered +- **THEN** runtime returns a not-found error result with no shell execution + +#### Scenario: Duplicate command names in AGENTS definition +- **WHEN** `AGENTS.md` defines duplicate command names +- **THEN** runtime applies documented resolution behavior consistently and emits a structured warning + diff --git a/src/cli/app.tsx b/src/cli/app.tsx index de980f2..b176592 100644 --- a/src/cli/app.tsx +++ b/src/cli/app.tsx @@ -3,6 +3,7 @@ import TextInput from 'ink-text-input'; import { useMemo, useRef, useState } from 'react'; import type { AgentOrchestrator } from '../agent/orchestrator'; import type { TraceStore } from '../observability/traces'; +import type { TranscriptStore } from '../observability/transcripts'; import type { ToolRegistry } from '../tools/registry'; import type { ToolInvocation, ToolResult } from '../tools/schemas'; import { @@ -20,6 +21,7 @@ type AppProps = { orchestrator: AgentOrchestrator; tools: ToolRegistry; traces: TraceStore; + transcripts: TranscriptStore; }; type PendingApproval = { @@ -46,7 +48,7 @@ const APPROVAL_COMMANDS: SlashCommand[] = [ { command: '/help', description: 'Show available 
slash commands' }, ]; -export function ChatApp({ orchestrator, tools, traces }: AppProps) { +export function ChatApp({ orchestrator, tools, traces, transcripts }: AppProps) { const [value, setValue] = useState(''); const [output, setOutput] = useState(''); const [busy, setBusy] = useState(false); @@ -118,8 +120,11 @@ export function ChatApp({ orchestrator, tools, traces }: AppProps) { } const invocation = checkpoint.toolPlan[index]; - const sensitive = isSensitiveAction(invocation); - if (sensitive) { + const policyResult = await tools.invoke(invocation, { mode: 'interactive' }); + const policyOutcome = getPolicyOutcome(policyResult); + + let result = policyResult; + if (policyOutcome?.requiresApproval) { if (!(await transitionPhase('awaiting_approval'))) { setOutput('Error: invalid lifecycle transition while awaiting approval.'); return; @@ -154,13 +159,31 @@ export function ChatApp({ orchestrator, tools, traces }: AppProps) { setOutput('Error: invalid lifecycle transition while re-entering execution.'); return; } + + result = await tools.invoke(invocation, { + mode: 'interactive', + approvalGranted: true, + }); } - const result = await tools.invoke(invocation); assistantMessage += `\n${formatToolResult(result)}`; + const observabilityPayload = buildExecutionPayload(invocation, result); + await traces.write({ + timestamp: new Date().toISOString(), + type: 'tool.execution', + sessionId: SESSION_ID, + payload: observabilityPayload, + }); + await transcripts.write({ + timestamp: new Date().toISOString(), + sessionId: SESSION_ID, + role: 'system', + text: formatToolResult(result), + payload: observabilityPayload, + }); completed.add(index); - if (sensitive) { + if (policyOutcome?.requiresApproval || isSensitiveAction(invocation)) { await saveCheckpoint({ ...checkpoint, phase: lifecycleRef.current.getPhase(), @@ -371,6 +394,59 @@ function formatToolResult(result: ToolResult): string { return `Tool ${result.tool} failed: ${result.summary}${result.stderr ? 
` (${result.stderr.trim()})` : ''}`; } +function getPolicyOutcome(result: ToolResult): { + allowed: boolean; + requiresApproval: boolean; + reason: string; + sideEffect: ToolInvocation['sideEffect']; +} | null { + const candidate = result.payload.policyOutcome; + if (!candidate || typeof candidate !== 'object') { + return null; + } + const policyOutcome = candidate as Record; + if ( + typeof policyOutcome.allowed !== 'boolean' || + typeof policyOutcome.requiresApproval !== 'boolean' || + typeof policyOutcome.reason !== 'string' || + (policyOutcome.sideEffect !== 'read' && + policyOutcome.sideEffect !== 'write' && + policyOutcome.sideEffect !== 'destructive' && + policyOutcome.sideEffect !== 'network') + ) { + return null; + } + return { + allowed: policyOutcome.allowed, + requiresApproval: policyOutcome.requiresApproval, + reason: policyOutcome.reason, + sideEffect: policyOutcome.sideEffect, + }; +} + +function buildExecutionPayload( + invocation: ToolInvocation, + result: ToolResult +): Record { + const policyOutcome = getPolicyOutcome(result); + const resolvedCommand = + typeof result.payload.resolvedCommand === 'string' + ? result.payload.resolvedCommand + : typeof result.payload.command === 'string' + ? result.payload.command + : null; + return { + actionName: + typeof result.payload.actionName === 'string' ? 
result.payload.actionName : invocation.tool, + tool: invocation.tool, + resolvedCommand, + policyOutcome, + executionSummary: result.summary, + ok: result.ok, + exitCode: result.exitCode, + }; +} + function formatHelpText(commands: SlashCommand[]): string { const lines = ['Available slash commands:']; for (const command of commands) { diff --git a/src/cli/commands/chat.tsx b/src/cli/commands/chat.tsx index 933939c..41e30d3 100644 --- a/src/cli/commands/chat.tsx +++ b/src/cli/commands/chat.tsx @@ -45,6 +45,11 @@ export async function runChatCommand(prompt?: string): Promise { } render( - + ); } diff --git a/src/cli/runtime.ts b/src/cli/runtime.ts index 4cff230..d1528a6 100644 --- a/src/cli/runtime.ts +++ b/src/cli/runtime.ts @@ -1,4 +1,5 @@ import { AgentOrchestrator } from '../agent/orchestrator'; +import { loadAgentsConfig } from '../config/agents-loader'; import { createDb } from '../db/client'; import { runMigrations } from '../db/migrate'; import { OptionalOtelExporter } from '../observability/otel'; @@ -12,6 +13,7 @@ import { ToolRegistry } from '../tools/registry'; export async function createRuntime() { await runMigrations(); const db = await createDb(); + const agentsConfig = await loadAgentsConfig(process.cwd()); const provider = createProviderAdapter(detectProvider()); const policyEngine = new DefaultPolicyEngine(createDefaultApprovalPolicy()); const orchestrator = new AgentOrchestrator({ @@ -28,6 +30,13 @@ export async function createRuntime() { traces: new TraceStore(), transcripts: new TranscriptStore(), otel: new OptionalOtelExporter(), - tools: new ToolRegistry(), + tools: new ToolRegistry({ + policyEngine, + defaultMode: 'interactive', + agentsConfig, + onWarning: (message) => { + console.warn(`[agents-config] ${message}`); + }, + }), }; } diff --git a/src/config/agents-loader.ts b/src/config/agents-loader.ts index 0c49276..fa0a211 100644 --- a/src/config/agents-loader.ts +++ b/src/config/agents-loader.ts @@ -6,9 +6,18 @@ export type AgentCommand = 
{ command: string; }; +export type AgentsConfigWarning = { + type: 'duplicate-command'; + commandName: string; + keptIndex: number; + ignoredIndexes: number[]; + message: string; +}; + export type AgentsConfig = { commands: AgentCommand[]; hooks: Array<{ event: string; command: string }>; + warnings: AgentsConfigWarning[]; }; function parseSectionLines(content: string, heading: string): string[] { @@ -35,21 +44,52 @@ export async function loadAgentsConfig(cwd: string): Promise { const path = join(cwd, 'AGENTS.md'); const content = await readFile(path, 'utf8').catch(() => ''); if (!content) { - return { commands: [], hooks: [] }; + return { commands: [], hooks: [], warnings: [] }; } const commandLines = parseSectionLines(content, '## Commands'); const hookLines = parseSectionLines(content, '## Hooks'); - const commands = commandLines + const parsedCommands = commandLines .map((line) => line.match(/^[-*]\s*`?([^:`]+)`?\s*:\s*(.+)$/)) .filter((match): match is RegExpMatchArray => Boolean(match)) .map((match) => ({ name: match[1].trim(), command: match[2].trim() })); + const warnings: AgentsConfigWarning[] = []; + const commandNameToFirstIndex = new Map(); + const duplicateIndexes = new Map(); + for (const [index, command] of parsedCommands.entries()) { + const name = command.name; + const existing = commandNameToFirstIndex.get(name); + if (existing === undefined) { + commandNameToFirstIndex.set(name, index); + continue; + } + const duplicates = duplicateIndexes.get(name) ?? []; + duplicates.push(index); + duplicateIndexes.set(name, duplicates); + } + + const commands = parsedCommands.filter((command, index) => { + const firstIndex = commandNameToFirstIndex.get(command.name); + return firstIndex === index; + }); + + for (const [commandName, ignoredIndexes] of duplicateIndexes.entries()) { + const keptIndex = commandNameToFirstIndex.get(commandName) ?? 
0; + warnings.push({ + type: 'duplicate-command', + commandName, + keptIndex, + ignoredIndexes, + message: `Duplicate AGENTS command "${commandName}" found at indexes ${ignoredIndexes.join(', ')}; using index ${keptIndex}.`, + }); + } + const hooks = hookLines .map((line) => line.match(/^[-*]\s*`?([^:`]+)`?\s*:\s*(.+)$/)) .filter((match): match is RegExpMatchArray => Boolean(match)) .map((match) => ({ event: match[1].trim(), command: match[2].trim() })); - return { commands, hooks }; + return { commands, hooks, warnings }; } diff --git a/src/observability/transcripts.ts b/src/observability/transcripts.ts index ec364e1..fc4c5d5 100644 --- a/src/observability/transcripts.ts +++ b/src/observability/transcripts.ts @@ -7,6 +7,7 @@ export type TranscriptEntry = { sessionId: string; role: 'user' | 'assistant' | 'system'; text: string; + payload?: Record; }; export class TranscriptStore { diff --git a/src/tools/exec-command.ts b/src/tools/exec-command.ts index 2760a9f..e503285 100644 --- a/src/tools/exec-command.ts +++ b/src/tools/exec-command.ts @@ -1,12 +1,48 @@ import { spawn } from 'node:child_process'; -import type { ToolResult } from './schemas'; +import type { ToolInvocation, ToolResult } from './schemas'; export type ExecCommandInput = { command: string; cwd: string; + toolName?: string; timeoutMs?: number; }; +const DESTRUCTIVE_PATTERNS = [ + /\brm\s+-rf\b/i, + /\bmkfs\b/i, + /\bdd\s+if=/i, + /\bshutdown\b/i, + /\breboot\b/i, + /\bchmod\b/i, +]; + +const WRITE_PATTERNS = [ + />/, + /\btee\b/i, + /\bmv\b/i, + /\bcp\b/i, + /\btouch\b/i, + /\bmkdir\b/i, + /\bpnpm\s+lint(?::\w+)?(?:\s+--\w+)?\s+--write\b/i, + /\bgit\s+add\b/i, +]; + +const NETWORK_PATTERNS = [/\bcurl\b/i, /\bwget\b/i, /\bnpm\s+install\b/i, /\bpnpm\s+add\b/i]; + +export function classifyCommandSideEffect(command: string): ToolInvocation['sideEffect'] { + if (DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command))) { + return 'destructive'; + } + if (WRITE_PATTERNS.some((pattern) => 
pattern.test(command))) { + return 'write'; + } + if (NETWORK_PATTERNS.some((pattern) => pattern.test(command))) { + return 'network'; + } + return 'read'; +} + export async function executeCommand(input: ExecCommandInput): Promise { const timeoutMs = input.timeoutMs ?? 120_000; @@ -37,7 +73,7 @@ export async function executeCommand(input: ExecCommandInput): Promise { clearTimeout(timer); resolve({ - tool: 'exec-command', + tool: input.toolName ?? 'exec-command', ok: code === 0 && !killedByTimeout, summary: killedByTimeout ? 'Command timed out' diff --git a/src/tools/registry.ts b/src/tools/registry.ts index 030617a..6cda5ea 100644 --- a/src/tools/registry.ts +++ b/src/tools/registry.ts @@ -1,29 +1,152 @@ import { z } from 'zod'; -import { executeCommand } from './exec-command'; +import type { AgentsConfig } from '../config/agents-loader'; +import type { DefaultPolicyEngine } from '../policy/engine'; +import type { ApprovalDecision } from '../policy/schemas'; +import { classifyCommandSideEffect, executeCommand } from './exec-command'; import { ToolInvocationSchema, type ToolResult } from './schemas'; -export type ToolHandler = (params: Record) => Promise; +export type ToolHandlerContext = { + invocationTool: string; + sideEffect: 'read' | 'write' | 'destructive' | 'network'; + mode: 'interactive' | 'automation'; + approvalGranted: boolean; +}; + +export type ToolHandler = ( + params: Record, + context: ToolHandlerContext +) => Promise; + +export type ToolInvokeOptions = { + mode?: 'interactive' | 'automation'; + approvalGranted?: boolean; +}; + +export type ToolRegistryOptions = { + policyEngine?: DefaultPolicyEngine; + defaultMode?: 'interactive' | 'automation'; + agentsConfig?: AgentsConfig; + onWarning?: (message: string) => void; +}; + +function deniedResult(input: { + tool: string; + actionName?: string; + resolvedCommand: string; + decision: ApprovalDecision; + summary: string; +}): ToolResult { + return { + tool: input.tool, + ok: false, + summary: 
input.summary, + payload: { + actionName: input.actionName ?? input.tool, + resolvedCommand: input.resolvedCommand, + policyOutcome: input.decision, + executionSummary: input.summary, + }, + stdout: '', + stderr: input.decision.reason, + exitCode: 126, + }; +} export class ToolRegistry { private handlers = new Map(); + private policyEngine?: DefaultPolicyEngine; + private defaultMode: 'interactive' | 'automation'; - constructor() { - this.register('exec-command', async (params) => { + constructor(options: ToolRegistryOptions = {}) { + this.policyEngine = options.policyEngine; + this.defaultMode = options.defaultMode ?? 'interactive'; + + this.register('exec-command', async (params, context) => { const command = z.string().parse(params.command); const cwd = z.string().default(process.cwd()).parse(params.cwd); const timeoutMs = z.number().int().positive().optional().parse(params.timeoutMs); - return executeCommand({ command, cwd, timeoutMs }); + + const decision = this.evaluatePolicy({ + command, + mode: context.mode, + sideEffect: context.sideEffect, + }); + if (decision && !decision.allowed && !decision.requiresApproval) { + return deniedResult({ + tool: context.invocationTool, + resolvedCommand: command, + decision, + summary: `Execution denied by policy: ${decision.reason}`, + }); + } + if (decision?.requiresApproval && !context.approvalGranted) { + return deniedResult({ + tool: context.invocationTool, + resolvedCommand: command, + decision, + summary: 'Approval required before command execution', + }); + } + + const result = await executeCommand({ + command, + cwd, + timeoutMs, + toolName: context.invocationTool, + }); + return { + ...result, + payload: { + ...result.payload, + policyOutcome: + decision ?? 
+ ({ + allowed: true, + requiresApproval: false, + reason: 'Policy engine unavailable', + sideEffect: context.sideEffect, + } satisfies ApprovalDecision), + executionSummary: result.summary, + }, + }; }); + + for (const warning of options.agentsConfig?.warnings ?? []) { + options.onWarning?.(warning.message); + } + + for (const command of options.agentsConfig?.commands ?? []) { + this.registerAgentsCommand(command.name, command.command, options.onWarning); + } } register(name: string, handler: ToolHandler): void { this.handlers.set(name, handler); } - async invoke(raw: unknown): Promise { + async invoke(raw: unknown, options: ToolInvokeOptions = {}): Promise { const invocation = ToolInvocationSchema.parse(raw); const handler = this.handlers.get(invocation.tool); + const mode = options.mode ?? this.defaultMode; + const approvalGranted = options.approvalGranted ?? false; if (!handler) { + if (invocation.tool.startsWith('agents:')) { + const commandName = invocation.tool.slice('agents:'.length); + return { + tool: invocation.tool, + ok: false, + summary: `AGENTS command not found: ${commandName}`, + payload: { + actionName: commandName, + resolvedCommand: null, + policyOutcome: null, + executionSummary: 'Unknown AGENTS command', + }, + stdout: '', + stderr: `Unknown AGENTS command: ${commandName}`, + exitCode: 127, + }; + } return { tool: invocation.tool, ok: false, @@ -35,6 +158,114 @@ export class ToolRegistry { }; } - return handler(invocation.params); + return handler(invocation.params, { + invocationTool: invocation.tool, + sideEffect: invocation.sideEffect, + mode, + approvalGranted, + }); + } + + private registerAgentsCommand( + name: string, + command: string, + onWarning?: (message: string) => void + ): void { + const namespacedTool = `agents:${name}`; + + this.register(namespacedTool, async (params, context) => { + const cwd = z.string().default(process.cwd()).parse(params.cwd); + const timeoutMs = 
z.number().int().positive().optional().parse(params.timeoutMs); + const resolvedCommand = command; + const inferredSideEffect = classifyCommandSideEffect(resolvedCommand); + const decision = this.evaluatePolicy({ + command: resolvedCommand, + mode: context.mode, + sideEffect: inferredSideEffect, + }); + + if (decision && !decision.allowed && !decision.requiresApproval) { + return deniedResult({ + tool: context.invocationTool, + actionName: name, + resolvedCommand, + decision, + summary: `AGENTS command denied by policy: ${decision.reason}`, + }); + } + + if (decision?.requiresApproval && !context.approvalGranted) { + return deniedResult({ + tool: context.invocationTool, + actionName: name, + resolvedCommand, + decision, + summary: 'Approval required before AGENTS command execution', + }); + } + + const result = await executeCommand({ + command: resolvedCommand, + cwd, + timeoutMs, + toolName: context.invocationTool, + }); + + return { + ...result, + payload: { + ...result.payload, + actionName: name, + resolvedCommand, + policyOutcome: + decision ?? + ({ + allowed: true, + requiresApproval: false, + reason: 'Policy engine unavailable', + sideEffect: inferredSideEffect, + } satisfies ApprovalDecision), + executionSummary: result.summary, + }, + }; + }); + + if (!this.handlers.has(name)) { + this.register( + name, + async (params, context) => + this.handlers.get(namespacedTool)?.(params, context) ?? + Promise.resolve({ + tool: context.invocationTool, + ok: false, + summary: `No handler registered for ${context.invocationTool}`, + payload: {}, + stdout: '', + stderr: `No handler for ${context.invocationTool}`, + exitCode: 127, + }) + ); + return; + } + + onWarning?.( + `Skipping unprefixed AGENTS command alias "${name}" because a tool with that name already exists. 
Use "${namespacedTool}".` + ); + } + + private evaluatePolicy(input: { + command: string; + mode: 'interactive' | 'automation'; + sideEffect: 'read' | 'write' | 'destructive' | 'network'; + }): ApprovalDecision | null { + if (!this.policyEngine) { + return null; + } + return this.policyEngine.evaluateCommand({ + command: input.command, + cwd: process.cwd(), + mode: input.mode, + sideEffect: input.sideEffect, + }); } } diff --git a/tests/agents-loader.test.ts b/tests/agents-loader.test.ts index a078ea1..16ae05d 100644 --- a/tests/agents-loader.test.ts +++ b/tests/agents-loader.test.ts @@ -17,5 +17,24 @@ describe('loadAgentsConfig', () => { expect(config.commands).toHaveLength(2); expect(config.hooks).toHaveLength(1); expect(config.commands[0].name).toBe('build'); + expect(config.warnings).toEqual([]); + }); + + it('keeps first duplicate command and emits deterministic warning', async () => { + const dir = await mkdtemp(join(tmpdir(), 'dubsbot-agents-dup-')); + await writeFile( + join(dir, 'AGENTS.md'), + `# AGENTS\n\n## Commands\n- test: pnpm test\n- build: pnpm build\n- test: pnpm test:watch\n\n## Hooks\n- file-change: pnpm test\n`, + 'utf8' + ); + + const config = await loadAgentsConfig(dir); + expect(config.commands).toHaveLength(2); + expect(config.commands[0]).toEqual({ name: 'test', command: 'pnpm test' }); + expect(config.warnings).toHaveLength(1); + expect(config.warnings[0].type).toBe('duplicate-command'); + expect(config.warnings[0].commandName).toBe('test'); + expect(config.warnings[0].keptIndex).toBe(0); + expect(config.warnings[0].ignoredIndexes).toEqual([2]); }); }); diff --git a/tests/tool-registry.test.ts b/tests/tool-registry.test.ts new file mode 100644 index 0000000..d686e04 --- /dev/null +++ b/tests/tool-registry.test.ts @@ -0,0 +1,167 @@ +import { describe, expect, it } from 'vitest'; +import { createDefaultApprovalPolicy } from '../src/policy/defaults'; +import { DefaultPolicyEngine } from '../src/policy/engine'; +import { ToolRegistry } from 
'../src/tools/registry'; + +describe('ToolRegistry AGENTS runtime actions', () => { + it('registers AGENTS commands as invokable actions', async () => { + const registry = new ToolRegistry({ + agentsConfig: { + commands: [{ name: 'test', command: 'printf "ok"' }], + hooks: [], + warnings: [], + }, + }); + + const prefixed = await registry.invoke({ + tool: 'agents:test', + sideEffect: 'read', + params: {}, + }); + const alias = await registry.invoke({ + tool: 'test', + sideEffect: 'read', + params: {}, + }); + + expect(prefixed.ok).toBe(true); + expect(alias.ok).toBe(true); + expect(prefixed.payload.actionName).toBe('test'); + expect(prefixed.payload.resolvedCommand).toBe('printf "ok"'); + }); + + it('returns deterministic not-found response for unknown AGENTS command identifier', async () => { + const registry = new ToolRegistry(); + + const result = await registry.invoke({ + tool: 'agents:missing-command', + sideEffect: 'read', + params: {}, + }); + + expect(result.ok).toBe(false); + expect(result.summary).toContain('AGENTS command not found'); + expect(result.exitCode).toBe(127); + }); + + it('requires interactive approval for mutating AGENTS commands', async () => { + const registry = new ToolRegistry({ + policyEngine: new DefaultPolicyEngine(createDefaultApprovalPolicy()), + defaultMode: 'interactive', + agentsConfig: { + commands: [{ name: 'fix', command: 'echo hi > tmp.txt' }], + hooks: [], + warnings: [], + }, + }); + + const result = await registry.invoke({ + tool: 'agents:fix', + sideEffect: 'read', + params: {}, + }); + + expect(result.ok).toBe(false); + expect(result.summary).toContain('Approval required'); + expect(result.payload.policyOutcome).toMatchObject({ + requiresApproval: true, + sideEffect: 'write', + }); + }); + + it('allows automation write execution when command matches allowlist', async () => { + const registry = new ToolRegistry({ + policyEngine: new DefaultPolicyEngine( + createDefaultApprovalPolicy({ + automationWriteAllowlist: ['echo hi 
> tmp.txt'], + }) + ), + defaultMode: 'automation', + agentsConfig: { + commands: [{ name: 'fix', command: 'echo hi > tmp.txt' }], + hooks: [], + warnings: [], + }, + }); + + const result = await registry.invoke({ + tool: 'agents:fix', + sideEffect: 'read', + params: {}, + }); + + expect(result.ok).toBe(true); + expect(result.payload.policyOutcome).toMatchObject({ + allowed: true, + requiresApproval: false, + sideEffect: 'write', + }); + }); + + it('returns policy denial and structured payload for blocked commands', async () => { + const registry = new ToolRegistry({ + policyEngine: new DefaultPolicyEngine( + createDefaultApprovalPolicy({ + blockedCommandPatterns: ['dangerous-command'], + }) + ), + agentsConfig: { + commands: [{ name: 'explode', command: 'dangerous-command --now' }], + hooks: [], + warnings: [], + }, + }); + + const result = await registry.invoke({ + tool: 'agents:explode', + sideEffect: 'read', + params: {}, + }); + + expect(result.ok).toBe(false); + expect(result.payload).toMatchObject({ + actionName: 'explode', + resolvedCommand: 'dangerous-command --now', + executionSummary: expect.stringContaining('denied'), + }); + expect(result.payload.policyOutcome).toMatchObject({ + allowed: false, + requiresApproval: false, + reason: expect.stringContaining('blocked pattern'), + }); + }); + + it('captures structured execution summary on success', async () => { + const registry = new ToolRegistry({ + policyEngine: new DefaultPolicyEngine(createDefaultApprovalPolicy()), + agentsConfig: { + commands: [{ name: 'hello', command: 'printf "hello"' }], + hooks: [], + warnings: [], + }, + }); + + const result = await registry.invoke( + { + tool: 'agents:hello', + sideEffect: 'read', + params: {}, + }, + { + mode: 'interactive', + } + ); + + expect(result.ok).toBe(true); + expect(result.payload).toMatchObject({ + actionName: 'hello', + resolvedCommand: 'printf "hello"', + executionSummary: 'Command succeeded', + }); + 
expect(result.payload.policyOutcome).toMatchObject({ + allowed: true, + requiresApproval: false, + sideEffect: 'read', + }); + }); +}); From 0af4f3d58f5d6f71b9c71dd904296b8909aa7563 Mon Sep 17 00:00:00 2001 From: Daniel Wise Date: Tue, 3 Mar 2026 06:57:00 -0800 Subject: [PATCH 2/2] fix(runtime): harden policy evaluation inputs for command tools - infer and conservatively rank side effects for exec-command policy checks - pass invocation cwd into policy evaluation for exec and AGENTS commands - make write-path test hermetic and complete runtime actions spec purpose --- openspec/specs/agents-runtime-actions/spec.md | 3 +- src/tools/registry.ts | 28 +++++- tests/tool-registry.test.ts | 85 ++++++++++++++++--- 3 files changed, 99 insertions(+), 17 deletions(-) diff --git a/openspec/specs/agents-runtime-actions/spec.md b/openspec/specs/agents-runtime-actions/spec.md index f5732ed..050677a 100644 --- a/openspec/specs/agents-runtime-actions/spec.md +++ b/openspec/specs/agents-runtime-actions/spec.md @@ -1,7 +1,7 @@ # agents-runtime-actions Specification ## Purpose -TBD - created by archiving change agents-md-runtime-actions. Update Purpose after archive. +Define how runtime discovers AGENTS commands from `AGENTS.md`, registers them as invokable actions, evaluates policy and approvals before execution, and emits structured deterministic execution outcomes. ## Requirements ### Requirement: Runtime SHALL Register AGENTS Commands as Invokable Actions The runtime SHALL load command entries from `AGENTS.md` and register each valid command as an invokable runtime action before agent execution begins. 
@@ -46,4 +46,3 @@ The runtime SHALL return a deterministic error when an AGENTS command action ref #### Scenario: Duplicate command names in AGENTS definition - **WHEN** `AGENTS.md` defines duplicate command names - **THEN** runtime applies documented resolution behavior consistently and emits a structured warning - diff --git a/src/tools/registry.ts b/src/tools/registry.ts index 6cda5ea..dc82af5 100644 --- a/src/tools/registry.ts +++ b/src/tools/registry.ts @@ -29,6 +29,20 @@ export type ToolRegistryOptions = { onWarning?: (message: string) => void; }; +const SIDE_EFFECT_RANK: Record<ToolHandlerContext['sideEffect'], number> = { + read: 0, + write: 1, + network: 2, + destructive: 3, +}; + +function resolveConservativeSideEffect( + declared: ToolHandlerContext['sideEffect'], + inferred: ToolHandlerContext['sideEffect'] +): ToolHandlerContext['sideEffect'] { + return SIDE_EFFECT_RANK[declared] >= SIDE_EFFECT_RANK[inferred] ? declared : inferred; +} + function deniedResult(input: { tool: string; actionName?: string; @@ -65,11 +79,17 @@ export class ToolRegistry { const command = z.string().parse(params.command); const cwd = z.string().default(process.cwd()).parse(params.cwd); const timeoutMs = z.number().int().positive().optional().parse(params.timeoutMs); + const inferredSideEffect = classifyCommandSideEffect(command); + const effectiveSideEffect = resolveConservativeSideEffect( + context.sideEffect, + inferredSideEffect + ); const decision = this.evaluatePolicy({ command, + cwd, mode: context.mode, - sideEffect: context.sideEffect, + sideEffect: effectiveSideEffect, }); if (decision && !decision.allowed && !decision.requiresApproval) { return deniedResult({ @@ -104,7 +124,7 @@ export class ToolRegistry { allowed: true, requiresApproval: false, reason: 'Policy engine unavailable', - sideEffect: context.sideEffect, + sideEffect: effectiveSideEffect, } satisfies ApprovalDecision), executionSummary: result.summary, }, @@ -180,6 +200,7 @@ export class ToolRegistry { const inferredSideEffect = 
classifyCommandSideEffect(resolvedCommand); const decision = this.evaluatePolicy({ command: resolvedCommand, + cwd, mode: context.mode, sideEffect: inferredSideEffect, }); @@ -255,6 +276,7 @@ export class ToolRegistry { private evaluatePolicy(input: { command: string; + cwd: string; mode: 'interactive' | 'automation'; sideEffect: 'read' | 'write' | 'destructive' | 'network'; }): ApprovalDecision | null { @@ -263,7 +285,7 @@ export class ToolRegistry { } return this.policyEngine.evaluateCommand({ command: input.command, - cwd: process.cwd(), + cwd: input.cwd, mode: input.mode, sideEffect: input.sideEffect, }); diff --git a/tests/tool-registry.test.ts b/tests/tool-registry.test.ts index d686e04..c94ce80 100644 --- a/tests/tool-registry.test.ts +++ b/tests/tool-registry.test.ts @@ -1,4 +1,7 @@ -import { describe, expect, it } from 'vitest'; +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it, vi } from 'vitest'; import { createDefaultApprovalPolicy } from '../src/policy/defaults'; import { DefaultPolicyEngine } from '../src/policy/engine'; import { ToolRegistry } from '../src/tools/registry'; @@ -70,30 +73,88 @@ describe('ToolRegistry AGENTS runtime actions', () => { }); it('allows automation write execution when command matches allowlist', async () => { + const cwd = await mkdtemp(join(tmpdir(), 'dubsbot-tool-registry-')); + try { + const registry = new ToolRegistry({ + policyEngine: new DefaultPolicyEngine( + createDefaultApprovalPolicy({ + automationWriteAllowlist: ['echo hi > ./tmp.txt'], + }) + ), + defaultMode: 'automation', + agentsConfig: { + commands: [{ name: 'fix', command: 'echo hi > ./tmp.txt' }], + hooks: [], + warnings: [], + }, + }); + + const result = await registry.invoke({ + tool: 'agents:fix', + sideEffect: 'read', + params: { cwd }, + }); + + expect(result.ok).toBe(true); + expect(result.payload.policyOutcome).toMatchObject({ + allowed: true, + 
requiresApproval: false, + sideEffect: 'write', + }); + } finally { + await rm(cwd, { recursive: true, force: true }); + } + }); + + it('passes invocation cwd into policy evaluation', async () => { + const cwd = '/tmp/custom-workdir'; + const evaluateCommand = vi.fn().mockReturnValue({ + allowed: false, + requiresApproval: true, + reason: 'Approval required for side effect: write', + sideEffect: 'write', + }); const registry = new ToolRegistry({ - policyEngine: new DefaultPolicyEngine( - createDefaultApprovalPolicy({ - automationWriteAllowlist: ['echo hi > tmp.txt'], - }) - ), + policyEngine: { evaluateCommand } as unknown as DefaultPolicyEngine, defaultMode: 'automation', agentsConfig: { - commands: [{ name: 'fix', command: 'echo hi > tmp.txt' }], + commands: [{ name: 'fix', command: 'echo hi > ./tmp.txt' }], hooks: [], warnings: [], }, }); - const result = await registry.invoke({ + await registry.invoke({ tool: 'agents:fix', sideEffect: 'read', - params: {}, + params: { cwd }, }); - expect(result.ok).toBe(true); + expect(evaluateCommand).toHaveBeenCalledWith( + expect.objectContaining({ + cwd, + }) + ); + }); + + it('uses conservative side effect for exec-command policy checks', async () => { + const registry = new ToolRegistry({ + policyEngine: new DefaultPolicyEngine(createDefaultApprovalPolicy()), + defaultMode: 'interactive', + }); + + const result = await registry.invoke({ + tool: 'exec-command', + sideEffect: 'read', + params: { + command: 'echo hi > ./tmp.txt', + }, + }); + + expect(result.ok).toBe(false); expect(result.payload.policyOutcome).toMatchObject({ - allowed: true, - requiresApproval: false, + allowed: false, + requiresApproval: true, sideEffect: 'write', }); });