From 8168913b83c7c8b8ec794ffcdfa302da1a79927e Mon Sep 17 00:00:00 2001
From: Daniel Wise <io.dwise@gmail.com>
Date: Tue, 3 Mar 2026 05:22:02 -0800
Subject: [PATCH 1/2] feat(runtime): register AGENTS commands as policy-gated
 runtime actions

Add AGENTS command registration, deterministic duplicate handling,
policy-gated execution/approval flow, structured observability payloads,
tests, and docs. Archive the synced OpenSpec change.
---
 docs/agents-runtime-actions.md                |  32 +++
 .../agents-md-runtime-actions/tasks.md        |  24 --
 .../.openspec.yaml                            |   0
 .../design.md                                 |   0
 .../proposal.md                               |   0
 .../specs/agents-runtime-actions/spec.md      |   0
 .../tasks.md                                  |  24 ++
 openspec/specs/agents-runtime-actions/spec.md |  49 ++++
 src/cli/app.tsx                               |  86 +++++-
 src/cli/commands/chat.tsx                     |   7 +-
 src/cli/runtime.ts                            |  11 +-
 src/config/agents-loader.ts                   |  46 +++-
 src/observability/transcripts.ts              |   1 +
 src/tools/exec-command.ts                     |  40 ++-
 src/tools/registry.ts                         | 245 +++++++++++++++++-
 tests/agents-loader.test.ts                   |  19 ++
 tests/tool-registry.test.ts                   | 167 ++++++++++++
 17 files changed, 708 insertions(+), 43 deletions(-)
 create mode 100644 docs/agents-runtime-actions.md
 delete mode 100644 openspec/changes/agents-md-runtime-actions/tasks.md
 rename openspec/changes/{agents-md-runtime-actions => archive/2026-03-03-agents-md-runtime-actions}/.openspec.yaml (100%)
 rename openspec/changes/{agents-md-runtime-actions => archive/2026-03-03-agents-md-runtime-actions}/design.md (100%)
 rename openspec/changes/{agents-md-runtime-actions => archive/2026-03-03-agents-md-runtime-actions}/proposal.md (100%)
 rename openspec/changes/{agents-md-runtime-actions => archive/2026-03-03-agents-md-runtime-actions}/specs/agents-runtime-actions/spec.md (100%)
 create mode 100644 openspec/changes/archive/2026-03-03-agents-md-runtime-actions/tasks.md
 create mode 100644 openspec/specs/agents-runtime-actions/spec.md
 create mode 100644 tests/tool-registry.test.ts
diff --git a/docs/agents-runtime-actions.md b/docs/agents-runtime-actions.md
new file mode 100644
index 0000000..0504145
--- /dev/null
+++ b/docs/agents-runtime-actions.md
@@ -0,0 +1,32 @@
+# AGENTS Runtime Actions
+
+`AGENTS.md` command entries are now registered as invokable runtime tools.
+
+## Command Registration
+
+- Every command in `## Commands` is registered as `agents:<name>`.
+- If no built-in tool already uses the same name, an unprefixed alias (`<name>`) is also registered.
+- Duplicate command names in `AGENTS.md` are resolved deterministically: first definition wins and a warning is emitted.
+
+## Resolution Rules
+
+- `agents:<name>` always resolves to AGENTS command lookup.
+- Unknown `agents:<name>` references return a deterministic not-found result with no shell execution.
+- If `<name>` collides with an existing built-in tool, the built-in tool wins, a warning is emitted, and the AGENTS command remains available only as `agents:<name>`.
+
+## Execution and Policy
+
+- AGENTS commands execute through the same command executor as `exec-command`.
+- Side effects are classified from the resolved shell command (`read`, `write`, `destructive`, `network`).
+- Policy checks run before process execution.
+- Interactive mode requires approval for mutating commands.
+- In automation mode, write commands run without interactive approval only when the policy allowlist explicitly permits them.
+
+## Observability Payload
+
+Tool results and the trace/transcript records written for each tool execution include these structured fields:
+
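+- `actionName`: the AGENTS command name, or the tool name for other tools.
+- `resolvedCommand`: the shell command string that was evaluated, or `null` when no command could be resolved.
+- `policyOutcome`: the policy decision (`allowed`, `requiresApproval`, `reason`, `sideEffect`), or `null` when no decision was made.
+- `executionSummary`: a short summary of the execution, denial, or not-found outcome.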
diff --git a/openspec/changes/agents-md-runtime-actions/tasks.md b/openspec/changes/agents-md-runtime-actions/tasks.md
deleted file mode 100644
index 54ae185..0000000
--- a/openspec/changes/agents-md-runtime-actions/tasks.md
+++ /dev/null
@@ -1,24 +0,0 @@
-## 1. Command Registration
-
-- [ ] 1.1 Extend AGENTS config loading/validation to detect duplicate command names and emit deterministic warnings.
-- [ ] 1.2 Add runtime registration logic that converts `AGENTS.md` command entries into invokable runtime action descriptors.
-- [ ] 1.3 Define and implement command lookup/identifier resolution behavior (including unknown-command error responses).
-
-## 2. Execution and Policy Integration
-
-- [ ] 2.1 Implement an AGENTS command action adapter that executes through the existing command execution path.
-- [ ] 2.2 Ensure AGENTS command actions classify side effects and invoke policy decisions before process execution.
-- [ ] 2.3 Enforce interactive approval for mutating AGENTS commands and preserve automation allowlist behavior.
-
-## 3. Observability and UX
-
-- [ ] 3.1 Extend trace/transcript payloads to include AGENTS action name, resolved command, policy outcome, and execution summary.
-- [ ] 3.2 Add consistent user-facing summaries for success, denial, and not-found outcomes of AGENTS command actions.
-- [ ] 3.3 Document runtime command behavior and resolution rules in user/developer docs.
-
-## 4. Verification
-
-- [ ] 4.1 Add/update unit tests for AGENTS command parsing, duplicate-name handling, and registration.
-- [ ] 4.2 Add runtime/policy tests for approval-required, allowlisted automation execution, and blocked execution scenarios.
-- [ ] 4.3 Add observability/result-shape tests for successful execution, policy denial, and unknown command references.
-- [ ] 4.4 Run `pnpm test`, `pnpm typecheck`, and `pnpm lint` to verify implementation stability.
diff --git a/openspec/changes/agents-md-runtime-actions/.openspec.yaml b/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/.openspec.yaml
similarity index 100%
rename from openspec/changes/agents-md-runtime-actions/.openspec.yaml
rename to openspec/changes/archive/2026-03-03-agents-md-runtime-actions/.openspec.yaml
diff --git a/openspec/changes/agents-md-runtime-actions/design.md b/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/design.md
similarity index 100%
rename from openspec/changes/agents-md-runtime-actions/design.md
rename to openspec/changes/archive/2026-03-03-agents-md-runtime-actions/design.md
diff --git a/openspec/changes/agents-md-runtime-actions/proposal.md b/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/proposal.md
similarity index 100%
rename from openspec/changes/agents-md-runtime-actions/proposal.md
rename to openspec/changes/archive/2026-03-03-agents-md-runtime-actions/proposal.md
diff --git a/openspec/changes/agents-md-runtime-actions/specs/agents-runtime-actions/spec.md b/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/specs/agents-runtime-actions/spec.md
similarity index 100%
rename from openspec/changes/agents-md-runtime-actions/specs/agents-runtime-actions/spec.md
rename to openspec/changes/archive/2026-03-03-agents-md-runtime-actions/specs/agents-runtime-actions/spec.md
diff --git a/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/tasks.md b/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/tasks.md
new file mode 100644
index 0000000..a5ce74f
--- /dev/null
+++ b/openspec/changes/archive/2026-03-03-agents-md-runtime-actions/tasks.md
@@ -0,0 +1,24 @@
+## 1. Command Registration
+
+- [x] 1.1 Extend AGENTS config loading/validation to detect duplicate command names and emit deterministic warnings.
+- [x] 1.2 Add runtime registration logic that converts `AGENTS.md` command entries into invokable runtime action descriptors.
+- [x] 1.3 Define and implement command lookup/identifier resolution behavior (including unknown-command error responses).
+
+## 2. Execution and Policy Integration
+
+- [x] 2.1 Implement an AGENTS command action adapter that executes through the existing command execution path.
+- [x] 2.2 Ensure AGENTS command actions classify side effects and invoke policy decisions before process execution.
+- [x] 2.3 Enforce interactive approval for mutating AGENTS commands and preserve automation allowlist behavior.
+
+## 3. Observability and UX
+
+- [x] 3.1 Extend trace/transcript payloads to include AGENTS action name, resolved command, policy outcome, and execution summary.
+- [x] 3.2 Add consistent user-facing summaries for success, denial, and not-found outcomes of AGENTS command actions.
+- [x] 3.3 Document runtime command behavior and resolution rules in user/developer docs.
+
+## 4. Verification
+
+- [x] 4.1 Add/update unit tests for AGENTS command parsing, duplicate-name handling, and registration.
+- [x] 4.2 Add runtime/policy tests for approval-required, allowlisted automation execution, and blocked execution scenarios.
+- [x] 4.3 Add observability/result-shape tests for successful execution, policy denial, and unknown command references.
+- [x] 4.4 Run `pnpm test`, `pnpm typecheck`, and `pnpm lint` to verify implementation stability.
diff --git a/openspec/specs/agents-runtime-actions/spec.md b/openspec/specs/agents-runtime-actions/spec.md
new file mode 100644
index 0000000..f5732ed
--- /dev/null
+++ b/openspec/specs/agents-runtime-actions/spec.md
@@ -0,0 +1,49 @@
+# agents-runtime-actions Specification
+
+## Purpose
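+Define how `AGENTS.md` command entries are registered as runtime actions, executed through policy-gated command execution, and reported with structured results. Created by archiving change agents-md-runtime-actions.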
+## Requirements
+### Requirement: Runtime SHALL Register AGENTS Commands as Invokable Actions
+The runtime SHALL load command entries from `AGENTS.md` and register each valid command as an invokable runtime action before agent execution begins.
+
+#### Scenario: Commands available after config load
+- **WHEN** a workspace contains `AGENTS.md` with one or more valid command entries
+- **THEN** runtime action discovery includes each command entry by its declared name
+
+#### Scenario: Missing AGENTS file
+- **WHEN** no `AGENTS.md` file exists in the workspace
+- **THEN** runtime action discovery proceeds without AGENTS command actions and without fatal error
+
+### Requirement: Runtime SHALL Execute AGENTS Commands Through Policy-Gated Command Execution
+The system SHALL route AGENTS command actions through the standard command execution path and SHALL evaluate policy/approval before running shell commands.
+
+#### Scenario: Mutating command requires approval in interactive mode
+- **WHEN** an AGENTS command action resolves to a mutating shell command and mode is interactive
+- **THEN** the policy engine returns an approval-required decision before command execution
+
+#### Scenario: Automation allowlist permits safe write
+- **WHEN** an AGENTS command action resolves to a write command in automation mode and policy allowlist explicitly permits it
+- **THEN** the command is executed without additional interactive approval
+
+### Requirement: Runtime SHALL Produce Structured Results for AGENTS Command Actions
+For every AGENTS command action execution attempt, the runtime SHALL produce structured result data including action name, resolved command, policy decision outcome, and execution summary.
+
+#### Scenario: Successful execution emits structured summary
+- **WHEN** an AGENTS command action executes successfully
+- **THEN** trace/transcript records include the AGENTS command name, executed command string, and summarized stdout/stderr outcome
+
+#### Scenario: Policy-blocked execution emits structured denial
+- **WHEN** policy denies an AGENTS command action
+- **THEN** trace/transcript records include denial reason and no command process is started
+
+### Requirement: Runtime SHALL Handle Invalid or Unknown AGENTS Command References Deterministically
+The runtime SHALL return a deterministic error when an AGENTS command action reference is unknown, malformed, or conflicts in ways that prevent safe execution.
+
+#### Scenario: Unknown command name
+- **WHEN** the agent requests execution of an AGENTS command name that is not registered
+- **THEN** runtime returns a not-found error result with no shell execution
+
+#### Scenario: Duplicate command names in AGENTS definition
+- **WHEN** `AGENTS.md` defines duplicate command names
+- **THEN** runtime applies documented resolution behavior consistently and emits a structured warning
+
diff --git a/src/cli/app.tsx b/src/cli/app.tsx
index de980f2..b176592 100644
--- a/src/cli/app.tsx
+++ b/src/cli/app.tsx
@@ -3,6 +3,7 @@ import TextInput from 'ink-text-input';
 import { useMemo, useRef, useState } from 'react';
 import type { AgentOrchestrator } from '../agent/orchestrator';
 import type { TraceStore } from '../observability/traces';
+import type { TranscriptStore } from '../observability/transcripts';
 import type { ToolRegistry } from '../tools/registry';
 import type { ToolInvocation, ToolResult } from '../tools/schemas';
 import {
@@ -20,6 +21,7 @@ type AppProps = {
   orchestrator: AgentOrchestrator;
   tools: ToolRegistry;
   traces: TraceStore;
+  transcripts: TranscriptStore;
 };
 
 type PendingApproval = {
@@ -46,7 +48,7 @@ const APPROVAL_COMMANDS: SlashCommand[] = [
   { command: '/help', description: 'Show available slash commands' },
 ];
 
-export function ChatApp({ orchestrator, tools, traces }: AppProps) {
+export function ChatApp({ orchestrator, tools, traces, transcripts }: AppProps) {
   const [value, setValue] = useState('');
   const [output, setOutput] = useState<string>('');
   const [busy, setBusy] = useState(false);
@@ -118,8 +120,11 @@ export function ChatApp({ orchestrator, tools, traces }: AppProps) {
       }
 
       const invocation = checkpoint.toolPlan[index];
-      const sensitive = isSensitiveAction(invocation);
-      if (sensitive) {
+      const policyResult = await tools.invoke(invocation, { mode: 'interactive' });
+      const policyOutcome = getPolicyOutcome(policyResult);
+
+      let result = policyResult;
+      if (policyOutcome?.requiresApproval) {
         if (!(await transitionPhase('awaiting_approval'))) {
           setOutput('Error: invalid lifecycle transition while awaiting approval.');
           return;
@@ -154,13 +159,31 @@ export function ChatApp({ orchestrator, tools, traces }: AppProps) {
           setOutput('Error: invalid lifecycle transition while re-entering execution.');
           return;
         }
+
+        result = await tools.invoke(invocation, {
+          mode: 'interactive',
+          approvalGranted: true,
+        });
       }
 
-      const result = await tools.invoke(invocation);
       assistantMessage += `\n${formatToolResult(result)}`;
+      const observabilityPayload = buildExecutionPayload(invocation, result);
+      await traces.write({
+        timestamp: new Date().toISOString(),
+        type: 'tool.execution',
+        sessionId: SESSION_ID,
+        payload: observabilityPayload,
+      });
+      await transcripts.write({
+        timestamp: new Date().toISOString(),
+        sessionId: SESSION_ID,
+        role: 'system',
+        text: formatToolResult(result),
+        payload: observabilityPayload,
+      });
       completed.add(index);
 
-      if (sensitive) {
+      if (policyOutcome?.requiresApproval || isSensitiveAction(invocation)) {
         await saveCheckpoint({
           ...checkpoint,
           phase: lifecycleRef.current.getPhase(),
@@ -371,6 +394,59 @@ function formatToolResult(result: ToolResult): string {
   return `Tool ${result.tool} failed: ${result.summary}${result.stderr ? ` (${result.stderr.trim()})` : ''}`;
 }
 
+function getPolicyOutcome(result: ToolResult): {
+  allowed: boolean;
+  requiresApproval: boolean;
+  reason: string;
+  sideEffect: ToolInvocation['sideEffect'];
+} | null {
+  const candidate = result.payload.policyOutcome;
+  if (!candidate || typeof candidate !== 'object') {
+    return null;
+  }
+  const policyOutcome = candidate as Record<string, unknown>;
+  if (
+    typeof policyOutcome.allowed !== 'boolean' ||
+    typeof policyOutcome.requiresApproval !== 'boolean' ||
+    typeof policyOutcome.reason !== 'string' ||
+    (policyOutcome.sideEffect !== 'read' &&
+      policyOutcome.sideEffect !== 'write' &&
+      policyOutcome.sideEffect !== 'destructive' &&
+      policyOutcome.sideEffect !== 'network')
+  ) {
+    return null;
+  }
+  return {
+    allowed: policyOutcome.allowed,
+    requiresApproval: policyOutcome.requiresApproval,
+    reason: policyOutcome.reason,
+    sideEffect: policyOutcome.sideEffect,
+  };
+}
+
+function buildExecutionPayload(
+  invocation: ToolInvocation,
+  result: ToolResult
+): Record<string, unknown> {
+  const policyOutcome = getPolicyOutcome(result);
+  const resolvedCommand =
+    typeof result.payload.resolvedCommand === 'string'
+      ? result.payload.resolvedCommand
+      : typeof result.payload.command === 'string'
+        ? result.payload.command
+        : null;
+  return {
+    actionName:
+      typeof result.payload.actionName === 'string' ? result.payload.actionName : invocation.tool,
+    tool: invocation.tool,
+    resolvedCommand,
+    policyOutcome,
+    executionSummary: result.summary,
+    ok: result.ok,
+    exitCode: result.exitCode,
+  };
+}
+
 function formatHelpText(commands: SlashCommand[]): string {
   const lines = ['Available slash commands:'];
   for (const command of commands) {
diff --git a/src/cli/commands/chat.tsx b/src/cli/commands/chat.tsx
index 933939c..41e30d3 100644
--- a/src/cli/commands/chat.tsx
+++ b/src/cli/commands/chat.tsx
@@ -45,6 +45,11 @@ export async function runChatCommand(prompt?: string): Promise<void> {
   }
 
   render(
-    <ChatApp orchestrator={runtime.orchestrator} tools={runtime.tools} traces={runtime.traces} />
+    <ChatApp
+      orchestrator={runtime.orchestrator}
+      tools={runtime.tools}
+      traces={runtime.traces}
+      transcripts={runtime.transcripts}
+    />
   );
 }
diff --git a/src/cli/runtime.ts b/src/cli/runtime.ts
index 4cff230..d1528a6 100644
--- a/src/cli/runtime.ts
+++ b/src/cli/runtime.ts
@@ -1,4 +1,5 @@
 import { AgentOrchestrator } from '../agent/orchestrator';
+import { loadAgentsConfig } from '../config/agents-loader';
 import { createDb } from '../db/client';
 import { runMigrations } from '../db/migrate';
 import { OptionalOtelExporter } from '../observability/otel';
@@ -12,6 +13,7 @@ import { ToolRegistry } from '../tools/registry';
 export async function createRuntime() {
   await runMigrations();
   const db = await createDb();
+  const agentsConfig = await loadAgentsConfig(process.cwd());
   const provider = createProviderAdapter(detectProvider());
   const policyEngine = new DefaultPolicyEngine(createDefaultApprovalPolicy());
   const orchestrator = new AgentOrchestrator({
@@ -28,6 +30,13 @@ export async function createRuntime() {
     traces: new TraceStore(),
     transcripts: new TranscriptStore(),
     otel: new OptionalOtelExporter(),
-    tools: new ToolRegistry(),
+    tools: new ToolRegistry({
+      policyEngine,
+      defaultMode: 'interactive',
+      agentsConfig,
+      onWarning: (message) => {
+        console.warn(`[agents-config] ${message}`);
+      },
+    }),
   };
 }
diff --git a/src/config/agents-loader.ts b/src/config/agents-loader.ts
index 0c49276..fa0a211 100644
--- a/src/config/agents-loader.ts
+++ b/src/config/agents-loader.ts
@@ -6,9 +6,18 @@ export type AgentCommand = {
   command: string;
 };
 
+export type AgentsConfigWarning = {
+  type: 'duplicate-command';
+  commandName: string;
+  keptIndex: number;
+  ignoredIndexes: number[];
+  message: string;
+};
+
 export type AgentsConfig = {
   commands: AgentCommand[];
   hooks: Array<{ event: string; command: string }>;
+  warnings: AgentsConfigWarning[];
 };
 
 function parseSectionLines(content: string, heading: string): string[] {
@@ -35,21 +44,52 @@ export async function loadAgentsConfig(cwd: string): Promise<AgentsConfig> {
   const path = join(cwd, 'AGENTS.md');
   const content = await readFile(path, 'utf8').catch(() => '');
   if (!content) {
-    return { commands: [], hooks: [] };
+    return { commands: [], hooks: [], warnings: [] };
   }
 
   const commandLines = parseSectionLines(content, '## Commands');
   const hookLines = parseSectionLines(content, '## Hooks');
 
-  const commands = commandLines
+  const parsedCommands = commandLines
     .map((line) => line.match(/^[-*]\s*`?([^:`]+)`?\s*:\s*(.+)$/))
     .filter((match): match is RegExpMatchArray => Boolean(match))
     .map((match) => ({ name: match[1].trim(), command: match[2].trim() }));
 
+  const warnings: AgentsConfigWarning[] = [];
+  const commandNameToFirstIndex = new Map<string, number>();
+  const duplicateIndexes = new Map<string, number[]>();
+  for (const [index, command] of parsedCommands.entries()) {
+    const name = command.name;
+    const existing = commandNameToFirstIndex.get(name);
+    if (existing === undefined) {
+      commandNameToFirstIndex.set(name, index);
+      continue;
+    }
+    const duplicates = duplicateIndexes.get(name) ?? [];
+    duplicates.push(index);
+    duplicateIndexes.set(name, duplicates);
+  }
+
+  const commands = parsedCommands.filter((command, index) => {
+    const firstIndex = commandNameToFirstIndex.get(command.name);
+    return firstIndex === index;
+  });
+
+  for (const [commandName, ignoredIndexes] of duplicateIndexes.entries()) {
+    const keptIndex = commandNameToFirstIndex.get(commandName) ?? 0;
+    warnings.push({
+      type: 'duplicate-command',
+      commandName,
+      keptIndex,
+      ignoredIndexes,
+      message: `Duplicate AGENTS command "${commandName}" found at indexes ${ignoredIndexes.join(', ')}; using index ${keptIndex}.`,
+    });
+  }
+
   const hooks = hookLines
     .map((line) => line.match(/^[-*]\s*`?([^:`]+)`?\s*:\s*(.+)$/))
     .filter((match): match is RegExpMatchArray => Boolean(match))
     .map((match) => ({ event: match[1].trim(), command: match[2].trim() }));
 
-  return { commands, hooks };
+  return { commands, hooks, warnings };
 }
diff --git a/src/observability/transcripts.ts b/src/observability/transcripts.ts
index ec364e1..fc4c5d5 100644
--- a/src/observability/transcripts.ts
+++ b/src/observability/transcripts.ts
@@ -7,6 +7,7 @@ export type TranscriptEntry = {
   sessionId: string;
   role: 'user' | 'assistant' | 'system';
   text: string;
+  payload?: Record<string, unknown>;
 };
 
 export class TranscriptStore {
diff --git a/src/tools/exec-command.ts b/src/tools/exec-command.ts
index 2760a9f..e503285 100644
--- a/src/tools/exec-command.ts
+++ b/src/tools/exec-command.ts
@@ -1,12 +1,48 @@
 import { spawn } from 'node:child_process';
-import type { ToolResult } from './schemas';
+import type { ToolInvocation, ToolResult } from './schemas';
 
 export type ExecCommandInput = {
   command: string;
   cwd: string;
+  toolName?: string;
   timeoutMs?: number;
 };
 
+const DESTRUCTIVE_PATTERNS = [
+  /\brm\s+-rf\b/i,
+  /\bmkfs\b/i,
+  /\bdd\s+if=/i,
+  /\bshutdown\b/i,
+  /\breboot\b/i,
+  /\bchmod\b/i,
+];
+
+const WRITE_PATTERNS = [
+  />/,
+  /\btee\b/i,
+  /\bmv\b/i,
+  /\bcp\b/i,
+  /\btouch\b/i,
+  /\bmkdir\b/i,
+  /\bpnpm\s+lint(?::\w+)?(?:\s+--\w+)?\s+--write\b/i,
+  /\bgit\s+add\b/i,
+];
+
+const NETWORK_PATTERNS = [/\bcurl\b/i, /\bwget\b/i, /\bnpm\s+install\b/i, /\bpnpm\s+add\b/i];
+
+export function classifyCommandSideEffect(command: string): ToolInvocation['sideEffect'] {
+  if (DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command))) {
+    return 'destructive';
+  }
+  if (WRITE_PATTERNS.some((pattern) => pattern.test(command))) {
+    return 'write';
+  }
+  if (NETWORK_PATTERNS.some((pattern) => pattern.test(command))) {
+    return 'network';
+  }
+  return 'read';
+}
+
 export async function executeCommand(input: ExecCommandInput): Promise<ToolResult> {
   const timeoutMs = input.timeoutMs ?? 120_000;
 
@@ -37,7 +73,7 @@ export async function executeCommand(input: ExecCommandInput): Promise<ToolResul
     child.on('close', (code) => {
       clearTimeout(timer);
       resolve({
-        tool: 'exec-command',
+        tool: input.toolName ?? 'exec-command',
         ok: code === 0 && !killedByTimeout,
         summary: killedByTimeout
           ? 'Command timed out'
diff --git a/src/tools/registry.ts b/src/tools/registry.ts
index 030617a..6cda5ea 100644
--- a/src/tools/registry.ts
+++ b/src/tools/registry.ts
@@ -1,29 +1,152 @@
 import { z } from 'zod';
-import { executeCommand } from './exec-command';
+import type { AgentsConfig } from '../config/agents-loader';
+import type { DefaultPolicyEngine } from '../policy/engine';
+import type { ApprovalDecision } from '../policy/schemas';
+import { classifyCommandSideEffect, executeCommand } from './exec-command';
 import { ToolInvocationSchema, type ToolResult } from './schemas';
 
-export type ToolHandler = (params: Record<string, unknown>) => Promise<ToolResult>;
+export type ToolHandlerContext = {
+  invocationTool: string;
+  sideEffect: 'read' | 'write' | 'destructive' | 'network';
+  mode: 'interactive' | 'automation';
+  approvalGranted: boolean;
+};
+
+export type ToolHandler = (
+  params: Record<string, unknown>,
+  context: ToolHandlerContext
+) => Promise<ToolResult>;
+
+export type ToolInvokeOptions = {
+  mode?: 'interactive' | 'automation';
+  approvalGranted?: boolean;
+};
+
+export type ToolRegistryOptions = {
+  policyEngine?: DefaultPolicyEngine;
+  defaultMode?: 'interactive' | 'automation';
+  agentsConfig?: AgentsConfig;
+  onWarning?: (message: string) => void;
+};
+
+function deniedResult(input: {
+  tool: string;
+  actionName?: string;
+  resolvedCommand: string;
+  decision: ApprovalDecision;
+  summary: string;
+}): ToolResult {
+  return {
+    tool: input.tool,
+    ok: false,
+    summary: input.summary,
+    payload: {
+      actionName: input.actionName ?? input.tool,
+      resolvedCommand: input.resolvedCommand,
+      policyOutcome: input.decision,
+      executionSummary: input.summary,
+    },
+    stdout: '',
+    stderr: input.decision.reason,
+    exitCode: 126,
+  };
+}
 
 export class ToolRegistry {
   private handlers = new Map<string, ToolHandler>();
+  private policyEngine?: DefaultPolicyEngine;
+  private defaultMode: 'interactive' | 'automation';
 
-  constructor() {
-    this.register('exec-command', async (params) => {
+  constructor(options: ToolRegistryOptions = {}) {
+    this.policyEngine = options.policyEngine;
+    this.defaultMode = options.defaultMode ?? 'interactive';
+
+    this.register('exec-command', async (params, context) => {
       const command = z.string().parse(params.command);
       const cwd = z.string().default(process.cwd()).parse(params.cwd);
       const timeoutMs = z.number().int().positive().optional().parse(params.timeoutMs);
-      return executeCommand({ command, cwd, timeoutMs });
+
+      const decision = this.evaluatePolicy({
+        command,
+        mode: context.mode,
+        sideEffect: context.sideEffect,
+      });
+      if (decision && !decision.allowed && !decision.requiresApproval) {
+        return deniedResult({
+          tool: context.invocationTool,
+          resolvedCommand: command,
+          decision,
+          summary: `Execution denied by policy: ${decision.reason}`,
+        });
+      }
+      if (decision?.requiresApproval && !context.approvalGranted) {
+        return deniedResult({
+          tool: context.invocationTool,
+          resolvedCommand: command,
+          decision,
+          summary: 'Approval required before command execution',
+        });
+      }
+
+      const result = await executeCommand({
+        command,
+        cwd,
+        timeoutMs,
+        toolName: context.invocationTool,
+      });
+      return {
+        ...result,
+        payload: {
+          ...result.payload,
+          policyOutcome:
+            decision ??
+            ({
+              allowed: true,
+              requiresApproval: false,
+              reason: 'Policy engine unavailable',
+              sideEffect: context.sideEffect,
+            } satisfies ApprovalDecision),
+          executionSummary: result.summary,
+        },
+      };
     });
+
+    for (const warning of options.agentsConfig?.warnings ?? []) {
+      options.onWarning?.(warning.message);
+    }
+
+    for (const command of options.agentsConfig?.commands ?? []) {
+      this.registerAgentsCommand(command.name, command.command, options.onWarning);
+    }
   }
 
   register(name: string, handler: ToolHandler): void {
     this.handlers.set(name, handler);
   }
 
-  async invoke(raw: unknown): Promise<ToolResult> {
+  async invoke(raw: unknown, options: ToolInvokeOptions = {}): Promise<ToolResult> {
     const invocation = ToolInvocationSchema.parse(raw);
     const handler = this.handlers.get(invocation.tool);
+    const mode = options.mode ?? this.defaultMode;
+    const approvalGranted = options.approvalGranted ?? false;
     if (!handler) {
+      if (invocation.tool.startsWith('agents:')) {
+        const commandName = invocation.tool.slice('agents:'.length);
+        return {
+          tool: invocation.tool,
+          ok: false,
+          summary: `AGENTS command not found: ${commandName}`,
+          payload: {
+            actionName: commandName,
+            resolvedCommand: null,
+            policyOutcome: null,
+            executionSummary: 'Unknown AGENTS command',
+          },
+          stdout: '',
+          stderr: `Unknown AGENTS command: ${commandName}`,
+          exitCode: 127,
+        };
+      }
       return {
         tool: invocation.tool,
         ok: false,
@@ -35,6 +158,114 @@ export class ToolRegistry {
       };
     }
 
-    return handler(invocation.params);
+    return handler(invocation.params, {
+      invocationTool: invocation.tool,
+      sideEffect: invocation.sideEffect,
+      mode,
+      approvalGranted,
+    });
+  }
+
+  private registerAgentsCommand(
+    name: string,
+    command: string,
+    onWarning?: (message: string) => void
+  ): void {
+    const namespacedTool = `agents:${name}`;
+
+    this.register(namespacedTool, async (params, context) => {
+      const cwd = z.string().default(process.cwd()).parse(params.cwd);
+      const timeoutMs = z.number().int().positive().optional().parse(params.timeoutMs);
+      const resolvedCommand = command;
+      const inferredSideEffect = classifyCommandSideEffect(resolvedCommand);
+      const decision = this.evaluatePolicy({
+        command: resolvedCommand,
+        mode: context.mode,
+        sideEffect: inferredSideEffect,
+      });
+
+      if (decision && !decision.allowed && !decision.requiresApproval) {
+        return deniedResult({
+          tool: context.invocationTool,
+          actionName: name,
+          resolvedCommand,
+          decision,
+          summary: `AGENTS command denied by policy: ${decision.reason}`,
+        });
+      }
+
+      if (decision?.requiresApproval && !context.approvalGranted) {
+        return deniedResult({
+          tool: context.invocationTool,
+          actionName: name,
+          resolvedCommand,
+          decision,
+          summary: 'Approval required before AGENTS command execution',
+        });
+      }
+
+      const result = await executeCommand({
+        command: resolvedCommand,
+        cwd,
+        timeoutMs,
+        toolName: context.invocationTool,
+      });
+
+      return {
+        ...result,
+        payload: {
+          ...result.payload,
+          actionName: name,
+          resolvedCommand,
+          policyOutcome:
+            decision ??
+            ({
+              allowed: true,
+              requiresApproval: false,
+              reason: 'Policy engine unavailable',
+              sideEffect: inferredSideEffect,
+            } satisfies ApprovalDecision),
+          executionSummary: result.summary,
+        },
+      };
+    });
+
+    if (!this.handlers.has(name)) {
+      this.register(
+        name,
+        async (params, context) =>
+          this.handlers.get(namespacedTool)?.(params, context) ??
+          Promise.resolve({
+            tool: context.invocationTool,
+            ok: false,
+            summary: `No handler registered for ${context.invocationTool}`,
+            payload: {},
+            stdout: '',
+            stderr: `No handler for ${context.invocationTool}`,
+            exitCode: 127,
+          })
+      );
+      return;
+    }
+
+    onWarning?.(
+      `Skipping unprefixed AGENTS command alias "${name}" because a tool with that name already exists. Use "${namespacedTool}".`
+    );
+  }
+
+  private evaluatePolicy(input: {
+    command: string;
+    mode: 'interactive' | 'automation';
+    sideEffect: 'read' | 'write' | 'destructive' | 'network';
+  }): ApprovalDecision | null {
+    if (!this.policyEngine) {
+      return null;
+    }
+    return this.policyEngine.evaluateCommand({
+      command: input.command,
+      cwd: process.cwd(),
+      mode: input.mode,
+      sideEffect: input.sideEffect,
+    });
   }
 }
diff --git a/tests/agents-loader.test.ts b/tests/agents-loader.test.ts
index a078ea1..16ae05d 100644
--- a/tests/agents-loader.test.ts
+++ b/tests/agents-loader.test.ts
@@ -17,5 +17,24 @@ describe('loadAgentsConfig', () => {
     expect(config.commands).toHaveLength(2);
     expect(config.hooks).toHaveLength(1);
     expect(config.commands[0].name).toBe('build');
+    expect(config.warnings).toEqual([]);
+  });
+
+  it('keeps first duplicate command and emits deterministic warning', async () => {
+    const dir = await mkdtemp(join(tmpdir(), 'dubsbot-agents-dup-'));
+    await writeFile(
+      join(dir, 'AGENTS.md'),
+      `# AGENTS\n\n## Commands\n- test: pnpm test\n- build: pnpm build\n- test: pnpm test:watch\n\n## Hooks\n- file-change: pnpm test\n`,
+      'utf8'
+    );
+
+    const config = await loadAgentsConfig(dir);
+    expect(config.commands).toHaveLength(2);
+    expect(config.commands[0]).toEqual({ name: 'test', command: 'pnpm test' });
+    expect(config.warnings).toHaveLength(1);
+    expect(config.warnings[0].type).toBe('duplicate-command');
+    expect(config.warnings[0].commandName).toBe('test');
+    expect(config.warnings[0].keptIndex).toBe(0);
+    expect(config.warnings[0].ignoredIndexes).toEqual([2]);
   });
 });
diff --git a/tests/tool-registry.test.ts b/tests/tool-registry.test.ts
new file mode 100644
index 0000000..d686e04
--- /dev/null
+++ b/tests/tool-registry.test.ts
@@ -0,0 +1,167 @@
+import { describe, expect, it } from 'vitest';
+import { createDefaultApprovalPolicy } from '../src/policy/defaults';
+import { DefaultPolicyEngine } from '../src/policy/engine';
+import { ToolRegistry } from '../src/tools/registry';
+
+describe('ToolRegistry AGENTS runtime actions', () => {
+  it('registers AGENTS commands as invokable actions', async () => {
+    const registry = new ToolRegistry({
+      agentsConfig: {
+        commands: [{ name: 'test', command: 'printf "ok"' }],
+        hooks: [],
+        warnings: [],
+      },
+    });
+
+    const prefixed = await registry.invoke({
+      tool: 'agents:test',
+      sideEffect: 'read',
+      params: {},
+    });
+    const alias = await registry.invoke({
+      tool: 'test',
+      sideEffect: 'read',
+      params: {},
+    });
+
+    expect(prefixed.ok).toBe(true);
+    expect(alias.ok).toBe(true);
+    expect(prefixed.payload.actionName).toBe('test');
+    expect(prefixed.payload.resolvedCommand).toBe('printf "ok"');
+  });
+
+  it('returns deterministic not-found response for unknown AGENTS command identifier', async () => {
+    const registry = new ToolRegistry();
+
+    const result = await registry.invoke({
+      tool: 'agents:missing-command',
+      sideEffect: 'read',
+      params: {},
+    });
+
+    expect(result.ok).toBe(false);
+    expect(result.summary).toContain('AGENTS command not found');
+    expect(result.exitCode).toBe(127);
+  });
+
+  it('requires interactive approval for mutating AGENTS commands', async () => {
+    const registry = new ToolRegistry({
+      policyEngine: new DefaultPolicyEngine(createDefaultApprovalPolicy()),
+      defaultMode: 'interactive',
+      agentsConfig: {
+        commands: [{ name: 'fix', command: 'echo hi > tmp.txt' }],
+        hooks: [],
+        warnings: [],
+      },
+    });
+
+    const result = await registry.invoke({
+      tool: 'agents:fix',
+      sideEffect: 'read',
+      params: {},
+    });
+
+    expect(result.ok).toBe(false);
+    expect(result.summary).toContain('Approval required');
+    expect(result.payload.policyOutcome).toMatchObject({
+      requiresApproval: true,
+      sideEffect: 'write',
+    });
+  });
+
+  it('allows automation write execution when command matches allowlist', async () => {
+    const registry = new ToolRegistry({
+      policyEngine: new DefaultPolicyEngine(
+        createDefaultApprovalPolicy({
+          automationWriteAllowlist: ['echo hi > tmp.txt'],
+        })
+      ),
+      defaultMode: 'automation',
+      agentsConfig: {
+        commands: [{ name: 'fix', command: 'echo hi > tmp.txt' }],
+        hooks: [],
+        warnings: [],
+      },
+    });
+
+    const result = await registry.invoke({
+      tool: 'agents:fix',
+      sideEffect: 'read',
+      params: {},
+    });
+
+    expect(result.ok).toBe(true);
+    expect(result.payload.policyOutcome).toMatchObject({
+      allowed: true,
+      requiresApproval: false,
+      sideEffect: 'write',
+    });
+  });
+
+  it('returns policy denial and structured payload for blocked commands', async () => {
+    const registry = new ToolRegistry({
+      policyEngine: new DefaultPolicyEngine(
+        createDefaultApprovalPolicy({
+          blockedCommandPatterns: ['dangerous-command'],
+        })
+      ),
+      agentsConfig: {
+        commands: [{ name: 'explode', command: 'dangerous-command --now' }],
+        hooks: [],
+        warnings: [],
+      },
+    });
+
+    const result = await registry.invoke({
+      tool: 'agents:explode',
+      sideEffect: 'read',
+      params: {},
+    });
+
+    expect(result.ok).toBe(false);
+    expect(result.payload).toMatchObject({
+      actionName: 'explode',
+      resolvedCommand: 'dangerous-command --now',
+      executionSummary: expect.stringContaining('denied'),
+    });
+    expect(result.payload.policyOutcome).toMatchObject({
+      allowed: false,
+      requiresApproval: false,
+      reason: expect.stringContaining('blocked pattern'),
+    });
+  });
+
+  it('captures structured execution summary on success', async () => {
+    const registry = new ToolRegistry({
+      policyEngine: new DefaultPolicyEngine(createDefaultApprovalPolicy()),
+      agentsConfig: {
+        commands: [{ name: 'hello', command: 'printf "hello"' }],
+        hooks: [],
+        warnings: [],
+      },
+    });
+
+    const result = await registry.invoke(
+      {
+        tool: 'agents:hello',
+        sideEffect: 'read',
+        params: {},
+      },
+      {
+        mode: 'interactive',
+      }
+    );
+
+    expect(result.ok).toBe(true);
+    expect(result.payload).toMatchObject({
+      actionName: 'hello',
+      resolvedCommand: 'printf "hello"',
+      executionSummary: 'Command succeeded',
+    });
+    expect(result.payload.policyOutcome).toMatchObject({
+      allowed: true,
+      requiresApproval: false,
+      sideEffect: 'read',
+    });
+  });
+});

From 0af4f3d58f5d6f71b9c71dd904296b8909aa7563 Mon Sep 17 00:00:00 2001
From: Daniel Wise <io.dwise@gmail.com>
Date: Tue, 3 Mar 2026 06:57:00 -0800
Subject: [PATCH 2/2] fix(runtime): harden policy evaluation inputs for command
 tools

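- infer the side effect of exec-command invocations and evaluate policy with
  the stricter of the declared and inferred side effects

- pass the invocation cwd into policy evaluation for exec-command and AGENTS
  commands instead of always using process.cwd()

- make the automation write-path test hermetic by running it in a temporary
  directory, and fill in the Purpose section of the runtime actions spec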
---
 openspec/specs/agents-runtime-actions/spec.md |  3 +-
 src/tools/registry.ts                         | 28 +++++-
 tests/tool-registry.test.ts                   | 85 ++++++++++++++++---
 3 files changed, 99 insertions(+), 17 deletions(-)

diff --git a/openspec/specs/agents-runtime-actions/spec.md b/openspec/specs/agents-runtime-actions/spec.md
index f5732ed..050677a 100644
--- a/openspec/specs/agents-runtime-actions/spec.md
+++ b/openspec/specs/agents-runtime-actions/spec.md
@@ -1,7 +1,7 @@
 # agents-runtime-actions Specification
 
 ## Purpose
-TBD - created by archiving change agents-md-runtime-actions. Update Purpose after archive.
+Define how the runtime discovers AGENTS commands from `AGENTS.md`, registers them as invokable actions, evaluates policy and approvals before execution, and emits structured, deterministic execution outcomes.
 ## Requirements
 ### Requirement: Runtime SHALL Register AGENTS Commands as Invokable Actions
 The runtime SHALL load command entries from `AGENTS.md` and register each valid command as an invokable runtime action before agent execution begins.
@@ -46,4 +46,3 @@ The runtime SHALL return a deterministic error when an AGENTS command action ref
 #### Scenario: Duplicate command names in AGENTS definition
 - **WHEN** `AGENTS.md` defines duplicate command names
 - **THEN** runtime applies documented resolution behavior consistently and emits a structured warning
-
diff --git a/src/tools/registry.ts b/src/tools/registry.ts
index 6cda5ea..dc82af5 100644
--- a/src/tools/registry.ts
+++ b/src/tools/registry.ts
@@ -29,6 +29,20 @@ export type ToolRegistryOptions = {
   onWarning?: (message: string) => void;
 };
 
+const SIDE_EFFECT_RANK: Record<ToolHandlerContext['sideEffect'], number> = {
+  read: 0,
+  write: 1,
+  network: 2,
+  destructive: 3,
+};
+
+function resolveConservativeSideEffect(
+  declared: ToolHandlerContext['sideEffect'],
+  inferred: ToolHandlerContext['sideEffect']
+): ToolHandlerContext['sideEffect'] {
+  return SIDE_EFFECT_RANK[declared] >= SIDE_EFFECT_RANK[inferred] ? declared : inferred;
+}
+
 function deniedResult(input: {
   tool: string;
   actionName?: string;
@@ -65,11 +79,17 @@ export class ToolRegistry {
       const command = z.string().parse(params.command);
       const cwd = z.string().default(process.cwd()).parse(params.cwd);
       const timeoutMs = z.number().int().positive().optional().parse(params.timeoutMs);
+      const inferredSideEffect = classifyCommandSideEffect(command);
+      const effectiveSideEffect = resolveConservativeSideEffect(
+        context.sideEffect,
+        inferredSideEffect
+      );
 
       const decision = this.evaluatePolicy({
         command,
+        cwd,
         mode: context.mode,
-        sideEffect: context.sideEffect,
+        sideEffect: effectiveSideEffect,
       });
       if (decision && !decision.allowed && !decision.requiresApproval) {
         return deniedResult({
@@ -104,7 +124,7 @@ export class ToolRegistry {
               allowed: true,
               requiresApproval: false,
               reason: 'Policy engine unavailable',
-              sideEffect: context.sideEffect,
+              sideEffect: effectiveSideEffect,
             } satisfies ApprovalDecision),
           executionSummary: result.summary,
         },
@@ -180,6 +200,7 @@ export class ToolRegistry {
       const inferredSideEffect = classifyCommandSideEffect(resolvedCommand);
       const decision = this.evaluatePolicy({
         command: resolvedCommand,
+        cwd,
         mode: context.mode,
         sideEffect: inferredSideEffect,
       });
@@ -255,6 +276,7 @@ export class ToolRegistry {
 
   private evaluatePolicy(input: {
     command: string;
+    cwd: string;
     mode: 'interactive' | 'automation';
     sideEffect: 'read' | 'write' | 'destructive' | 'network';
   }): ApprovalDecision | null {
@@ -263,7 +285,7 @@ export class ToolRegistry {
     }
     return this.policyEngine.evaluateCommand({
       command: input.command,
-      cwd: process.cwd(),
+      cwd: input.cwd,
       mode: input.mode,
       sideEffect: input.sideEffect,
     });
diff --git a/tests/tool-registry.test.ts b/tests/tool-registry.test.ts
index d686e04..c94ce80 100644
--- a/tests/tool-registry.test.ts
+++ b/tests/tool-registry.test.ts
@@ -1,4 +1,7 @@
-import { describe, expect, it } from 'vitest';
+import { mkdtemp, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { describe, expect, it, vi } from 'vitest';
 import { createDefaultApprovalPolicy } from '../src/policy/defaults';
 import { DefaultPolicyEngine } from '../src/policy/engine';
 import { ToolRegistry } from '../src/tools/registry';
@@ -70,30 +73,88 @@ describe('ToolRegistry AGENTS runtime actions', () => {
   });
 
   it('allows automation write execution when command matches allowlist', async () => {
+    const cwd = await mkdtemp(join(tmpdir(), 'dubsbot-tool-registry-'));
+    try {
+      const registry = new ToolRegistry({
+        policyEngine: new DefaultPolicyEngine(
+          createDefaultApprovalPolicy({
+            automationWriteAllowlist: ['echo hi > ./tmp.txt'],
+          })
+        ),
+        defaultMode: 'automation',
+        agentsConfig: {
+          commands: [{ name: 'fix', command: 'echo hi > ./tmp.txt' }],
+          hooks: [],
+          warnings: [],
+        },
+      });
+
+      const result = await registry.invoke({
+        tool: 'agents:fix',
+        sideEffect: 'read',
+        params: { cwd },
+      });
+
+      expect(result.ok).toBe(true);
+      expect(result.payload.policyOutcome).toMatchObject({
+        allowed: true,
+        requiresApproval: false,
+        sideEffect: 'write',
+      });
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
+  });
+
+  it('passes invocation cwd into policy evaluation', async () => {
+    const cwd = '/tmp/custom-workdir';
+    const evaluateCommand = vi.fn().mockReturnValue({
+      allowed: false,
+      requiresApproval: true,
+      reason: 'Approval required for side effect: write',
+      sideEffect: 'write',
+    });
     const registry = new ToolRegistry({
-      policyEngine: new DefaultPolicyEngine(
-        createDefaultApprovalPolicy({
-          automationWriteAllowlist: ['echo hi > tmp.txt'],
-        })
-      ),
+      policyEngine: { evaluateCommand } as unknown as DefaultPolicyEngine,
       defaultMode: 'automation',
       agentsConfig: {
-        commands: [{ name: 'fix', command: 'echo hi > tmp.txt' }],
+        commands: [{ name: 'fix', command: 'echo hi > ./tmp.txt' }],
         hooks: [],
         warnings: [],
       },
     });
 
-    const result = await registry.invoke({
+    await registry.invoke({
       tool: 'agents:fix',
       sideEffect: 'read',
-      params: {},
+      params: { cwd },
     });
 
-    expect(result.ok).toBe(true);
+    expect(evaluateCommand).toHaveBeenCalledWith(
+      expect.objectContaining({
+        cwd,
+      })
+    );
+  });
+
+  it('uses conservative side effect for exec-command policy checks', async () => {
+    const registry = new ToolRegistry({
+      policyEngine: new DefaultPolicyEngine(createDefaultApprovalPolicy()),
+      defaultMode: 'interactive',
+    });
+
+    const result = await registry.invoke({
+      tool: 'exec-command',
+      sideEffect: 'read',
+      params: {
+        command: 'echo hi > ./tmp.txt',
+      },
+    });
+
+    expect(result.ok).toBe(false);
     expect(result.payload.policyOutcome).toMatchObject({
-      allowed: true,
-      requiresApproval: false,
+      allowed: false,
+      requiresApproval: true,
       sideEffect: 'write',
     });
   });