From b59ad6355a22d8510436654b8aa32ca9d2f3c510 Mon Sep 17 00:00:00 2001
From: Jared Pleva <jpleva91@gmail.com>
Date: Sat, 28 Mar 2026 06:28:18 +0000
Subject: [PATCH] =?UTF-8?q?docs:=20post-v0.6=20accuracy=20pass=20=E2=80=94?=
 =?UTF-8?q?=20Crush=E2=86=92Goose,=20arch=20update,=20bug=20backlog,=20god?=
 =?UTF-8?q?oc?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- README.md: Replace stale Crush references with Goose (Block) in the
  stack table, architecture diagram, CLI commands table, and core insight;
  drop the Crush line from the `shellforge status` sample and the
  `shellforge run crush` example. Crush was retired in v0.5.x; Goose is
  the current local execution driver.

- docs/architecture.md: Update 8-layer stack to reflect v0.6.x reality —
  DeepAgents layer replaced by Dagu (orchestration), OpenCode layer now
  Goose / OpenCode (execution), TurboQuant and RTK marked optional,
  OpenShell sandbox note updated. Engine Architecture section now lists Goose
  first, with OpenCode/DeepAgents as alternatives. Governance flow corrected
  to show correction feedback loop on denial.

- docs/roadmap.md: Add Bug Backlog section cataloguing 7 open issues (in the
  #60-#69 range) covering the test coverage gap, `rm` policy bypass,
  allow-by-default on malformed JSON, fragile sed parsing in govern-shell.sh,
  over-broad shell-risk prefix matching, silent log write failure, and dead code.

- internal/governance/engine.go: Add godoc to Evaluate and GetTimeout.
- internal/logger/logger.go: Add godoc to all 8 exported functions.
- internal/tools/tools.go: Expand the existing godoc on Execute and FormatForPrompt.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 README.md                     | 14 ++++++--------
 docs/architecture.md          | 25 +++++++++++++------------
 docs/roadmap.md               | 16 ++++++++++++++++
 internal/governance/engine.go |  3 +++
 internal/logger/logger.go     |  9 +++++++++
 internal/tools/tools.go       |  3 +++
 6 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index 996f7f2..c3c51ab 100644
--- a/README.md
+++ b/README.md
@@ -75,12 +75,12 @@ ShellForge is a **governed agent runtime** — not an agent framework, not an or
 It sits between any agent driver and the real world. The agent decides what it wants to do. ShellForge decides whether it's allowed.
 
 ```
-Agent Driver (Crush, Claude Code, Copilot CLI)
+Agent Driver (Goose, Claude Code, Copilot CLI)
   → ShellForge Governance (allow / deny / correct)
     → Your Environment (files, shell, git)
 ```
 
-**The core insight:** ShellForge's value is governance, not the agent loop. [Crush](https://github.com/charmbracelet/crush) handles agent execution. [Dagu](https://github.com/dagu-org/dagu) handles workflow orchestration. ShellForge wraps them all with [AgentGuard](https://github.com/AgentGuardHQ/agentguard) policy enforcement on every tool call.
+**The core insight:** ShellForge's value is governance, not the agent loop. [Goose](https://block.github.io/goose) handles local agent execution. [Dagu](https://github.com/dagu-org/dagu) handles workflow orchestration. ShellForge wraps them all with [AgentGuard](https://github.com/AgentGuardHQ/agentguard) policy enforcement on every tool call.
 
 ---
 
@@ -90,7 +90,7 @@ Agent Driver (Crush, Claude Code, Copilot CLI)
 |-------|---------|--------------|
 | **Infer** | [Ollama](https://ollama.com) | Local LLM inference (Metal GPU on Mac) |
 | **Optimize** | [RTK](https://github.com/rtk-ai/rtk) | Token compression — 70-90% reduction on shell output |
-| **Execute** | [Crush](https://github.com/charmbracelet/crush) | Go-native AI coding agent (TUI + headless) |
+| **Execute** | [Goose](https://block.github.io/goose) | AI coding agent with native Ollama support (headless) |
 | **Orchestrate** | [Dagu](https://github.com/dagu-org/dagu) | YAML DAG workflows with scheduling and web UI |
 | **Govern** | [AgentGuard](https://github.com/AgentGuardHQ/agentguard) | Policy enforcement on every action — allow/deny/correct |
 | **Sandbox** | [OpenShell](https://github.com/NVIDIA/OpenShell) | Kernel-level isolation (Docker on macOS) |
@@ -100,7 +100,6 @@ Agent Driver (Crush, Claude Code, Copilot CLI)
 shellforge status
 # Ollama        running (qwen3:30b loaded)
 # RTK           v0.4.2
-# Crush         v1.0.0
 # AgentGuard    enforce mode (5 rules)
 # Dagu          connected (web UI at :8080)
 # OpenShell     Docker sandbox active
@@ -113,7 +112,7 @@ shellforge status
 
 | Command | Description |
 |---------|-------------|
-| `shellforge run <driver> "prompt"` | Run a governed agent (claude, copilot, codex, gemini, crush) |
+| `shellforge run <driver> "prompt"` | Run a governed agent (goose, claude, copilot, codex, gemini) |
 | `shellforge setup` | Install Ollama, create governance config, verify stack |
 | `shellforge agent "prompt"` | Run a governed agent — every tool call checked |
 | `shellforge qa [dir]` | QA analysis — find test gaps and issues |
@@ -134,7 +133,6 @@ shellforge run claude "review this code"
 shellforge run codex "generate tests"
 shellforge run copilot "update docs"
 shellforge run gemini "security audit"
-shellforge run crush "analyze test gaps"
 ```
 
 Orchestrate multiple drivers in a single Dagu DAG:
@@ -156,8 +154,8 @@ See `dags/multi-driver-swarm.yaml` and `dags/workspace-swarm.yaml` for examples.
 └────────────────────┬──────────────────────────────┘
                      │ task
 ┌────────────────────▼──────────────────────────────┐
-│  Crush (Execution Engine)                          │
-│  Agent loop · Tool calling · TUI + headless        │
+│  Goose (Execution Engine)                          │
+│  Agent loop · Tool calling · Ollama-native         │
 │  Uses Ollama for inference                         │
 └────────────────────┬──────────────────────────────┘
                      │ tool call
diff --git a/docs/architecture.md b/docs/architecture.md
index 8fadfdc..9a6394d 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -8,19 +8,19 @@ ShellForge is a single Go binary (~7.5MB) that provides governed local AI agent
 
 ```
 ┌─────────────────────────────────────────────┐
-│  Layer 8: OpenShell (Kernel Sandbox)        │  NVIDIA Landlock/Seccomp
+│  Layer 8: OpenShell (Kernel Sandbox)        │  Docker/Colima isolation
 ├─────────────────────────────────────────────┤
 │  Layer 7: DefenseClaw (Supply Chain)        │  Cisco AI BoM Scanner
 ├─────────────────────────────────────────────┤
-│  Layer 6: DeepAgents (Multi-Agent)          │  LangChain orchestration
+│  Layer 6: Dagu (Orchestration)              │  YAML DAG workflows + web UI
 ├─────────────────────────────────────────────┤
-│  Layer 5: OpenCode (AI Coding)              │  Go CLI, native tools
+│  Layer 5: Goose / OpenCode (Execution)      │  Primary local agent driver
 ├─────────────────────────────────────────────┤
 │  Layer 4: AgentGuard (Governance Kernel)    │  Policy enforcement
 ├─────────────────────────────────────────────┤
-│  Layer 3: TurboQuant (Quantization)         │  KV cache optimization
+│  Layer 3: TurboQuant (Quantization)         │  KV cache optimization (optional)
 ├─────────────────────────────────────────────┤
-│  Layer 2: RTK (Token Compression)           │  Auto-compress I/O
+│  Layer 2: RTK (Token Compression)           │  Auto-compress I/O (optional)
 ├─────────────────────────────────────────────┤
 │  Layer 1: Ollama (Local LLM)                │  Metal GPU on Mac
 └─────────────────────────────────────────────┘
@@ -47,26 +47,27 @@ internal/
 
 ShellForge uses a pluggable engine system:
 
-1. **OpenCode** (preferred) — subprocess, `--non-interactive` mode, governance-wrapped
-2. **DeepAgents** — subprocess, Node.js/Python SDK, governance-wrapped
-3. **Native** (fallback) — built-in multi-turn loop with Ollama + tool calling
+1. **Goose (Block)** (preferred local driver) — subprocess, native Ollama support, SHELL wrapped via `govern-shell.sh`
+2. **OpenCode** (alternative) — subprocess, `--non-interactive` mode, governance-wrapped
+3. **DeepAgents** (alternative) — subprocess, Node.js/Python SDK, governance-wrapped
+4. **Native** (fallback) — built-in multi-turn loop with Ollama + tool calling
 
-The engine selection is automatic based on what's installed.
+The engine selection is automatic based on what's installed. Use `shellforge run goose` for local models, or `shellforge agent` for the native loop.
 
 ## Governance Flow
 
 ```
-User Request → Engine (OpenCode/DeepAgents/Native)
+User Request → Engine (Goose/OpenCode/DeepAgents/Native)
   → Tool Call → Governance Check (agentguard.yaml)
     → ALLOW → Execute Tool → Return Result
-    → DENY  → Log Violation → Block Execution
+    → DENY  → Log Violation → Correction Feedback → Retry
 ```
 
 ## Data Flow
 
 1. User invokes `./shellforge qa` (or agent, report, scan)
 2. CLI loads `agentguard.yaml` governance policy
-3. Detects available engine (OpenCode > DeepAgents > Native)
+3. Detects available engine (Goose > OpenCode > DeepAgents > Native)
 4. Engine sends prompt to Ollama (via RTK for token compression)
 5. LLM responds with tool calls
 6. Each tool call passes through governance check
diff --git a/docs/roadmap.md b/docs/roadmap.md
index 8c48323..ab10459 100644
--- a/docs/roadmap.md
+++ b/docs/roadmap.md
@@ -97,6 +97,22 @@ Foundation types exist (`internal/action/`, `internal/orchestrator/`, `internal/
 
 ---
 
+## Bug Backlog (Open Issues)
+
+Bugs identified during v0.6.x development. Fix before v1.0.
+
+| Issue | Package | Severity | Description |
+|-------|---------|----------|-------------|
+| [#69](https://github.com/AgentGuardHQ/shellforge/issues/69) | `agentguard.yaml` | High | Governance gap: plain `rm` and `rm -r` bypass `no-destructive-rm` policy |
+| [#67](https://github.com/AgentGuardHQ/shellforge/issues/67) | `scripts/govern-shell.sh` | Medium | Fragile `sed`-based JSON parsing — denial reason extraction can fail or corrupt |
+| [#65](https://github.com/AgentGuardHQ/shellforge/issues/65) | `internal/scheduler` | Medium | `os.WriteFile` error silently ignored — audit log loss |
+| [#63](https://github.com/AgentGuardHQ/shellforge/issues/63) | `internal/normalizer` | Medium | `classifyShellRisk` prefix match too broad — `catalog_tool` classified as read-only |
+| [#62](https://github.com/AgentGuardHQ/shellforge/issues/62) | `cmd/shellforge` | Medium | `cmdEvaluate` ignores JSON unmarshal error — malformed input defaults to allow |
+| [#61](https://github.com/AgentGuardHQ/shellforge/issues/61) | `internal/intent` | Low | Dead code in `flattenParams` — first assignment immediately overwritten |
+| [#60](https://github.com/AgentGuardHQ/shellforge/issues/60) | all packages | High | Zero test coverage — critical for a governance runtime |
+
+---
+
 ## Stack (as of v0.6.1)
 
 | Component | Role | Status |
diff --git a/internal/governance/engine.go b/internal/governance/engine.go
index 505c78e..be6f943 100644
--- a/internal/governance/engine.go
+++ b/internal/governance/engine.go
@@ -69,6 +69,8 @@ Policies: cfg.Policies,
 }, nil
 }
 
+// Evaluate checks a tool call against all policies and returns an allow/deny Decision.
+// In enforce mode, deny policies block execution. In monitor mode, they log only.
 func (e *Engine) Evaluate(tool string, params map[string]string) Decision {
 for _, p := range e.Policies {
 if e.matches(p, tool, params) {
@@ -98,6 +100,7 @@ Mode:       e.Mode,
 }
 }
 
+// GetTimeout returns the first positive policy-level timeout in seconds, or 300 if none is set.
 func (e *Engine) GetTimeout() int {
 for _, p := range e.Policies {
 if p.Timeout > 0 {
diff --git a/internal/logger/logger.go b/internal/logger/logger.go
index 54c67c8..062f631 100644
--- a/internal/logger/logger.go
+++ b/internal/logger/logger.go
@@ -45,6 +45,8 @@ entries []Entry
 logFile *os.File
 )
 
+// Init opens a JSONL log file under outputDir named "<agent>-<timestamp>.jsonl".
+// Must be called before any log functions; call Close when done.
 func Init(outputDir, agent string) error {
 if err := os.MkdirAll(outputDir, 0o755); err != nil {
 return err
@@ -59,6 +61,7 @@ logFile = f
 return nil
 }
 
+// Close flushes and closes the current log file.
 func Close() {
 if logFile != nil {
 logFile.Close()
@@ -74,6 +77,7 @@ logFile.WriteString("\n")
 }
 }
 
+// Governance logs a governance evaluation result to stdout and the JSONL log.
 func Governance(agent, tool string, params map[string]string, allowed bool, policyName, reason string) {
 status := "allow"
 if !allowed {
@@ -99,6 +103,7 @@ Decision:  &DecisionLog{Allowed: allowed, PolicyName: policyName, Reason: reason
 })
 }
 
+// ToolResult logs the outcome of a tool execution to stdout and the JSONL log.
 func ToolResult(agent, tool string, success bool, output string) {
 icon := "✓"
 if !success {
@@ -123,6 +128,7 @@ Message:   truncate(output, 200),
 })
 }
 
+// Agent logs a free-form info message from the named agent.
 func Agent(agent, message string) {
 fmt.Printf("[%s] %s\n", agent, message)
 record(Entry{
@@ -133,6 +139,7 @@ Message:   message,
 })
 }
 
+// ModelCall logs token usage and latency for an Ollama inference call.
 func ModelCall(agent string, promptTokens, responseTokens int, durationMs int64) {
 record(Entry{
 Timestamp: time.Now().UTC().Format(time.RFC3339),
@@ -143,6 +150,7 @@ Duration:  durationMs,
 })
 }
 
+// Error logs an error message to stderr and the JSONL log.
 func Error(agent, message string) {
 fmt.Fprintf(os.Stderr, "[%s] ERROR: %s\n", agent, message)
 record(Entry{
@@ -153,6 +161,7 @@ Message:   message,
 })
 }
 
+// GetEntries returns all log entries recorded in this session (in-memory only).
 func GetEntries() []Entry { return entries }
 
 func summarize(params map[string]string) string {
diff --git a/internal/tools/tools.go b/internal/tools/tools.go
index 598866d..56a0654 100644
--- a/internal/tools/tools.go
+++ b/internal/tools/tools.go
@@ -80,6 +80,8 @@ func ExecuteDirect(tool string, params map[string]string, timeoutSec int) Result
 }
 
 // Execute runs a tool call through governance, then executes if allowed.
+// Every governance decision is logged via logger.Governance, whether allowed or denied.
+// This is the fully governed path; use ExecuteDirect when governance is already checked.
 func Execute(engine *governance.Engine, agent, tool string, params map[string]string) Result {
 decision := engine.Evaluate(tool, params)
 logger.Governance(agent, tool, params, decision.Allowed, decision.PolicyName, decision.Reason)
@@ -224,6 +226,7 @@ return Result{Success: true, Output: output}
 }
 
 // FormatForPrompt returns tool descriptions for the system prompt.
+// The output is Markdown-formatted and built from the entries in Definitions.
 func FormatForPrompt() string {
 var sb strings.Builder
 for _, t := range Definitions {