From be9cb197b639a9f96b5d4415ff7493503877de4c Mon Sep 17 00:00:00 2001
From: Derek Misler <derek.misler@docker.com>
Date: Mon, 2 Feb 2026 18:55:44 -0500
Subject: [PATCH 01/10] feat: add nightly scanner that opens issues for code
 quality problems

Adds a read-only agent that scans the codebase daily and reports security
vulnerabilities, bugs, and documentation gaps as JSON, plus a workflow that
turns those findings into GitHub issues.

- Runs daily at 6am UTC (or manual trigger)
- Creates max 2 issues per run to avoid flooding
- Deduplicates against existing open issues with the 'automated' label
- Dry-run mode for testing (manual trigger only; logs issues instead of
  creating them)
- Strong anti-hallucination rules (read-only, must verify files exist)
---
 .github/agents/nightly-scanner.yaml |  94 ++++++++++++++++
 .github/workflows/nightly-scan.yml  | 159 ++++++++++++++++++++++++++++
 2 files changed, 253 insertions(+)
 create mode 100644 .github/agents/nightly-scanner.yaml
 create mode 100644 .github/workflows/nightly-scan.yml

diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml
new file mode 100644
index 000000000..7cfbcad96
--- /dev/null
+++ b/.github/agents/nightly-scanner.yaml
@@ -0,0 +1,94 @@
+version: "2"
+
+models:
+  claude-sonnet:
+    provider: anthropic
+    model: claude-sonnet-4-5
+    max_tokens: 8192
+    temperature: 0.2
+
+agents:
+  root:
+    model: claude-sonnet
+    description: Scans codebase nightly and reports issues in structured format
+    instruction: |
+      You are a senior engineer performing a nightly codebase audit. Your job is to find
+      real issues and report them clearly—NOT to fix them.
+
+      ## Your task
+
+      Scan this codebase and identify 1-2 of the most important issues. Quality over quantity.
+
+      ## What to look for (in priority order)
+
+      1. **Security vulnerabilities**
+         - SQL/command injection, path traversal
+         - Hardcoded secrets or credentials
+         - Insecure TLS, weak crypto
+         - Missing input validation
+
+      2. **Bugs that cause runtime errors**
+         - Nil pointer dereferences
+         - Ignored errors from critical operations
+         - Resource leaks (unclosed files, connections)
+         - Race conditions, deadlocks
+
+      3. **Documentation gaps** (only if no security/bug issues found)
+         - Public APIs without documentation
+         - Complex functions without explanations
+         - Missing README sections
+
+      ## What to IGNORE
+
+      - Style, formatting, linting issues
+      - Minor code smells that don't cause bugs
+      - Test coverage (unless tests are actually broken)
+      - Naming conventions
+
+      ## ⛔ CRITICAL GROUNDING RULES ⛔
+
+      - **ONLY report issues in files you have actually read with read_file**
+      - **EVERY file path must be verified to exist**
+      - **EVERY code snippet must be quoted EXACTLY from the file**
+      - **EVERY line number must be accurate**
+      - **If you find nothing significant, say "No issues found" - do NOT invent issues**
+
+      ## Your workflow
+
+      1. Use `directory_tree` to understand the codebase structure
+      2. Identify key source files (focus on `cmd/`, `internal/`, `pkg/`)
+      3. Use `read_file` to examine code
+      4. For each potential issue, verify by reading surrounding context
+      5. Select the 1-2 most important, verified issues
+
+      ## Output format
+
+      Output ONLY a JSON array (no markdown, no explanation). Each issue should have:
+
+      ```json
+      [
+        {
+          "title": "Brief issue title (50 chars max)",
+          "category": "security" | "bug" | "documentation",
+          "severity": "critical" | "high" | "medium",
+          "file": "path/to/file.go",
+          "line": 123,
+          "code": "exact code snippet from file",
+          "problem": "Clear explanation of why this is an issue",
+          "suggestion": "How to fix it"
+        }
+      ]
+      ```
+
+      If no issues found, output:
+      ```json
+      []
+      ```
+
+    toolsets:
+      - type: filesystem
+        tools:
+          - read_file
+          - read_multiple_files
+          - list_directory
+          - directory_tree
diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml
new file mode 100644
index 000000000..8f4e403f0
--- /dev/null
+++ b/.github/workflows/nightly-scan.yml
@@ -0,0 +1,159 @@
+name: Nightly Codebase Scan
+
+on:
+  schedule:
+    # Run every day at 6am UTC
+    - cron: '0 6 * * *'
+  workflow_dispatch:
+    inputs:
+      dry-run:
+        description: 'Log issues only, do not create them'
+        type: boolean
+        default: false
+
+permissions:
+  contents: read
+  issues: write
+
+concurrency:
+  group: nightly-scan
+  cancel-in-progress: false
+
+jobs:
+  scan:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
+        with:
+          fetch-depth: 1
+
+      - name: Run nightly scan
+        id: scan
+        uses: docker/cagent-action@latest
+        with:
+          agent: ${{ github.workspace }}/.github/agents/nightly-scanner.yaml
+          anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
+          timeout: 600
+
+      - name: Parse and create issues
+        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
+        env:
+          DRY_RUN: ${{ inputs.dry-run || false }}
+        with:
+          script: |
+            const fs = require('fs');
+            const outputFile = '${{ steps.scan.outputs.output-file }}';
+            const dryRun = process.env.DRY_RUN === 'true';
+
+            if (!fs.existsSync(outputFile)) {
+              console.log('No output file found');
+              return;
+            }
+
+            const rawOutput = fs.readFileSync(outputFile, 'utf8');
+            console.log('Raw agent output:', rawOutput);
+
+            // Extract JSON from output (agent might include extra text)
+            let issues = [];
+            try {
+              // Try to find JSON array in the output
+              const jsonMatch = rawOutput.match(/\[[\s\S]*\]/);
+              if (jsonMatch) {
+                issues = JSON.parse(jsonMatch[0]);
+              }
+            } catch (e) {
+              console.log('Failed to parse JSON:', e.message);
+              console.log('Agent output was not valid JSON - skipping issue creation');
+              return;
+            }
+
+            if (!Array.isArray(issues) || issues.length === 0) {
+              console.log('✅ No issues found by scanner');
+              return;
+            }
+
+            console.log(`Found ${issues.length} issue(s)`);
+
+            // Get existing open issues to avoid duplicates
+            const { data: existingIssues } = await github.rest.issues.listForRepo({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              state: 'open',
+              labels: 'automated',
+              per_page: 100
+            });
+
+            const existingTitles = existingIssues.map(i => i.title.toLowerCase());
+
+            // Limit to 2 issues per run
+            const issuesToCreate = issues.slice(0, 2);
+            let created = 0;
+
+            for (const issue of issuesToCreate) {
+              // Skip if similar issue already exists
+              const titleLower = issue.title.toLowerCase();
+              if (existingTitles.some(t => t.includes(titleLower) || titleLower.includes(t))) {
+                console.log(`⏭️ Skipping duplicate: ${issue.title}`);
+                continue;
+              }
+
+              const severityEmoji = {
+                critical: '🔴',
+                high: '🟠',
+                medium: '🟡'
+              }[issue.severity] || '⚪';
+
+              const categoryLabel = {
+                security: 'kind/bug',       // security issues are bugs
+                bug: 'kind/bug',
+                documentation: 'kind/documentation'
+              }[issue.category] || 'kind/bug';
+
+              const body = `## ${severityEmoji} ${issue.severity.toUpperCase()} - ${issue.category}
+
+**File:** \`${issue.file}\`${issue.line ? ` (line ${issue.line})` : ''}
+
+### Code
+
+\`\`\`go
+${issue.code}
+\`\`\`
+
+### Problem
+
+${issue.problem}
+
+### Suggested Fix
+
+${issue.suggestion}
+
+---
+
+*Found by [nightly codebase scan](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})*
+`;
+
+              if (dryRun) {
+                console.log(`\n📋 Would create issue:\n  Title: ${issue.title}\n  Labels: automated, ${categoryLabel}`);
+                console.log(`  Body preview:\n${body.substring(0, 200)}...`);
+              } else {
+                try {
+                  const { data: newIssue } = await github.rest.issues.create({
+                    owner: context.repo.owner,
+                    repo: context.repo.repo,
+                    title: `[${issue.category}] ${issue.title}`,
+                    body: body,
+                    labels: ['automated', categoryLabel]
+                  });
+                  console.log(`✅ Created issue #${newIssue.number}: ${newIssue.html_url}`);
+                  created++;
+                } catch (e) {
+                  console.log(`❌ Failed to create issue: ${e.message}`);
+                }
+              }
+            }
+
+            if (!dryRun) {
+              console.log(`\n📊 Summary: Created ${created} issue(s)`);
+            }

From d8479b8adf5693f2be9ca0cdeb7594db0e0a192f Mon Sep 17 00:00:00 2001
From: Derek Misler <derek.misler@docker.com>
Date: Mon, 2 Feb 2026 19:03:58 -0500
Subject: [PATCH 02/10] feat: add multi-agent architecture with persistent
 memory

- Root agent (claude-sonnet) orchestrates scan across sub-agents
- Security sub-agent (claude-opus) for vulnerability detection
- Bugs sub-agent (claude-sonnet) for logic errors and resource leaks
- Documentation sub-agent (claude-haiku) for doc gap detection
- Add GitHub Actions cache for persistent scanner memory
- Memory stores skip patterns, context, and feedback across runs
- Each sub-agent has strict grounding rules to prevent hallucinations
---
 .github/agents/nightly-scanner.yaml | 258 +++++++++++++++++++++++-----
 .github/workflows/nightly-scan.yml  |  21 +++
 2 files changed, 233 insertions(+), 46 deletions(-)

diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml
index 7cfbcad96..4def3dd01 100644
--- a/.github/agents/nightly-scanner.yaml
+++ b/.github/agents/nightly-scanner.yaml
@@ -1,69 +1,68 @@
 version: "2"
 
 models:
+  # Orchestrator - needs good reasoning for delegation
   claude-sonnet:
     provider: anthropic
     model: claude-sonnet-4-5
+    max_tokens: 4096
+    temperature: 0.1
+
+  # Security analysis - use stronger model for catching subtle vulnerabilities
+  claude-opus:
+    provider: anthropic
+    model: claude-opus-4
     max_tokens: 8192
+    temperature: 0.1
+
+  # Documentation - faster model is sufficient
+  claude-haiku:
+    provider: anthropic
+    model: claude-haiku-3-5
+    max_tokens: 4096
     temperature: 0.2
 
 agents:
   root:
     model: claude-sonnet
-    description: Scans codebase nightly and reports issues in structured format
+    description: Orchestrates nightly codebase scan across specialized sub-agents
+    sub_agents:
+      - security
+      - bugs
+      - documentation
     instruction: |
-      You are a senior engineer performing a nightly codebase audit. Your job is to find
-      real issues and report them clearly—NOT to fix them.
-
-      ## Your task
-
-      Scan this codebase and identify 1-2 of the most important issues. Quality over quantity.
+      You are the orchestrator for a nightly codebase scan. Your job is to delegate
+      analysis to specialized sub-agents and compile their findings.
 
-      ## What to look for (in priority order)
+      ## First: Load Memory
 
-      1. **Security vulnerabilities**
-         - SQL/command injection, path traversal
-         - Hardcoded secrets or credentials
-         - Insecure TLS, weak crypto
-         - Missing input validation
+      Read `.github/scanner-memory.json` if it exists. This contains:
+      - Files/patterns to skip (known false positives)
+      - Codebase-specific context learned from past scans
+      - Feedback from humans on previous issues
 
-      2. **Bugs that cause runtime errors**
-         - Nil pointer dereferences
-         - Ignored errors from critical operations
-         - Resource leaks (unclosed files, connections)
-         - Race conditions, deadlocks
-
-      3. **Documentation gaps** (only if no security/bug issues found)
-         - Public APIs without documentation
-         - Complex functions without explanations
-         - Missing README sections
-
-      ## What to IGNORE
-
-      - Style, formatting, linting issues
-      - Minor code smells that don't cause bugs
-      - Test coverage (unless tests are actually broken)
-      - Naming conventions
-
-      ## ⛔ CRITICAL GROUNDING RULES ⛔
+      ## Your workflow
 
-      - **ONLY report issues in files you have actually read with read_file**
-      - **EVERY file path must be verified to exist**
-      - **EVERY code snippet must be quoted EXACTLY from the file**
-      - **EVERY line number must be accurate**
-      - **If you find nothing significant, say "No issues found" - do NOT invent issues**
+      1. Read memory file to understand what to skip
+      2. Use `directory_tree` to understand the codebase structure
+      3. Delegate to sub-agents in order:
+         - `security` - for security vulnerabilities (HIGHEST PRIORITY)
+         - `bugs` - for logic errors, resource leaks, race conditions
+         - `documentation` - for missing docs (ONLY if no security/bug issues)
+      4. Collect findings from each sub-agent
+      5. Filter out any issues that match patterns in memory's "skip" list
+      6. Select the top 1-2 most important issues
+      7. Update memory with any new learnings
 
-      ## Your workflow
+      ## Memory updates
 
-      1. Use `directory_tree` to understand the codebase structure
-      2. Identify key source files (focus on `cmd/`, `internal/`, `pkg/`)
-      3. Use `read_file` to examine code
-      4. For each potential issue, verify by reading surrounding context
-      5. Select the 1-2 most important, verified issues
+      If you discover patterns that should be remembered (e.g., "this codebase uses
+      custom error handling that looks like ignored errors but isn't"), add them
+      to memory by writing to `.github/scanner-memory.json`.
 
       ## Output format
 
-      Output ONLY a JSON array (no markdown, no explanation). Each issue should have:
+      Output ONLY a JSON array with the final 1-2 issues:
 
       ```json
       [
@@ -80,11 +79,126 @@ agents:
       ]
       ```
 
-      If no issues found, output:
+      If no issues found, output: `[]`
+
+    toolsets:
+      - type: filesystem
+
+  security:
+    model: claude-opus
+    description: Deep security vulnerability analysis
+    instruction: |
+      You are a security expert scanning for vulnerabilities. Be thorough but precise.
+
+      ## ⛔ CRITICAL GROUNDING RULES ⛔
+
+      - **ONLY report issues in files you have actually read**
+      - **EVERY file path must be verified with read_file**
+      - **EVERY code snippet must be EXACT quotes from files**
+      - **If unsure, don't report it**
+
+      ## What to look for
+
+      ### Critical
+      - SQL injection (string concatenation in queries)
+      - Command injection (exec with user input)
+      - Path traversal (user input in file paths)
+      - Hardcoded secrets, API keys, credentials
+      - Authentication/authorization bypass
+
+      ### High
+      - Insecure TLS (InsecureSkipVerify: true)
+      - Weak cryptography (MD5, SHA1 for security)
+      - Missing input validation on external data
+      - SSRF vulnerabilities
+      - Unsafe deserialization
+
+      ### Medium
+      - Verbose error messages exposing internals
+      - Debug/dev settings in production code
+      - Missing rate limiting on sensitive endpoints
+
+      ## Output format
+
+      Return a JSON array of findings:
+      ```json
+      [
+        {
+          "title": "Brief title",
+          "severity": "critical|high|medium",
+          "file": "path/to/file.go",
+          "line": 123,
+          "code": "exact code",
+          "problem": "explanation",
+          "suggestion": "fix"
+        }
+      ]
+      ```
+
+      Return `[]` if no security issues found.
+
+    toolsets:
+      - type: filesystem
+        tools:
+          - read_file
+          - read_multiple_files
+          - list_directory
+          - directory_tree
+
+  bugs:
+    model: claude-sonnet
+    description: Logic errors, resource leaks, and concurrency bugs
+    instruction: |
+      You are analyzing code for bugs that cause runtime errors or incorrect behavior.
+
+      ## ⛔ CRITICAL GROUNDING RULES ⛔
+
+      - **ONLY report issues in files you have actually read**
+      - **EVERY file path must be verified with read_file**
+      - **EVERY code snippet must be EXACT quotes from files**
+      - **If unsure, don't report it**
+
+      ## What to look for
+
+      ### High
+      - Nil pointer dereference (accessing pointer before nil check)
+      - Ignored errors from operations that can fail
+      - Resource leaks (files, connections, channels never closed)
+      - Race conditions (shared state without synchronization)
+      - Deadlocks (incorrect lock ordering)
+
+      ### Medium
+      - Unreachable code
+      - Integer overflow in size calculations
+      - Slice bounds errors
+      - Goroutine leaks
+
+      ## What to IGNORE
+
+      - Style issues, naming conventions
+      - Defensive nil checks (these are good!)
+      - Errors that are logged but not returned (often intentional)
+      - Test files (unless tests are broken)
+
+      ## Output format
+
+      Return a JSON array of findings:
       ```json
-      []
+      [
+        {
+          "title": "Brief title",
+          "severity": "high|medium",
+          "file": "path/to/file.go",
+          "line": 123,
+          "code": "exact code",
+          "problem": "explanation",
+          "suggestion": "fix"
+        }
+      ]
       ```
 
+      Return `[]` if no bugs found.
+
     toolsets:
       - type: filesystem
         tools:
@@ -92,3 +206,55 @@ agents:
           - read_multiple_files
           - list_directory
           - directory_tree
+
+  documentation:
+    model: claude-haiku
+    description: Documentation gaps and improvements
+    instruction: |
+      You analyze code for documentation issues. Only report significant gaps.
+
+      ## ⛔ GROUNDING RULES ⛔
+
+      - **ONLY report issues in files you have actually read**
+      - **Focus on public APIs and exported functions**
+
+      ## What to look for
+
+      - Exported functions/types with no documentation
+      - Complex algorithms without explanations
+      - Missing package-level documentation
+      - Outdated comments that don't match code
+      - Missing README sections for key features
+
+      ## What to IGNORE
+
+      - Internal/private functions
+      - Simple getter/setter methods
+      - Test files
+      - Generated code
+
+      ## Output format
+
+      Return a JSON array of findings:
+      ```json
+      [
+        {
+          "title": "Brief title",
+          "severity": "medium",
+          "file": "path/to/file.go",
+          "line": 123,
+          "code": "function signature or relevant code",
+          "problem": "what's missing",
+          "suggestion": "what to document"
+        }
+      ]
+      ```
+
+      Return `[]` if no documentation issues found.
+
+    toolsets:
+      - type: filesystem
+        tools:
+          - read_file
+          - list_directory
+          - directory_tree
diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml
index 8f4e403f0..ba124f9b6 100644
--- a/.github/workflows/nightly-scan.yml
+++ b/.github/workflows/nightly-scan.yml
@@ -29,6 +29,20 @@ jobs:
         with:
           fetch-depth: 1
 
+      - name: Restore scanner memory
+        uses: actions/cache/restore@v4
+        with:
+          path: .github/scanner-memory.json
+          key: scanner-memory-${{ github.repository }}-${{ github.run_id }}
+          restore-keys: |
+            scanner-memory-${{ github.repository }}-
+
+      - name: Initialize memory file if missing
+        run: |
+          if [ ! -f .github/scanner-memory.json ]; then
+            echo '{"skip_patterns": [], "context": [], "feedback": []}' > .github/scanner-memory.json
+          fi
+
       - name: Run nightly scan
         id: scan
         uses: docker/cagent-action@latest
@@ -157,3 +171,10 @@ ${issue.suggestion}
             if (!dryRun) {
               console.log(`\n📊 Summary: Created ${created} issue(s)`);
             }
+
+      - name: Save scanner memory
+        uses: actions/cache/save@v4
+        if: always()
+        with:
+          path: .github/scanner-memory.json
+          key: scanner-memory-${{ github.repository }}-${{ github.run_id }}

From 39d662fca65c47ffd56231f967f74ac24f260d7f Mon Sep 17 00:00:00 2001
From: Derek Misler <derek.misler@docker.com>
Date: Mon, 2 Feb 2026 19:06:27 -0500
Subject: [PATCH 03/10] fix: improve scanner resilience and pin cache action

Agent improvements:
- Documentation agent reads all markdown files before analysis
- All sub-agents explicitly accept empty results as valid outcome
- Added read_multiple_files tool to documentation agent

Workflow improvements:
- Pin actions/cache to v4.2.0 SHA for security (the restore step now uses
  actions/cache instead of actions/cache/restore)
- Use static cache key (matches cagent-action pattern)
---
 .github/agents/nightly-scanner.yaml | 33 ++++++++++++++++++++++++++---
 .github/workflows/nightly-scan.yml  | 10 ++++-----
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml
index 4def3dd01..ac8854e41 100644
--- a/.github/agents/nightly-scanner.yaml
+++ b/.github/agents/nightly-scanner.yaml
@@ -97,6 +97,11 @@ agents:
       - **EVERY code snippet must be EXACT quotes from files**
       - **If unsure, don't report it**
 
+      ## Important: No findings is a valid outcome
+
+      If the codebase has no security issues, that's great! Return an empty array `[]`.
+      Do NOT manufacture issues just to have something to report. Quality over quantity.
+
       ## What to look for
 
       ### Critical
@@ -135,7 +140,7 @@ agents:
       ]
       ```
 
-      Return `[]` if no security issues found.
+      Return `[]` if no security issues found. This is perfectly acceptable!
 
     toolsets:
       - type: filesystem
@@ -158,6 +163,11 @@ agents:
       - **EVERY code snippet must be EXACT quotes from files**
       - **If unsure, don't report it**
 
+      ## Important: No findings is a valid outcome
+
+      If the codebase has no bugs, that's great! Return an empty array `[]`.
+      Do NOT manufacture issues just to have something to report. Quality over quantity.
+
       ## What to look for
 
       ### High
@@ -197,7 +207,7 @@ agents:
       ]
       ```
 
-      Return `[]` if no bugs found.
+      Return `[]` if no bugs found. This is perfectly acceptable!
 
     toolsets:
       - type: filesystem
@@ -213,11 +223,27 @@ agents:
     instruction: |
       You analyze code for documentation issues. Only report significant gaps.
 
+      ## First: Read existing documentation
+
+      Before analyzing anything, read ALL existing markdown files in the repository:
+      1. Use `directory_tree` to find all `.md` files
+      2. Read each markdown file (README.md, CONTRIBUTING.md, docs/*.md, etc.)
+      3. Understand what documentation already exists
+
+      This context is essential - you need to know what's already documented before
+      suggesting what's missing.
+
       ## ⛔ GROUNDING RULES ⛔
 
       - **ONLY report issues in files you have actually read**
       - **Focus on public APIs and exported functions**
 
+      ## Important: No findings is a valid outcome
+
+      If the documentation is already good, that's great! Return an empty array `[]`.
+      Do NOT manufacture issues just to have something to report. Quality over quantity.
+      Many codebases are well-documented - finding nothing wrong is a positive outcome.
+
       ## What to look for
 
       - Exported functions/types with no documentation
@@ -250,11 +276,12 @@ agents:
       ]
       ```
 
-      Return `[]` if no documentation issues found.
+      Return `[]` if no documentation issues found. This is perfectly acceptable!
 
     toolsets:
       - type: filesystem
         tools:
           - read_file
+          - read_multiple_files
           - list_directory
           - directory_tree
diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml
index ba124f9b6..82637116e 100644
--- a/.github/workflows/nightly-scan.yml
+++ b/.github/workflows/nightly-scan.yml
@@ -30,12 +30,12 @@ jobs:
           fetch-depth: 1
 
       - name: Restore scanner memory
-        uses: actions/cache/restore@v4
+        uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
         with:
           path: .github/scanner-memory.json
-          key: scanner-memory-${{ github.repository }}-${{ github.run_id }}
+          key: scanner-memory-${{ github.repository }}
           restore-keys: |
-            scanner-memory-${{ github.repository }}-
+            scanner-memory-${{ github.repository }}
 
       - name: Initialize memory file if missing
         run: |
@@ -173,8 +173,8 @@ ${issue.suggestion}
             }
 
       - name: Save scanner memory
-        uses: actions/cache/save@v4
+        uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
         if: always()
         with:
           path: .github/scanner-memory.json
-          key: scanner-memory-${{ github.repository }}-${{ github.run_id }}
+          key: scanner-memory-${{ github.repository }}

From d7ba70478286fbeab900c25ccce5e088d1478c47 Mon Sep 17 00:00:00 2001
From: Derek Misler <derek.misler@docker.com>
Date: Mon, 2 Feb 2026 19:11:39 -0500
Subject: [PATCH 04/10] refactor: use cagent's built-in memory system

- Replace custom JSON memory with cagent's SQLite memory toolset
- Agent uses get_memories/add_memory tools instead of file I/O
- Memory path resolves to .github/agents/scanner-memory.db
- Removes manual JSON initialization step
---
 .github/agents/nightly-scanner.yaml | 14 ++++++++------
 .github/workflows/nightly-scan.yml  | 10 ++--------
 2 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml
index ac8854e41..bf0563f94 100644
--- a/.github/agents/nightly-scanner.yaml
+++ b/.github/agents/nightly-scanner.yaml
@@ -36,29 +36,29 @@ agents:
 
       ## First: Load Memory
 
-      Read `.github/scanner-memory.json` if it exists. This contains:
+      Use `get_memories` to check for any learned patterns from previous scans. This includes:
       - Files/patterns to skip (known false positives)
       - Codebase-specific context learned from past scans
       - Feedback from humans on previous issues
 
       ## Your workflow
 
-      1. Read memory file to understand what to skip
+      1. Call `get_memories` to understand what to skip from previous runs
       2. Use `directory_tree` to understand the codebase structure
       3. Delegate to sub-agents in order:
          - `security` - for security vulnerabilities (HIGHEST PRIORITY)
          - `bugs` - for logic errors, resource leaks, race conditions
          - `documentation` - for missing docs (ONLY if no security/bug issues)
       4. Collect findings from each sub-agent
-      5. Filter out any issues that match patterns in memory's "skip" list
+      5. Filter out any issues that match patterns from memory
       6. Select the top 1-2 most important issues
-      7. Update memory with any new learnings
+      7. Use `add_memory` to store any new learnings
 
       ## Memory updates
 
       If you discover patterns that should be remembered (e.g., "this codebase uses
-      custom error handling that looks like ignored errors but isn't"), add them
-      to memory by writing to `.github/scanner-memory.json`.
+      custom error handling that looks like ignored errors but isn't"), use
+      `add_memory` to store them for future runs.
 
       ## Output format
 
@@ -83,6 +83,8 @@ agents:
 
     toolsets:
       - type: filesystem
+      - type: memory
+        path: scanner-memory.db
 
   security:
     model: claude-opus
diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml
index 82637116e..4e4618a64 100644
--- a/.github/workflows/nightly-scan.yml
+++ b/.github/workflows/nightly-scan.yml
@@ -32,17 +32,11 @@ jobs:
       - name: Restore scanner memory
         uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
         with:
-          path: .github/scanner-memory.json
+          path: ${{ github.workspace }}/.github/agents/scanner-memory.db
           key: scanner-memory-${{ github.repository }}
           restore-keys: |
             scanner-memory-${{ github.repository }}
 
-      - name: Initialize memory file if missing
-        run: |
-          if [ ! -f .github/scanner-memory.json ]; then
-            echo '{"skip_patterns": [], "context": [], "feedback": []}' > .github/scanner-memory.json
-          fi
-
       - name: Run nightly scan
         id: scan
         uses: docker/cagent-action@latest
@@ -176,5 +170,5 @@ ${issue.suggestion}
         uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
         if: always()
         with:
-          path: .github/scanner-memory.json
+          path: ${{ github.workspace }}/.github/agents/scanner-memory.db
           key: scanner-memory-${{ github.repository }}

From fcc7aa81ca5fd1ea27f92acde64648e1048a1597 Mon Sep 17 00:00:00 2001
From: Derek Misler <derek.misler@docker.com>
Date: Mon, 2 Feb 2026 19:13:51 -0500
Subject: [PATCH 05/10] refactor: use text format for sub-agent output

Sub-agents now return findings in simple text format (or `NO_ISSUES` when
there is nothing to report):
  FILE: path/to/file.go
  LINE: 123
  SEVERITY: high
  ...

Benefits over JSON:
- More natural for LLM output
- Less prone to formatting errors
- Matches cagent-action PR review pattern

Root agent still outputs JSON for workflow parsing.
---
 .github/agents/nightly-scanner.yaml | 99 ++++++++++++++++-------------
 1 file changed, 55 insertions(+), 44 deletions(-)

diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml
index bf0563f94..6e946f7c4 100644
--- a/.github/agents/nightly-scanner.yaml
+++ b/.github/agents/nightly-scanner.yaml
@@ -49,10 +49,27 @@ agents:
          - `security` - for security vulnerabilities (HIGHEST PRIORITY)
          - `bugs` - for logic errors, resource leaks, race conditions
          - `documentation` - for missing docs (ONLY if no security/bug issues)
-      4. Collect findings from each sub-agent
+      4. Collect findings from each sub-agent (they return text in FILE/LINE/SEVERITY format)
       5. Filter out any issues that match patterns from memory
       6. Select the top 1-2 most important issues
-      7. Use `add_memory` to store any new learnings
+      7. Convert selected findings to JSON output format
+      8. Use `add_memory` to store any new learnings
+
+      ## Sub-agent output format
+
+      Sub-agents return findings in this text format (or `NO_ISSUES` if none):
+      ```
+      FILE: path/to/file.go
+      LINE: 123
+      SEVERITY: high
+      TITLE: Brief description
+      CODE: exact code
+      PROBLEM: explanation
+      SUGGESTION: fix
+      ---
+      ```
+
+      Parse these and convert to JSON for your final output.
 
       ## Memory updates
 
@@ -127,22 +144,20 @@ agents:
 
       ## Output format
 
-      Return a JSON array of findings:
-      ```json
-      [
-        {
-          "title": "Brief title",
-          "severity": "critical|high|medium",
-          "file": "path/to/file.go",
-          "line": 123,
-          "code": "exact code",
-          "problem": "explanation",
-          "suggestion": "fix"
-        }
-      ]
+      For each finding, output in this EXACT format:
+
+      ```
+      FILE: path/to/file.go
+      LINE: 123
+      SEVERITY: critical|high|medium
+      TITLE: Brief description of the vulnerability
+      CODE: exact code snippet
+      PROBLEM: Why this is a security issue
+      SUGGESTION: How to fix it
+      ---
       ```
 
-      Return `[]` if no security issues found. This is perfectly acceptable!
+      If no security issues found, output: `NO_ISSUES`
 
     toolsets:
       - type: filesystem
@@ -194,22 +209,20 @@ agents:
 
       ## Output format
 
-      Return a JSON array of findings:
-      ```json
-      [
-        {
-          "title": "Brief title",
-          "severity": "high|medium",
-          "file": "path/to/file.go",
-          "line": 123,
-          "code": "exact code",
-          "problem": "explanation",
-          "suggestion": "fix"
-        }
-      ]
+      For each finding, output in this EXACT format:
+
+      ```
+      FILE: path/to/file.go
+      LINE: 123
+      SEVERITY: high|medium
+      TITLE: Brief description of the bug
+      CODE: exact code snippet
+      PROBLEM: Why this is a bug
+      SUGGESTION: How to fix it
+      ---
       ```
 
-      Return `[]` if no bugs found. This is perfectly acceptable!
+      If no bugs found, output: `NO_ISSUES`
 
     toolsets:
       - type: filesystem
@@ -263,22 +276,20 @@ agents:
 
       ## Output format
 
-      Return a JSON array of findings:
-      ```json
-      [
-        {
-          "title": "Brief title",
-          "severity": "medium",
-          "file": "path/to/file.go",
-          "line": 123,
-          "code": "function signature or relevant code",
-          "problem": "what's missing",
-          "suggestion": "what to document"
-        }
-      ]
+      For each finding, output in this EXACT format:
+
+      ```
+      FILE: path/to/file.go
+      LINE: 123
+      SEVERITY: medium
+      TITLE: Brief description of the doc gap
+      CODE: function signature or relevant code
+      PROBLEM: What documentation is missing
+      SUGGESTION: What should be documented
+      ---
       ```
 
-      Return `[]` if no documentation issues found. This is perfectly acceptable!
+      If no documentation issues found, output: `NO_ISSUES`
 
     toolsets:
       - type: filesystem

From e3b4610c5d54a7d491848857c60a0f2768c12fb9 Mon Sep 17 00:00:00 2001
From: Derek Misler <derek.misler@docker.com>
Date: Mon, 2 Feb 2026 19:18:55 -0500
Subject: [PATCH 06/10] feat: add reporter sub-agent for issue creation

Move issue creation from workflow to agent:
- New `reporter` sub-agent uses `gh` CLI to create issues
- Checks for duplicates before creating
- Selects appropriate labels based on category
- Workflow reduced from 175 lines to 55 lines

Dry-run mode now passed as prompt to agent.
---
 .github/agents/nightly-scanner.yaml | 136 ++++++++++++++++++++++++----
 .github/workflows/nightly-scan.yml  | 125 +------------------------
 2 files changed, 119 insertions(+), 142 deletions(-)

diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml
index 6e946f7c4..9491701bd 100644
--- a/.github/agents/nightly-scanner.yaml
+++ b/.github/agents/nightly-scanner.yaml
@@ -30,6 +30,7 @@ agents:
       - security
       - bugs
       - documentation
+      - reporter
     instruction: |
       You are the orchestrator for a nightly codebase scan. Your job is to delegate
       analysis to specialized sub-agents and compile their findings.
@@ -52,12 +53,12 @@ agents:
       4. Collect findings from each sub-agent (they return text in FILE/LINE/SEVERITY format)
       5. Filter out any issues that match patterns from memory
       6. Select the top 1-2 most important issues
-      7. Convert selected findings to JSON output format
+      7. Delegate to `reporter` with the selected findings to create GitHub issues
       8. Use `add_memory` to store any new learnings
 
       ## Sub-agent output format
 
-      Sub-agents return findings in this text format (or `NO_ISSUES` if none):
+      Analysis sub-agents return findings in this text format (or `NO_ISSUES` if none):
       ```
       FILE: path/to/file.go
       LINE: 123
@@ -69,8 +70,6 @@ agents:
       ---
       ```
 
-      Parse these and convert to JSON for your final output.
-
       ## Memory updates
 
       If you discover patterns that should be remembered (e.g., "this codebase uses
@@ -79,24 +78,19 @@ agents:
 
       ## Output format
 
-      Output ONLY a JSON array with the final 1-2 issues:
-
-      ```json
-      [
-        {
-          "title": "Brief issue title (50 chars max)",
-          "category": "security" | "bug" | "documentation",
-          "severity": "critical" | "high" | "medium",
-          "file": "path/to/file.go",
-          "line": 123,
-          "code": "exact code snippet from file",
-          "problem": "Clear explanation of why this is an issue",
-          "suggestion": "How to fix it"
-        }
-      ]
+      After the reporter creates issues, output a brief summary:
+      ```
+      SCAN COMPLETE
+      Issues created: N
+      - #123: Issue title
+      - #456: Issue title
       ```
 
-      If no issues found, output: `[]`
+      If no issues found or created, output:
+      ```
+      SCAN COMPLETE
+      No issues found.
+      ```
 
     toolsets:
       - type: filesystem
@@ -298,3 +292,105 @@ agents:
           - read_multiple_files
           - list_directory
           - directory_tree
+
+  reporter:
+    model: claude-haiku
+    description: Creates GitHub issues for findings
+    instruction: |
+      You create GitHub issues for code quality findings using the `gh` CLI.
+
+      ## Input
+
+      You receive findings from the orchestrator in this format:
+      ```
+      CATEGORY: security|bug|documentation
+      FILE: path/to/file.go
+      LINE: 123
+      SEVERITY: critical|high|medium
+      TITLE: Brief description
+      CODE: exact code snippet
+      PROBLEM: explanation
+      SUGGESTION: fix
+      ---
+      ```
+
+      ## Workflow
+
+      1. First, check for existing similar issues:
+         ```bash
+         gh issue list --label automated --state open --limit 100
+         ```
+
+      2. For each finding, skip if a similar issue already exists (matching title or file)
+
+      3. Create new issues with `gh issue create`:
+         ```bash
+         gh issue create \
+           --title "[category] Title here" \
+           --label "automated" \
+           --label "kind/bug" \
+           --body "issue body here"
+         ```
+
+      ## Issue format
+
+      Title: `[security] Brief title` or `[bug] Brief title` or `[documentation] Brief title`
+
+      Labels (select based on category):
+      - Always add `automated`
+      - `security` category → add `kind/bug` (security issues are bugs)
+      - `bug` category → add `kind/bug`
+      - `documentation` category → add `kind/documentation`
+
+      Body template:
+      ```markdown
+      ## SEVERITY_EMOJI SEVERITY - CATEGORY
+
+      **File:** `path/to/file.go` (line 123)
+
+      ### Code
+
+      ```go
+      exact code snippet
+      ```
+
+      ### Problem
+
+      Explanation of the issue
+
+      ### Suggested Fix
+
+      How to fix it
+
+      ---
+      *Found by nightly codebase scan*
+      ```
+
+      Severity emojis: 🔴 critical, 🟠 high, 🟡 medium
+
+      ## Output
+
+      Return what you created:
+      ```
+      CREATED: #123 [security] Issue title
+      CREATED: #456 [bug] Issue title
+      SKIPPED: Similar issue already exists for path/to/file.go
+      ```
+
+      Or if nothing to create:
+      ```
+      NO_ISSUES_TO_CREATE
+      ```
+
+      ## Important
+
+      - Maximum 2 issues per run
+      - Skip duplicates (check by title similarity and file path)
+      - Use exact code snippets from the findings
+
+    toolsets:
+      - type: shell
+
+permissions:
+  allow:
+    - shell:cmd=gh *
diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml
index 4e4618a64..3496d761a 100644
--- a/.github/workflows/nightly-scan.yml
+++ b/.github/workflows/nightly-scan.yml
@@ -38,134 +38,15 @@ jobs:
             scanner-memory-${{ github.repository }}
 
       - name: Run nightly scan
-        id: scan
         uses: docker/cagent-action@latest
+        env:
+          GH_TOKEN: ${{ github.token }}
         with:
           agent: ${{ github.workspace }}/.github/agents/nightly-scanner.yaml
+          prompt: ${{ inputs.dry-run && 'DRY RUN MODE: Do not create any issues. Just report what you would create.' || '' }}
           anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
           timeout: 600
 
-      - name: Parse and create issues
-        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
-        env:
-          DRY_RUN: ${{ inputs.dry-run || false }}
-        with:
-          script: |
-            const fs = require('fs');
-            const outputFile = '${{ steps.scan.outputs.output-file }}';
-            const dryRun = process.env.DRY_RUN === 'true';
-
-            if (!fs.existsSync(outputFile)) {
-              console.log('No output file found');
-              return;
-            }
-
-            const rawOutput = fs.readFileSync(outputFile, 'utf8');
-            console.log('Raw agent output:', rawOutput);
-
-            // Extract JSON from output (agent might include extra text)
-            let issues = [];
-            try {
-              // Try to find JSON array in the output
-              const jsonMatch = rawOutput.match(/\[[\s\S]*\]/);
-              if (jsonMatch) {
-                issues = JSON.parse(jsonMatch[0]);
-              }
-            } catch (e) {
-              console.log('Failed to parse JSON:', e.message);
-              console.log('Agent output was not valid JSON - skipping issue creation');
-              return;
-            }
-
-            if (!Array.isArray(issues) || issues.length === 0) {
-              console.log('✅ No issues found by scanner');
-              return;
-            }
-
-            console.log(`Found ${issues.length} issue(s)`);
-
-            // Get existing open issues to avoid duplicates
-            const { data: existingIssues } = await github.rest.issues.listForRepo({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              state: 'open',
-              labels: 'automated',
-              per_page: 100
-            });
-
-            const existingTitles = existingIssues.map(i => i.title.toLowerCase());
-
-            // Limit to 2 issues per run
-            const issuesToCreate = issues.slice(0, 2);
-            let created = 0;
-
-            for (const issue of issuesToCreate) {
-              // Skip if similar issue already exists
-              const titleLower = issue.title.toLowerCase();
-              if (existingTitles.some(t => t.includes(titleLower) || titleLower.includes(t))) {
-                console.log(`⏭️ Skipping duplicate: ${issue.title}`);
-                continue;
-              }
-
-              const severityEmoji = {
-                critical: '🔴',
-                high: '🟠',
-                medium: '🟡'
-              }[issue.severity] || '⚪';
-
-              const categoryLabel = {
-                security: 'kind/bug',       // security issues are bugs
-                bug: 'kind/bug',
-                documentation: 'kind/documentation'
-              }[issue.category] || 'kind/bug';
-
-              const body = `## ${severityEmoji} ${issue.severity.toUpperCase()} - ${issue.category}
-
-**File:** \`${issue.file}\`${issue.line ? ` (line ${issue.line})` : ''}
-
-### Code
-
-\`\`\`go
-${issue.code}
-\`\`\`
-
-### Problem
-
-${issue.problem}
-
-### Suggested Fix
-
-${issue.suggestion}
-
----
-
-*Found by [nightly codebase scan](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})*
-`;
-
-              if (dryRun) {
-                console.log(`\n📋 Would create issue:\n  Title: ${issue.title}\n  Labels: automated, ${categoryLabel}`);
-                console.log(`  Body preview:\n${body.substring(0, 200)}...`);
-              } else {
-                try {
-                  const { data: newIssue } = await github.rest.issues.create({
-                    owner: context.repo.owner,
-                    repo: context.repo.repo,
-                    title: `[${issue.category}] ${issue.title}`,
-                    body: body,
-                    labels: ['automated', categoryLabel]
-                  });
-                  console.log(`✅ Created issue #${newIssue.number}: ${newIssue.html_url}`);
-                  created++;
-                } catch (e) {
-                  console.log(`❌ Failed to create issue: ${e.message}`);
-                }
-              }
-            }
-
-            if (!dryRun) {
-              console.log(`\n📊 Summary: Created ${created} issue(s)`);
-            }
-
       - name: Save scanner memory
         uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
         if: always()

From 957dbce49e5fcafc4fe7ba70c9589d2c3451d2e1 Mon Sep 17 00:00:00 2001
From: Derek Misler <derek.misler@docker.com>
Date: Mon, 2 Feb 2026 19:25:26 -0500
Subject: [PATCH 07/10] feat: use multi-provider models for specialized tasks

Model assignments:
- Security: openai/o3-mini (reasoning model for subtle vulnerabilities)
- Bugs: google/gemini-2.5-flash (fast, good at Go code analysis)
- Documentation: anthropic/claude-haiku (sufficient for simpler task)
- Orchestrator: anthropic/claude-sonnet (coordination)
- Reporter: anthropic/claude-haiku (formatting + gh commands)

Workflow now passes all three API keys:
- ANTHROPIC_API_KEY
- OPENAI_API_KEY
- GOOGLE_API_KEY (mapped from GEMINI_API_KEY secret)
---
 .github/agents/nightly-scanner.yaml | 21 +++++++++++++--------
 .github/workflows/nightly-scan.yml  |  2 ++
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml
index 9491701bd..7d4cee04f 100644
--- a/.github/agents/nightly-scanner.yaml
+++ b/.github/agents/nightly-scanner.yaml
@@ -8,14 +8,19 @@ models:
     max_tokens: 4096
     temperature: 0.1
 
-  # Security analysis - use stronger model for catching subtle vulnerabilities
-  claude-opus:
-    provider: anthropic
-    model: claude-opus-4
+  # Security analysis - reasoning model excels at finding subtle vulnerabilities
+  openai-o3:
+    provider: openai
+    model: o3-mini
     max_tokens: 8192
-    temperature: 0.1
 
-  # Documentation - faster model is sufficient
+  # Bug detection - fast and surprisingly good at Go code analysis
+  gemini-flash:
+    provider: google
+    model: gemini-2.5-flash
+    max_tokens: 8192
+
+  # Documentation & reporting - faster model is sufficient
   claude-haiku:
     provider: anthropic
     model: claude-haiku-3-5
@@ -98,7 +103,7 @@ agents:
         path: scanner-memory.db
 
   security:
-    model: claude-opus
+    model: openai-o3
     description: Deep security vulnerability analysis
     instruction: |
       You are a security expert scanning for vulnerabilities. Be thorough but precise.
@@ -162,7 +167,7 @@ agents:
           - directory_tree
 
   bugs:
-    model: claude-sonnet
+    model: gemini-flash
     description: Logic errors, resource leaks, and concurrency bugs
     instruction: |
       You are analyzing code for bugs that cause runtime errors or incorrect behavior.
diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml
index 3496d761a..14efd6473 100644
--- a/.github/workflows/nightly-scan.yml
+++ b/.github/workflows/nightly-scan.yml
@@ -41,10 +41,12 @@ jobs:
         uses: docker/cagent-action@latest
         env:
           GH_TOKEN: ${{ github.token }}
+          GOOGLE_API_KEY: ${{ secrets.GEMINI_API_KEY }}
         with:
           agent: ${{ github.workspace }}/.github/agents/nightly-scanner.yaml
           prompt: ${{ inputs.dry-run && 'DRY RUN MODE: Do not create any issues. Just report what you would create.' || '' }}
           anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
+          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
           timeout: 600
 
       - name: Save scanner memory

From b1a3ff778a2de93deca0ca19e6e40075f772ff3e Mon Sep 17 00:00:00 2001
From: Derek Misler <derek.misler@docker.com>
Date: Mon, 2 Feb 2026 19:41:21 -0500
Subject: [PATCH 08/10] fix: address PR review feedback

Fixes from code review:

1. Restrict shell permissions
   - Changed `gh *` to `gh issue list *` and `gh issue create *`
   - Principle of least privilege

2. CATEGORY field mismatch
   - Root agent now adds CATEGORY when forwarding to reporter
   - Added explicit "Forwarding to reporter" section with example

3. Inconsistent array/NO_ISSUES terminology
   - Changed "Return an empty array `[]`" to "Output `NO_ISSUES`"
   - Consistent across all three analysis agents

4. Documentation trigger clarity
   - Changed to "ONLY run if BOTH security AND bugs returned `NO_ISSUES`"
   - Unambiguous trigger condition

5. Better duplicate detection
   - Changed from downloading 100 issues to `gh issue list --search`
   - Searches by file path in issue body

6. Sub-agent failure handling
   - Added explicit error handling strategy
   - Log errors and continue with other agents
   - Report partial results if some agents fail
   - Added FAILED status to reporter output
---
 .github/agents/nightly-scanner.yaml | 67 +++++++++++++++++++++--------
 .github/workflows/nightly-scan.yml  |  2 +-
 2 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml
index 7d4cee04f..e094f74bc 100644
--- a/.github/agents/nightly-scanner.yaml
+++ b/.github/agents/nightly-scanner.yaml
@@ -51,15 +51,29 @@ agents:
 
       1. Call `get_memories` to understand what to skip from previous runs
       2. Use `directory_tree` to understand the codebase structure
-      3. Delegate to sub-agents in order:
+      3. Delegate to sub-agents in order (with error handling):
          - `security` - for security vulnerabilities (HIGHEST PRIORITY)
+           - If fails: log error, continue to bugs
          - `bugs` - for logic errors, resource leaks, race conditions
-         - `documentation` - for missing docs (ONLY if no security/bug issues)
-      4. Collect findings from each sub-agent (they return text in FILE/LINE/SEVERITY format)
-      5. Filter out any issues that match patterns from memory
-      6. Select the top 1-2 most important issues
-      7. Delegate to `reporter` with the selected findings to create GitHub issues
-      8. Use `add_memory` to store any new learnings
+           - If fails: log error, continue to documentation check
+         - `documentation` - for missing docs
+           - ONLY run if BOTH security AND bugs returned `NO_ISSUES`
+           - If fails: log error, continue to reporting
+      4. Collect findings from each sub-agent (they return text format or `NO_ISSUES`)
+      5. Filter out any issues where FILE matches patterns from memory
+      6. Sort by SEVERITY (critical > high > medium) and select top 1-2 issues
+      7. Add CATEGORY field to each finding based on source agent:
+         - From security agent → `CATEGORY: security`
+         - From bugs agent → `CATEGORY: bug`
+         - From documentation agent → `CATEGORY: documentation`
+      8. Delegate to `reporter` with the augmented findings
+      9. Use `add_memory` to store any new learnings
+
+      ## Error handling
+
+      - If a sub-agent fails (timeout, API error), log the error and continue with other agents
+      - If ALL sub-agents fail, output: `SCAN FAILED: All agents encountered errors`
+      - If SOME sub-agents fail, report findings from successful ones and note failures
 
       ## Sub-agent output format
 
@@ -75,6 +89,21 @@ agents:
       ---
       ```
 
+      ## Forwarding to reporter
+
+      When forwarding to reporter, ADD the CATEGORY field:
+      ```
+      CATEGORY: security
+      FILE: path/to/file.go
+      LINE: 123
+      SEVERITY: high
+      TITLE: Brief description
+      CODE: exact code
+      PROBLEM: explanation
+      SUGGESTION: fix
+      ---
+      ```
+
       ## Memory updates
 
       If you discover patterns that should be remembered (e.g., "this codebase uses
@@ -117,7 +146,7 @@ agents:
 
       ## Important: No findings is a valid outcome
 
-      If the codebase has no security issues, that's great! Return an empty array `[]`.
+      If the codebase has no security issues, that's great! Output `NO_ISSUES`.
       Do NOT manufacture issues just to have something to report. Quality over quantity.
 
       ## What to look for
@@ -181,7 +210,7 @@ agents:
 
       ## Important: No findings is a valid outcome
 
-      If the codebase has no bugs, that's great! Return an empty array `[]`.
+      If the codebase has no bugs, that's great! Output `NO_ISSUES`.
       Do NOT manufacture issues just to have something to report. Quality over quantity.
 
       ## What to look for
@@ -254,7 +283,7 @@ agents:
 
       ## Important: No findings is a valid outcome
 
-      If the documentation is already good, that's great! Return an empty array `[]`.
+      If the documentation is already good, that's great! Output `NO_ISSUES`.
       Do NOT manufacture issues just to have something to report. Quality over quantity.
       Many codebases are well-documented - finding nothing wrong is a positive outcome.
 
@@ -321,14 +350,13 @@ agents:
 
       ## Workflow
 
-      1. First, check for existing similar issues:
+      1. For each finding, check if a similar issue already exists by searching:
          ```bash
-         gh issue list --label automated --state open --limit 100
+         gh issue list --label automated --state open --search "in:body {filepath}"
          ```
+         If results found for the same file, SKIP (log as SKIPPED).
 
-      2. For each finding, skip if a similar issue already exists (matching title or file)
-
-      3. Create new issues with `gh issue create`:
+      2. If no duplicate found, create the issue:
          ```bash
          gh issue create \
            --title "[category] Title here" \
@@ -337,6 +365,8 @@ agents:
            --body "issue body here"
          ```
 
+      3. If `gh issue create` fails, log as FAILED and continue with remaining findings.
+
       ## Issue format
 
       Title: `[security] Brief title` or `[bug] Brief title` or `[documentation] Brief title`
@@ -380,6 +410,7 @@ agents:
       CREATED: #123 [security] Issue title
       CREATED: #456 [bug] Issue title
       SKIPPED: Similar issue already exists for path/to/file.go
+      FAILED: Could not create issue - API error message
       ```
 
       Or if nothing to create:
@@ -390,12 +421,14 @@ agents:
       ## Important
 
       - Maximum 2 issues per run
-      - Skip duplicates (check by title similarity and file path)
+      - Skip duplicates (search by file path in issue body)
       - Use exact code snippets from the findings
+      - If creation fails, log FAILED and continue with remaining findings
 
     toolsets:
       - type: shell
 
 permissions:
   allow:
-    - shell:cmd=gh *
+    - shell:cmd=gh issue list *
+    - shell:cmd=gh issue create *
diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml
index 14efd6473..0108dacd6 100644
--- a/.github/workflows/nightly-scan.yml
+++ b/.github/workflows/nightly-scan.yml
@@ -41,12 +41,12 @@ jobs:
         uses: docker/cagent-action@latest
         env:
           GH_TOKEN: ${{ github.token }}
-          GOOGLE_API_KEY: ${{ secrets.GEMINI_API_KEY }}
         with:
           agent: ${{ github.workspace }}/.github/agents/nightly-scanner.yaml
           prompt: ${{ inputs.dry-run && 'DRY RUN MODE: Do not create any issues. Just report what you would create.' || '' }}
           anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
           openai-api-key: ${{ secrets.OPENAI_API_KEY }}
+          google-api-key: ${{ secrets.GEMINI_API_KEY }}
           timeout: 600
 
       - name: Save scanner memory

From b147fd1efc4d36e570c911a6536811e5c5584707 Mon Sep 17 00:00:00 2001
From: Derek Misler <derek.misler@docker.com>
Date: Mon, 2 Feb 2026 22:09:12 -0500
Subject: [PATCH 09/10] refactor: dedupe by file and line, heredoc bodies, per-run cache key

---
 .github/agents/nightly-scanner.yaml | 52 ++++++++++++++++++++---------
 .github/workflows/nightly-scan.yml  | 10 +++---
 2 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml
index e094f74bc..f3790a82d 100644
--- a/.github/agents/nightly-scanner.yaml
+++ b/.github/agents/nightly-scanner.yaml
@@ -23,7 +23,7 @@ models:
   # Documentation & reporting - faster model is sufficient
   claude-haiku:
     provider: anthropic
-    model: claude-haiku-3-5
+    model: claude-haiku-4-5
     max_tokens: 4096
     temperature: 0.2
 
@@ -129,7 +129,7 @@ agents:
     toolsets:
       - type: filesystem
       - type: memory
-        path: scanner-memory.db
+        path: .github/agents/scanner-memory.db
 
   security:
     model: openai-o3
@@ -350,19 +350,30 @@ agents:
 
       ## Workflow
 
-      1. For each finding, check if a similar issue already exists by searching:
+      **ENFORCE: Process at most 2 findings. If you receive more, only process the first 2.**
+
+      For each finding (up to 2 maximum):
+
+      1. Check if a similar issue already exists by searching for the same file AND line:
          ```bash
-         gh issue list --label automated --state open --search "in:body {filepath}"
+         # Use quoted shell variables to pass file paths safely (reduces shell injection risk)
+         FILE_PATH="path/to/file.go"
+         LINE_NUM="123"
+         gh issue list --label automated --state open --search "in:body \"$FILE_PATH\" \"line $LINE_NUM\""
          ```
-         If results found for the same file, SKIP (log as SKIPPED).
+         If results found for the same file and line, SKIP (log as SKIPPED).
 
-      2. If no duplicate found, create the issue:
+      2. If no duplicate found, create the issue using a heredoc to handle special characters:
          ```bash
-         gh issue create \
-           --title "[category] Title here" \
+         # Store title in variable first to handle special characters safely
+         ISSUE_TITLE="[category] Title here"
+         cat << 'EOF' | gh issue create \
+           --title "$ISSUE_TITLE" \
            --label "automated" \
            --label "kind/bug" \
-           --body "issue body here"
+           --body-file -
+         Issue body content here...
+         EOF
          ```
 
       3. If `gh issue create` fails, log as FAILED and continue with remaining findings.
@@ -377,9 +388,16 @@ agents:
       - `bug` category → add `kind/bug`
       - `documentation` category → add `kind/documentation`
 
-      Body template:
-      ```markdown
-      ## SEVERITY_EMOJI SEVERITY - CATEGORY
+      Body template (use heredoc to handle backticks and special characters):
+      ```bash
+      # Store values in variables to safely handle special characters
+      ISSUE_TITLE="[security] SQL injection in user query"
+      cat << 'EOF' | gh issue create \
+        --title "$ISSUE_TITLE" \
+        --label "automated" \
+        --label "kind/bug" \
+        --body-file -
+      ## 🔴 critical - security
 
       **File:** `path/to/file.go` (line 123)
 
@@ -399,6 +417,7 @@ agents:
 
       ---
       *Found by nightly codebase scan*
+      EOF
       ```
 
       Severity emojis: 🔴 critical, 🟠 high, 🟡 medium
@@ -420,15 +439,16 @@ agents:
 
       ## Important
 
-      - Maximum 2 issues per run
-      - Skip duplicates (search by file path in issue body)
+      - **STRICT LIMIT: Maximum 2 issues per run** - Stop after creating 2 issues, even if more findings exist
+      - Skip duplicates (search by file path AND line number in issue body)
       - Use exact code snippets from the findings
       - If creation fails, log FAILED and continue with remaining findings
+      - Always quote file paths and line numbers in search queries to handle special characters
 
     toolsets:
       - type: shell
 
 permissions:
   allow:
-    - shell:cmd=gh issue list *
-    - shell:cmd=gh issue create *
+    - shell:cmd=gh issue list --*
+    - shell:cmd=gh issue create --*
diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml
index 0108dacd6..72622e121 100644
--- a/.github/workflows/nightly-scan.yml
+++ b/.github/workflows/nightly-scan.yml
@@ -30,12 +30,12 @@ jobs:
           fetch-depth: 1
 
       - name: Restore scanner memory
-        uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
+        uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
         with:
           path: ${{ github.workspace }}/.github/agents/scanner-memory.db
-          key: scanner-memory-${{ github.repository }}
+          key: scanner-memory-${{ github.repository }}-${{ github.run_id }}
           restore-keys: |
-            scanner-memory-${{ github.repository }}
+            scanner-memory-${{ github.repository }}-
 
       - name: Run nightly scan
         uses: docker/cagent-action@latest
@@ -47,11 +47,11 @@ jobs:
           anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
           openai-api-key: ${{ secrets.OPENAI_API_KEY }}
           google-api-key: ${{ secrets.GEMINI_API_KEY }}
-          timeout: 600
+          timeout: 1200
 
       - name: Save scanner memory
         uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
         if: always()
         with:
           path: ${{ github.workspace }}/.github/agents/scanner-memory.db
-          key: scanner-memory-${{ github.repository }}
+          key: scanner-memory-${{ github.repository }}-${{ github.run_id }}

From 0620d327c32da61557e01109df3c1704f30073c5 Mon Sep 17 00:00:00 2001
From: Derek Misler <derek.misler@docker.com>
Date: Tue, 3 Feb 2026 17:26:42 -0500
Subject: [PATCH 10/10] feat: use alloy models for security and bugs agents

---
 .github/agents/nightly-scanner.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml
index f3790a82d..ea740578c 100644
--- a/.github/agents/nightly-scanner.yaml
+++ b/.github/agents/nightly-scanner.yaml
@@ -132,8 +132,8 @@ agents:
         path: .github/agents/scanner-memory.db
 
   security:
-    model: openai-o3
-    description: Deep security vulnerability analysis
+    model: openai-o3,claude-sonnet
+    description: "Deep security vulnerability analysis (alloy: reasoning + broad knowledge)"
     instruction: |
       You are a security expert scanning for vulnerabilities. Be thorough but precise.
 
@@ -196,8 +196,8 @@ agents:
           - directory_tree
 
   bugs:
-    model: gemini-flash
-    description: Logic errors, resource leaks, and concurrency bugs
+    model: gemini-flash,claude-haiku
+    description: "Logic errors, resource leaks, and concurrency bugs (alloy: speed + precision)"
     instruction: |
       You are analyzing code for bugs that cause runtime errors or incorrect behavior.