From be9cb197b639a9f96b5d4415ff7493503877de4c Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Mon, 2 Feb 2026 18:55:44 -0500 Subject: [PATCH 01/10] feat: add nightly scanner that opens issues for code quality problems Adds a read-only agent that scans the codebase daily and creates GitHub issues for security vulnerabilities, bugs, and documentation gaps. - Runs daily at 6am UTC (or manual trigger) - Creates max 2 issues per run to avoid flooding - Deduplicates against existing open issues with 'automated' label - Dry-run mode for testing - Strong anti-hallucination rules (read-only, must verify files exist) --- .github/agents/nightly-scanner.yaml | 94 ++++++++++++++++ .github/workflows/nightly-scan.yml | 159 ++++++++++++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100644 .github/agents/nightly-scanner.yaml create mode 100644 .github/workflows/nightly-scan.yml diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml new file mode 100644 index 000000000..7cfbcad96 --- /dev/null +++ b/.github/agents/nightly-scanner.yaml @@ -0,0 +1,94 @@ +version: "2" + +models: + claude-sonnet: + provider: anthropic + model: claude-sonnet-4-5 + max_tokens: 8192 + temperature: 0.2 + +agents: + root: + model: claude-sonnet + description: Scans codebase nightly and reports issues in structured format + instruction: | + You are a senior engineer performing a nightly codebase audit. Your job is to find + real issues and report them clearly—NOT to fix them. + + ## Your task + + Scan this codebase and identify 1-2 of the most important issues. Quality over quantity. + + ## What to look for (in priority order) + + 1. **Security vulnerabilities** + - SQL/command injection, path traversal + - Hardcoded secrets or credentials + - Insecure TLS, weak crypto + - Missing input validation + + 2. 
**Bugs that cause runtime errors** + - Nil pointer dereferences + - Ignored errors from critical operations + - Resource leaks (unclosed files, connections) + - Race conditions, deadlocks + + 3. **Documentation gaps** (only if no security/bug issues found) + - Public APIs without documentation + - Complex functions without explanations + - Missing README sections + + ## What to IGNORE + + - Style, formatting, linting issues + - Minor code smells that don't cause bugs + - Test coverage (unless tests are actually broken) + - Naming conventions + + ## ⛔ CRITICAL GROUNDING RULES ⛔ + + - **ONLY report issues in files you have actually read with read_file** + - **EVERY file path must be verified to exist** + - **EVERY code snippet must be quoted EXACTLY from the file** + - **EVERY line number must be accurate** + - **If you find nothing significant, say "No issues found" - do NOT invent issues** + + ## Your workflow + + 1. Use `directory_tree` to understand the codebase structure + 2. Identify key source files (focus on `cmd/`, `internal/`, `pkg/`) + 3. Use `read_file` to examine code + 4. For each potential issue, verify by reading surrounding context + 5. Select the 1-2 most important, verified issues + + ## Output format + + Output ONLY a JSON array (no markdown, no explanation). 
Each issue should have: + + ```json + [ + { + "title": "Brief issue title (50 chars max)", + "category": "security" | "bug" | "documentation", + "severity": "critical" | "high" | "medium", + "file": "path/to/file.go", + "line": 123, + "code": "exact code snippet from file", + "problem": "Clear explanation of why this is an issue", + "suggestion": "How to fix it" + } + ] + ``` + + If no issues found, output: + ```json + [] + ``` + + toolsets: + - type: filesystem + tools: + - read_file + - read_multiple_files + - list_directory + - directory_tree diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml new file mode 100644 index 000000000..8f4e403f0 --- /dev/null +++ b/.github/workflows/nightly-scan.yml @@ -0,0 +1,159 @@ +name: Nightly Codebase Scan + +on: + schedule: + # Run every day at 6am UTC + - cron: '0 6 * * *' + workflow_dispatch: + inputs: + dry-run: + description: 'Log issues only, do not create them' + type: boolean + default: false + +permissions: + contents: read + issues: write + +concurrency: + group: nightly-scan + cancel-in-progress: false + +jobs: + scan: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 + with: + fetch-depth: 1 + + - name: Run nightly scan + id: scan + uses: docker/cagent-action@latest + with: + agent: ${{ github.workspace }}/.github/agents/nightly-scanner.yaml + anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }} + timeout: 600 + + - name: Parse and create issues + uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7 + env: + DRY_RUN: ${{ inputs.dry-run || false }} + with: + script: | + const fs = require('fs'); + const outputFile = '${{ steps.scan.outputs.output-file }}'; + const dryRun = process.env.DRY_RUN === 'true'; + + if (!fs.existsSync(outputFile)) { + console.log('No output file found'); + return; + } + + const rawOutput = fs.readFileSync(outputFile, 'utf8'); + console.log('Raw agent 
output:', rawOutput); + + // Extract JSON from output (agent might include extra text) + let issues = []; + try { + // Try to find JSON array in the output + const jsonMatch = rawOutput.match(/\[[\s\S]*\]/); + if (jsonMatch) { + issues = JSON.parse(jsonMatch[0]); + } + } catch (e) { + console.log('Failed to parse JSON:', e.message); + console.log('Agent output was not valid JSON - skipping issue creation'); + return; + } + + if (!Array.isArray(issues) || issues.length === 0) { + console.log('✅ No issues found by scanner'); + return; + } + + console.log(`Found ${issues.length} issue(s)`); + + // Get existing open issues to avoid duplicates + const { data: existingIssues } = await github.rest.issues.listForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + labels: 'automated', + per_page: 100 + }); + + const existingTitles = existingIssues.map(i => i.title.toLowerCase()); + + // Limit to 2 issues per run + const issuesToCreate = issues.slice(0, 2); + let created = 0; + + for (const issue of issuesToCreate) { + // Skip if similar issue already exists + const titleLower = issue.title.toLowerCase(); + if (existingTitles.some(t => t.includes(titleLower) || titleLower.includes(t))) { + console.log(`⏭️ Skipping duplicate: ${issue.title}`); + continue; + } + + const severityEmoji = { + critical: '🔴', + high: '🟠', + medium: '🟡' + }[issue.severity] || '⚪'; + + const categoryLabel = { + security: 'kind/bug', // security issues are bugs + bug: 'kind/bug', + documentation: 'kind/documentation' + }[issue.category] || 'kind/bug'; + + const body = `## ${severityEmoji} ${issue.severity.toUpperCase()} - ${issue.category} + +**File:** \`${issue.file}\`${issue.line ? 
` (line ${issue.line})` : ''} + +### Code + +\`\`\`go +${issue.code} +\`\`\` + +### Problem + +${issue.problem} + +### Suggested Fix + +${issue.suggestion} + +--- + +*Found by [nightly codebase scan](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})* +`; + + if (dryRun) { + console.log(`\n📋 Would create issue:\n Title: ${issue.title}\n Labels: automated, ${categoryLabel}`); + console.log(` Body preview:\n${body.substring(0, 200)}...`); + } else { + try { + const { data: newIssue } = await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title: `[${issue.category}] ${issue.title}`, + body: body, + labels: ['automated', categoryLabel] + }); + console.log(`✅ Created issue #${newIssue.number}: ${newIssue.html_url}`); + created++; + } catch (e) { + console.log(`❌ Failed to create issue: ${e.message}`); + } + } + } + + if (!dryRun) { + console.log(`\n📊 Summary: Created ${created} issue(s)`); + } From d8479b8adf5693f2be9ca0cdeb7594db0e0a192f Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Mon, 2 Feb 2026 19:03:58 -0500 Subject: [PATCH 02/10] feat: add multi-agent architecture with persistent memory - Root agent (claude-sonnet) orchestrates scan across sub-agents - Security sub-agent (claude-opus) for vulnerability detection - Bugs sub-agent (claude-sonnet) for logic errors and resource leaks - Documentation sub-agent (claude-haiku) for doc gap detection - Add GitHub Actions cache for persistent scanner memory - Memory stores skip patterns, context, and feedback across runs - Each sub-agent has strict grounding rules to prevent hallucinations --- .github/agents/nightly-scanner.yaml | 258 +++++++++++++++++++++++----- .github/workflows/nightly-scan.yml | 21 +++ 2 files changed, 233 insertions(+), 46 deletions(-) diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml index 7cfbcad96..4def3dd01 100644 --- a/.github/agents/nightly-scanner.yaml +++ 
b/.github/agents/nightly-scanner.yaml @@ -1,69 +1,68 @@ version: "2" models: + # Orchestrator - needs good reasoning for delegation claude-sonnet: provider: anthropic model: claude-sonnet-4-5 + max_tokens: 4096 + temperature: 0.1 + + # Security analysis - use stronger model for catching subtle vulnerabilities + claude-opus: + provider: anthropic + model: claude-opus-4 max_tokens: 8192 + temperature: 0.1 + + # Documentation - faster model is sufficient + claude-haiku: + provider: anthropic + model: claude-haiku-3-5 + max_tokens: 4096 temperature: 0.2 agents: root: model: claude-sonnet - description: Scans codebase nightly and reports issues in structured format + description: Orchestrates nightly codebase scan across specialized sub-agents + sub_agents: + - security + - bugs + - documentation instruction: | - You are a senior engineer performing a nightly codebase audit. Your job is to find - real issues and report them clearly—NOT to fix them. - - ## Your task - - Scan this codebase and identify 1-2 of the most important issues. Quality over quantity. + You are the orchestrator for a nightly codebase scan. Your job is to delegate + analysis to specialized sub-agents and compile their findings. - ## What to look for (in priority order) + ## First: Load Memory - 1. **Security vulnerabilities** - - SQL/command injection, path traversal - - Hardcoded secrets or credentials - - Insecure TLS, weak crypto - - Missing input validation + Read `.github/scanner-memory.json` if it exists. This contains: + - Files/patterns to skip (known false positives) + - Codebase-specific context learned from past scans + - Feedback from humans on previous issues - 2. **Bugs that cause runtime errors** - - Nil pointer dereferences - - Ignored errors from critical operations - - Resource leaks (unclosed files, connections) - - Race conditions, deadlocks - - 3. 
**Documentation gaps** (only if no security/bug issues found) - - Public APIs without documentation - - Complex functions without explanations - - Missing README sections - - ## What to IGNORE - - - Style, formatting, linting issues - - Minor code smells that don't cause bugs - - Test coverage (unless tests are actually broken) - - Naming conventions - - ## ⛔ CRITICAL GROUNDING RULES ⛔ + ## Your workflow - - **ONLY report issues in files you have actually read with read_file** - - **EVERY file path must be verified to exist** - - **EVERY code snippet must be quoted EXACTLY from the file** - - **EVERY line number must be accurate** - - **If you find nothing significant, say "No issues found" - do NOT invent issues** + 1. Read memory file to understand what to skip + 2. Use `directory_tree` to understand the codebase structure + 3. Delegate to sub-agents in order: + - `security` - for security vulnerabilities (HIGHEST PRIORITY) + - `bugs` - for logic errors, resource leaks, race conditions + - `documentation` - for missing docs (ONLY if no security/bug issues) + 4. Collect findings from each sub-agent + 5. Filter out any issues that match patterns in memory's "skip" list + 6. Select the top 1-2 most important issues + 7. Update memory with any new learnings - ## Your workflow + ## Memory updates - 1. Use `directory_tree` to understand the codebase structure - 2. Identify key source files (focus on `cmd/`, `internal/`, `pkg/`) - 3. Use `read_file` to examine code - 4. For each potential issue, verify by reading surrounding context - 5. Select the 1-2 most important, verified issues + If you discover patterns that should be remembered (e.g., "this codebase uses + custom error handling that looks like ignored errors but isn't"), add them + to memory by writing to `.github/scanner-memory.json`. ## Output format - Output ONLY a JSON array (no markdown, no explanation). 
Each issue should have: + Output ONLY a JSON array with the final 1-2 issues: ```json [ @@ -80,11 +79,126 @@ agents: ] ``` - If no issues found, output: + If no issues found, output: `[]` + + toolsets: + - type: filesystem + + security: + model: claude-opus + description: Deep security vulnerability analysis + instruction: | + You are a security expert scanning for vulnerabilities. Be thorough but precise. + + ## ⛔ CRITICAL GROUNDING RULES ⛔ + + - **ONLY report issues in files you have actually read** + - **EVERY file path must be verified with read_file** + - **EVERY code snippet must be EXACT quotes from files** + - **If unsure, don't report it** + + ## What to look for + + ### Critical + - SQL injection (string concatenation in queries) + - Command injection (exec with user input) + - Path traversal (user input in file paths) + - Hardcoded secrets, API keys, credentials + - Authentication/authorization bypass + + ### High + - Insecure TLS (InsecureSkipVerify: true) + - Weak cryptography (MD5, SHA1 for security) + - Missing input validation on external data + - SSRF vulnerabilities + - Unsafe deserialization + + ### Medium + - Verbose error messages exposing internals + - Debug/dev settings in production code + - Missing rate limiting on sensitive endpoints + + ## Output format + + Return a JSON array of findings: + ```json + [ + { + "title": "Brief title", + "severity": "critical|high|medium", + "file": "path/to/file.go", + "line": 123, + "code": "exact code", + "problem": "explanation", + "suggestion": "fix" + } + ] + ``` + + Return `[]` if no security issues found. + + toolsets: + - type: filesystem + tools: + - read_file + - read_multiple_files + - list_directory + - directory_tree + + bugs: + model: claude-sonnet + description: Logic errors, resource leaks, and concurrency bugs + instruction: | + You are analyzing code for bugs that cause runtime errors or incorrect behavior. 
+ + ## ⛔ CRITICAL GROUNDING RULES ⛔ + + - **ONLY report issues in files you have actually read** + - **EVERY file path must be verified with read_file** + - **EVERY code snippet must be EXACT quotes from files** + - **If unsure, don't report it** + + ## What to look for + + ### High + - Nil pointer dereference (accessing pointer before nil check) + - Ignored errors from operations that can fail + - Resource leaks (files, connections, channels never closed) + - Race conditions (shared state without synchronization) + - Deadlocks (incorrect lock ordering) + + ### Medium + - Unreachable code + - Integer overflow in size calculations + - Slice bounds errors + - Goroutine leaks + + ## What to IGNORE + + - Style issues, naming conventions + - Defensive nil checks (these are good!) + - Errors that are logged but not returned (often intentional) + - Test files (unless tests are broken) + + ## Output format + + Return a JSON array of findings: ```json - [] + [ + { + "title": "Brief title", + "severity": "high|medium", + "file": "path/to/file.go", + "line": 123, + "code": "exact code", + "problem": "explanation", + "suggestion": "fix" + } + ] ``` + Return `[]` if no bugs found. + toolsets: - type: filesystem tools: @@ -92,3 +206,55 @@ agents: - read_multiple_files - list_directory - directory_tree + + documentation: + model: claude-haiku + description: Documentation gaps and improvements + instruction: | + You analyze code for documentation issues. Only report significant gaps. 
+ + ## ⛔ GROUNDING RULES ⛔ + + - **ONLY report issues in files you have actually read** + - **Focus on public APIs and exported functions** + + ## What to look for + + - Exported functions/types with no documentation + - Complex algorithms without explanations + - Missing package-level documentation + - Outdated comments that don't match code + - Missing README sections for key features + + ## What to IGNORE + + - Internal/private functions + - Simple getter/setter methods + - Test files + - Generated code + + ## Output format + + Return a JSON array of findings: + ```json + [ + { + "title": "Brief title", + "severity": "medium", + "file": "path/to/file.go", + "line": 123, + "code": "function signature or relevant code", + "problem": "what's missing", + "suggestion": "what to document" + } + ] + ``` + + Return `[]` if no documentation issues found. + + toolsets: + - type: filesystem + tools: + - read_file + - list_directory + - directory_tree diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml index 8f4e403f0..ba124f9b6 100644 --- a/.github/workflows/nightly-scan.yml +++ b/.github/workflows/nightly-scan.yml @@ -29,6 +29,20 @@ jobs: with: fetch-depth: 1 + - name: Restore scanner memory + uses: actions/cache/restore@v4 + with: + path: .github/scanner-memory.json + key: scanner-memory-${{ github.repository }}-${{ github.run_id }} + restore-keys: | + scanner-memory-${{ github.repository }}- + + - name: Initialize memory file if missing + run: | + if [ ! 
-f .github/scanner-memory.json ]; then + echo '{"skip_patterns": [], "context": [], "feedback": []}' > .github/scanner-memory.json + fi + - name: Run nightly scan id: scan uses: docker/cagent-action@latest @@ -157,3 +171,10 @@ ${issue.suggestion} if (!dryRun) { console.log(`\n📊 Summary: Created ${created} issue(s)`); } + + - name: Save scanner memory + uses: actions/cache/save@v4 + if: always() + with: + path: .github/scanner-memory.json + key: scanner-memory-${{ github.repository }}-${{ github.run_id }} From 39d662fca65c47ffd56231f967f74ac24f260d7f Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Mon, 2 Feb 2026 19:06:27 -0500 Subject: [PATCH 03/10] fix: improve scanner resilience and pin cache action Agent improvements: - Documentation agent reads all markdown files before analysis - All sub-agents explicitly accept empty results as valid outcome - Added read_multiple_files tool to documentation agent Workflow improvements: - Pin actions/cache to v4.2.0 SHA for security - Use static cache key (matches cagent-action pattern) --- .github/agents/nightly-scanner.yaml | 33 ++++++++++++++++++++++++++--- .github/workflows/nightly-scan.yml | 10 ++++----- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml index 4def3dd01..ac8854e41 100644 --- a/.github/agents/nightly-scanner.yaml +++ b/.github/agents/nightly-scanner.yaml @@ -97,6 +97,11 @@ agents: - **EVERY code snippet must be EXACT quotes from files** - **If unsure, don't report it** + ## Important: No findings is a valid outcome + + If the codebase has no security issues, that's great! Return an empty array `[]`. + Do NOT manufacture issues just to have something to report. Quality over quantity. + ## What to look for ### Critical @@ -135,7 +140,7 @@ agents: ] ``` - Return `[]` if no security issues found. + Return `[]` if no security issues found. This is perfectly acceptable! 
toolsets: - type: filesystem @@ -158,6 +163,11 @@ agents: - **EVERY code snippet must be EXACT quotes from files** - **If unsure, don't report it** + ## Important: No findings is a valid outcome + + If the codebase has no bugs, that's great! Return an empty array `[]`. + Do NOT manufacture issues just to have something to report. Quality over quantity. + ## What to look for ### High @@ -197,7 +207,7 @@ agents: ] ``` - Return `[]` if no bugs found. + Return `[]` if no bugs found. This is perfectly acceptable! toolsets: - type: filesystem @@ -213,11 +223,27 @@ agents: instruction: | You analyze code for documentation issues. Only report significant gaps. + ## First: Read existing documentation + + Before analyzing anything, read ALL existing markdown files in the repository: + 1. Use `directory_tree` to find all `.md` files + 2. Read each markdown file (README.md, CONTRIBUTING.md, docs/*.md, etc.) + 3. Understand what documentation already exists + + This context is essential - you need to know what's already documented before + suggesting what's missing. + ## ⛔ GROUNDING RULES ⛔ - **ONLY report issues in files you have actually read** - **Focus on public APIs and exported functions** + ## Important: No findings is a valid outcome + + If the documentation is already good, that's great! Return an empty array `[]`. + Do NOT manufacture issues just to have something to report. Quality over quantity. + Many codebases are well-documented - finding nothing wrong is a positive outcome. + ## What to look for - Exported functions/types with no documentation @@ -250,11 +276,12 @@ agents: ] ``` - Return `[]` if no documentation issues found. + Return `[]` if no documentation issues found. This is perfectly acceptable! 
toolsets: - type: filesystem tools: - read_file + - read_multiple_files - list_directory - directory_tree diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml index ba124f9b6..82637116e 100644 --- a/.github/workflows/nightly-scan.yml +++ b/.github/workflows/nightly-scan.yml @@ -30,12 +30,12 @@ jobs: fetch-depth: 1 - name: Restore scanner memory - uses: actions/cache/restore@v4 + uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 with: path: .github/scanner-memory.json - key: scanner-memory-${{ github.repository }}-${{ github.run_id }} + key: scanner-memory-${{ github.repository }} restore-keys: | - scanner-memory-${{ github.repository }}- + scanner-memory-${{ github.repository }} - name: Initialize memory file if missing run: | @@ -173,8 +173,8 @@ ${issue.suggestion} } - name: Save scanner memory - uses: actions/cache/save@v4 + uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 if: always() with: path: .github/scanner-memory.json - key: scanner-memory-${{ github.repository }}-${{ github.run_id }} + key: scanner-memory-${{ github.repository }} From d7ba70478286fbeab900c25ccce5e088d1478c47 Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Mon, 2 Feb 2026 19:11:39 -0500 Subject: [PATCH 04/10] refactor: use cagent's built-in memory system - Replace custom JSON memory with cagent's SQLite memory toolset - Agent uses get_memories/add_memory tools instead of file I/O - Memory path resolves to .github/agents/scanner-memory.db - Removes manual JSON initialization step --- .github/agents/nightly-scanner.yaml | 14 ++++++++------ .github/workflows/nightly-scan.yml | 10 ++-------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml index ac8854e41..bf0563f94 100644 --- a/.github/agents/nightly-scanner.yaml +++ b/.github/agents/nightly-scanner.yaml @@ -36,29 +36,29 @@ agents: ## First: Load Memory - Read 
`.github/scanner-memory.json` if it exists. This contains: + Use `get_memories` to check for any learned patterns from previous scans. This includes: - Files/patterns to skip (known false positives) - Codebase-specific context learned from past scans - Feedback from humans on previous issues ## Your workflow - 1. Read memory file to understand what to skip + 1. Call `get_memories` to understand what to skip from previous runs 2. Use `directory_tree` to understand the codebase structure 3. Delegate to sub-agents in order: - `security` - for security vulnerabilities (HIGHEST PRIORITY) - `bugs` - for logic errors, resource leaks, race conditions - `documentation` - for missing docs (ONLY if no security/bug issues) 4. Collect findings from each sub-agent - 5. Filter out any issues that match patterns in memory's "skip" list + 5. Filter out any issues that match patterns from memory 6. Select the top 1-2 most important issues - 7. Update memory with any new learnings + 7. Use `add_memory` to store any new learnings ## Memory updates If you discover patterns that should be remembered (e.g., "this codebase uses - custom error handling that looks like ignored errors but isn't"), add them - to memory by writing to `.github/scanner-memory.json`. + custom error handling that looks like ignored errors but isn't"), use + `add_memory` to store them for future runs. 
## Output format @@ -83,6 +83,8 @@ agents: toolsets: - type: filesystem + - type: memory + path: scanner-memory.db security: model: claude-opus diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml index 82637116e..4e4618a64 100644 --- a/.github/workflows/nightly-scan.yml +++ b/.github/workflows/nightly-scan.yml @@ -32,17 +32,11 @@ jobs: - name: Restore scanner memory uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 with: - path: .github/scanner-memory.json + path: ${{ github.workspace }}/.github/agents/scanner-memory.db key: scanner-memory-${{ github.repository }} restore-keys: | scanner-memory-${{ github.repository }} - - name: Initialize memory file if missing - run: | - if [ ! -f .github/scanner-memory.json ]; then - echo '{"skip_patterns": [], "context": [], "feedback": []}' > .github/scanner-memory.json - fi - - name: Run nightly scan id: scan uses: docker/cagent-action@latest @@ -176,5 +170,5 @@ ${issue.suggestion} uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 if: always() with: - path: .github/scanner-memory.json + path: ${{ github.workspace }}/.github/agents/scanner-memory.db key: scanner-memory-${{ github.repository }} From fcc7aa81ca5fd1ea27f92acde64648e1048a1597 Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Mon, 2 Feb 2026 19:13:51 -0500 Subject: [PATCH 05/10] refactor: use text format for sub-agent output Sub-agents now return findings in simple text format: FILE: path/to/file.go LINE: 123 SEVERITY: high ... Benefits over JSON: - More natural for LLM output - Less prone to formatting errors - Matches cagent-action PR review pattern Root agent still outputs JSON for workflow parsing. 
--- .github/agents/nightly-scanner.yaml | 99 ++++++++++++++++------------- 1 file changed, 55 insertions(+), 44 deletions(-) diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml index bf0563f94..6e946f7c4 100644 --- a/.github/agents/nightly-scanner.yaml +++ b/.github/agents/nightly-scanner.yaml @@ -49,10 +49,27 @@ agents: - `security` - for security vulnerabilities (HIGHEST PRIORITY) - `bugs` - for logic errors, resource leaks, race conditions - `documentation` - for missing docs (ONLY if no security/bug issues) - 4. Collect findings from each sub-agent + 4. Collect findings from each sub-agent (they return text in FILE/LINE/SEVERITY format) 5. Filter out any issues that match patterns from memory 6. Select the top 1-2 most important issues - 7. Use `add_memory` to store any new learnings + 7. Convert selected findings to JSON output format + 8. Use `add_memory` to store any new learnings + + ## Sub-agent output format + + Sub-agents return findings in this text format (or `NO_ISSUES` if none): + ``` + FILE: path/to/file.go + LINE: 123 + SEVERITY: high + TITLE: Brief description + CODE: exact code + PROBLEM: explanation + SUGGESTION: fix + --- + ``` + + Parse these and convert to JSON for your final output. ## Memory updates @@ -127,22 +144,20 @@ agents: ## Output format - Return a JSON array of findings: - ```json - [ - { - "title": "Brief title", - "severity": "critical|high|medium", - "file": "path/to/file.go", - "line": 123, - "code": "exact code", - "problem": "explanation", - "suggestion": "fix" - } - ] + For each finding, output in this EXACT format: + + ``` + FILE: path/to/file.go + LINE: 123 + SEVERITY: critical|high|medium + TITLE: Brief description of the vulnerability + CODE: exact code snippet + PROBLEM: Why this is a security issue + SUGGESTION: How to fix it + --- ``` - Return `[]` if no security issues found. This is perfectly acceptable! 
+ If no security issues found, output: `NO_ISSUES` toolsets: - type: filesystem @@ -194,22 +209,20 @@ agents: ## Output format - Return a JSON array of findings: - ```json - [ - { - "title": "Brief title", - "severity": "high|medium", - "file": "path/to/file.go", - "line": 123, - "code": "exact code", - "problem": "explanation", - "suggestion": "fix" - } - ] + For each finding, output in this EXACT format: + + ``` + FILE: path/to/file.go + LINE: 123 + SEVERITY: high|medium + TITLE: Brief description of the bug + CODE: exact code snippet + PROBLEM: Why this is a bug + SUGGESTION: How to fix it + --- ``` - Return `[]` if no bugs found. This is perfectly acceptable! + If no bugs found, output: `NO_ISSUES` toolsets: - type: filesystem @@ -263,22 +276,20 @@ agents: ## Output format - Return a JSON array of findings: - ```json - [ - { - "title": "Brief title", - "severity": "medium", - "file": "path/to/file.go", - "line": 123, - "code": "function signature or relevant code", - "problem": "what's missing", - "suggestion": "what to document" - } - ] + For each finding, output in this EXACT format: + + ``` + FILE: path/to/file.go + LINE: 123 + SEVERITY: medium + TITLE: Brief description of the doc gap + CODE: function signature or relevant code + PROBLEM: What documentation is missing + SUGGESTION: What should be documented + --- ``` - Return `[]` if no documentation issues found. This is perfectly acceptable! + If no documentation issues found, output: `NO_ISSUES` toolsets: - type: filesystem From e3b4610c5d54a7d491848857c60a0f2768c12fb9 Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Mon, 2 Feb 2026 19:18:55 -0500 Subject: [PATCH 06/10] feat: add reporter sub-agent for issue creation Move issue creation from workflow to agent: - New `reporter` sub-agent uses `gh` CLI to create issues - Checks for duplicates before creating - Selects appropriate labels based on category - Workflow reduced from 175 lines to 55 lines Dry-run mode now passed as prompt to agent. 
--- .github/agents/nightly-scanner.yaml | 136 ++++++++++++++++++++++++---- .github/workflows/nightly-scan.yml | 125 +------------------------ 2 files changed, 119 insertions(+), 142 deletions(-) diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml index 6e946f7c4..9491701bd 100644 --- a/.github/agents/nightly-scanner.yaml +++ b/.github/agents/nightly-scanner.yaml @@ -30,6 +30,7 @@ agents: - security - bugs - documentation + - reporter instruction: | You are the orchestrator for a nightly codebase scan. Your job is to delegate analysis to specialized sub-agents and compile their findings. @@ -52,12 +53,12 @@ agents: 4. Collect findings from each sub-agent (they return text in FILE/LINE/SEVERITY format) 5. Filter out any issues that match patterns from memory 6. Select the top 1-2 most important issues - 7. Convert selected findings to JSON output format + 7. Delegate to `reporter` with the selected findings, each prefixed with a `CATEGORY: security|bug|documentation` line, to create GitHub issues 8. Use `add_memory` to store any new learnings ## Sub-agent output format - Sub-agents return findings in this text format (or `NO_ISSUES` if none): + Analysis sub-agents return findings in this text format (or `NO_ISSUES` if none): ``` FILE: path/to/file.go LINE: 123 @@ -69,8 +70,6 @@ agents: --- ``` - Parse these and convert to JSON for your final output. 
- ## Memory updates If you discover patterns that should be remembered (e.g., "this codebase uses @@ -79,24 +78,19 @@ agents: ## Output format - Output ONLY a JSON array with the final 1-2 issues: - - ```json - [ - { - "title": "Brief issue title (50 chars max)", - "category": "security" | "bug" | "documentation", - "severity": "critical" | "high" | "medium", - "file": "path/to/file.go", - "line": 123, - "code": "exact code snippet from file", - "problem": "Clear explanation of why this is an issue", - "suggestion": "How to fix it" - } - ] + After the reporter creates issues, output a brief summary: + ``` + SCAN COMPLETE + Issues created: N + - #123: Issue title + - #456: Issue title ``` - If no issues found, output: `[]` + If no issues found or created, output: + ``` + SCAN COMPLETE + No issues found. + ``` toolsets: - type: filesystem @@ -298,3 +292,105 @@ agents: - read_multiple_files - list_directory - directory_tree + + reporter: + model: claude-haiku + description: Creates GitHub issues for findings + instruction: | + You create GitHub issues for code quality findings using the `gh` CLI. + + ## Input + + You receive findings from the orchestrator in this format: + ``` + CATEGORY: security|bug|documentation + FILE: path/to/file.go + LINE: 123 + SEVERITY: critical|high|medium + TITLE: Brief description + CODE: exact code snippet + PROBLEM: explanation + SUGGESTION: fix + --- + ``` + + ## Workflow + + 1. First, check for existing similar issues: + ```bash + gh issue list --label automated --state open --limit 100 + ``` + + 2. For each finding, skip if a similar issue already exists (matching title or file) + + 3. 
Create new issues with `gh issue create`: + ```bash + gh issue create \ + --title "[category] Title here" \ + --label "automated" \ + --label "kind/bug" \ + --body "issue body here" + ``` + + ## Issue format + + Title: `[security] Brief title` or `[bug] Brief title` or `[documentation] Brief title` + + Labels (select based on category): + - Always add `automated` + - `security` category → add `kind/bug` (security issues are bugs) + - `bug` category → add `kind/bug` + - `documentation` category → add `kind/documentation` + + Body template: + ```markdown + ## SEVERITY_EMOJI SEVERITY - CATEGORY + + **File:** `path/to/file.go` (line 123) + + ### Code + + ```go + exact code snippet + ``` + + ### Problem + + Explanation of the issue + + ### Suggested Fix + + How to fix it + + --- + *Found by nightly codebase scan* + ``` + + Severity emojis: 🔴 critical, 🟠 high, 🟡 medium + + ## Output + + Return what you created: + ``` + CREATED: #123 [security] Issue title + CREATED: #456 [bug] Issue title + SKIPPED: Similar issue already exists for path/to/file.go + ``` + + Or if nothing to create: + ``` + NO_ISSUES_TO_CREATE + ``` + + ## Important + + - Maximum 2 issues per run + - Skip duplicates (check by title similarity and file path) + - Use exact code snippets from the findings + + toolsets: + - type: shell + +permissions: + allow: + - shell:cmd=gh * diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml index 4e4618a64..3496d761a 100644 --- a/.github/workflows/nightly-scan.yml +++ b/.github/workflows/nightly-scan.yml @@ -38,134 +38,15 @@ jobs: scanner-memory-${{ github.repository }} - name: Run nightly scan - id: scan uses: docker/cagent-action@latest + env: + GH_TOKEN: ${{ github.token }} with: agent: ${{ github.workspace }}/.github/agents/nightly-scanner.yaml + prompt: ${{ inputs.dry-run && 'DRY RUN MODE: Do not create any issues. Just report what you would create.' 
|| '' }} anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }} timeout: 600 - - name: Parse and create issues - uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7 - env: - DRY_RUN: ${{ inputs.dry-run || false }} - with: - script: | - const fs = require('fs'); - const outputFile = '${{ steps.scan.outputs.output-file }}'; - const dryRun = process.env.DRY_RUN === 'true'; - - if (!fs.existsSync(outputFile)) { - console.log('No output file found'); - return; - } - - const rawOutput = fs.readFileSync(outputFile, 'utf8'); - console.log('Raw agent output:', rawOutput); - - // Extract JSON from output (agent might include extra text) - let issues = []; - try { - // Try to find JSON array in the output - const jsonMatch = rawOutput.match(/\[[\s\S]*\]/); - if (jsonMatch) { - issues = JSON.parse(jsonMatch[0]); - } - } catch (e) { - console.log('Failed to parse JSON:', e.message); - console.log('Agent output was not valid JSON - skipping issue creation'); - return; - } - - if (!Array.isArray(issues) || issues.length === 0) { - console.log('✅ No issues found by scanner'); - return; - } - - console.log(`Found ${issues.length} issue(s)`); - - // Get existing open issues to avoid duplicates - const { data: existingIssues } = await github.rest.issues.listForRepo({ - owner: context.repo.owner, - repo: context.repo.repo, - state: 'open', - labels: 'automated', - per_page: 100 - }); - - const existingTitles = existingIssues.map(i => i.title.toLowerCase()); - - // Limit to 2 issues per run - const issuesToCreate = issues.slice(0, 2); - let created = 0; - - for (const issue of issuesToCreate) { - // Skip if similar issue already exists - const titleLower = issue.title.toLowerCase(); - if (existingTitles.some(t => t.includes(titleLower) || titleLower.includes(t))) { - console.log(`⏭️ Skipping duplicate: ${issue.title}`); - continue; - } - - const severityEmoji = { - critical: '🔴', - high: '🟠', - medium: '🟡' - }[issue.severity] || '⚪'; - - const categoryLabel = { - 
security: 'kind/bug', // security issues are bugs - bug: 'kind/bug', - documentation: 'kind/documentation' - }[issue.category] || 'kind/bug'; - - const body = `## ${severityEmoji} ${issue.severity.toUpperCase()} - ${issue.category} - -**File:** \`${issue.file}\`${issue.line ? ` (line ${issue.line})` : ''} - -### Code - -\`\`\`go -${issue.code} -\`\`\` - -### Problem - -${issue.problem} - -### Suggested Fix - -${issue.suggestion} - ---- - -*Found by [nightly codebase scan](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})* -`; - - if (dryRun) { - console.log(`\n📋 Would create issue:\n Title: ${issue.title}\n Labels: automated, ${categoryLabel}`); - console.log(` Body preview:\n${body.substring(0, 200)}...`); - } else { - try { - const { data: newIssue } = await github.rest.issues.create({ - owner: context.repo.owner, - repo: context.repo.repo, - title: `[${issue.category}] ${issue.title}`, - body: body, - labels: ['automated', categoryLabel] - }); - console.log(`✅ Created issue #${newIssue.number}: ${newIssue.html_url}`); - created++; - } catch (e) { - console.log(`❌ Failed to create issue: ${e.message}`); - } - } - } - - if (!dryRun) { - console.log(`\n📊 Summary: Created ${created} issue(s)`); - } - - name: Save scanner memory uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 if: always() From 957dbce49e5fcafc4fe7ba70c9589d2c3451d2e1 Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Mon, 2 Feb 2026 19:25:26 -0500 Subject: [PATCH 07/10] feat: use multi-provider models for specialized tasks Model assignments: - Security: openai/o3-mini (reasoning model for subtle vulnerabilities) - Bugs: google/gemini-2.5-flash (fast, good at Go code analysis) - Documentation: anthropic/claude-haiku (sufficient for simpler task) - Orchestrator: anthropic/claude-sonnet (coordination) - Reporter: anthropic/claude-haiku (formatting + gh commands) Workflow now passes all three API keys: - ANTHROPIC_API_KEY - OPENAI_API_KEY - 
GOOGLE_API_KEY (mapped from GEMINI_API_KEY secret) --- .github/agents/nightly-scanner.yaml | 21 +++++++++++++-------- .github/workflows/nightly-scan.yml | 2 ++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml index 9491701bd..7d4cee04f 100644 --- a/.github/agents/nightly-scanner.yaml +++ b/.github/agents/nightly-scanner.yaml @@ -8,14 +8,19 @@ models: max_tokens: 4096 temperature: 0.1 - # Security analysis - use stronger model for catching subtle vulnerabilities - claude-opus: - provider: anthropic - model: claude-opus-4 + # Security analysis - reasoning model excels at finding subtle vulnerabilities + openai-o3: + provider: openai + model: o3-mini max_tokens: 8192 - temperature: 0.1 - # Documentation - faster model is sufficient + # Bug detection - fast and surprisingly good at Go code analysis + gemini-flash: + provider: google + model: gemini-2.5-flash + max_tokens: 8192 + + # Documentation & reporting - faster model is sufficient claude-haiku: provider: anthropic model: claude-haiku-3-5 @@ -98,7 +103,7 @@ agents: path: scanner-memory.db security: - model: claude-opus + model: openai-o3 description: Deep security vulnerability analysis instruction: | You are a security expert scanning for vulnerabilities. Be thorough but precise. @@ -162,7 +167,7 @@ agents: - directory_tree bugs: - model: claude-sonnet + model: gemini-flash description: Logic errors, resource leaks, and concurrency bugs instruction: | You are analyzing code for bugs that cause runtime errors or incorrect behavior. 
diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml index 3496d761a..14efd6473 100644 --- a/.github/workflows/nightly-scan.yml +++ b/.github/workflows/nightly-scan.yml @@ -41,10 +41,12 @@ jobs: uses: docker/cagent-action@latest env: GH_TOKEN: ${{ github.token }} + GOOGLE_API_KEY: ${{ secrets.GEMINI_API_KEY }} with: agent: ${{ github.workspace }}/.github/agents/nightly-scanner.yaml prompt: ${{ inputs.dry-run && 'DRY RUN MODE: Do not create any issues. Just report what you would create.' || '' }} anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }} + openai-api-key: ${{ secrets.OPENAI_API_KEY }} timeout: 600 - name: Save scanner memory From b1a3ff778a2de93deca0ca19e6e40075f772ff3e Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Mon, 2 Feb 2026 19:41:21 -0500 Subject: [PATCH 08/10] fix: address PR review feedback Fixes from code review: 1. Restrict shell permissions - Changed `gh *` to `gh issue list *` and `gh issue create *` - Principle of least privilege 2. CATEGORY field mismatch - Root agent now adds CATEGORY when forwarding to reporter - Added explicit "Forwarding to reporter" section with example 3. Inconsistent array/NO_ISSUES terminology - Changed "Return an empty array `[]`" to "Output `NO_ISSUES`" - Consistent across all three analysis agents 4. Documentation trigger clarity - Changed to "ONLY run if BOTH security AND bugs returned `NO_ISSUES`" - Unambiguous trigger condition 5. Better duplicate detection - Changed from downloading 100 issues to `gh issue list --search` - Searches by file path in issue body 6. 
Sub-agent failure handling - Added explicit error handling strategy - Log errors and continue with other agents - Report partial results if some agents fail - Added FAILED status to reporter output --- .github/agents/nightly-scanner.yaml | 67 +++++++++++++++++++++-------- .github/workflows/nightly-scan.yml | 2 +- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml index 7d4cee04f..e094f74bc 100644 --- a/.github/agents/nightly-scanner.yaml +++ b/.github/agents/nightly-scanner.yaml @@ -51,15 +51,29 @@ agents: 1. Call `get_memories` to understand what to skip from previous runs 2. Use `directory_tree` to understand the codebase structure - 3. Delegate to sub-agents in order: + 3. Delegate to sub-agents in order (with error handling): - `security` - for security vulnerabilities (HIGHEST PRIORITY) + - If fails: log error, continue to bugs - `bugs` - for logic errors, resource leaks, race conditions - - `documentation` - for missing docs (ONLY if no security/bug issues) - 4. Collect findings from each sub-agent (they return text in FILE/LINE/SEVERITY format) - 5. Filter out any issues that match patterns from memory - 6. Select the top 1-2 most important issues - 7. Delegate to `reporter` with the selected findings to create GitHub issues - 8. Use `add_memory` to store any new learnings + - If fails: log error, continue to documentation check + - `documentation` - for missing docs + - ONLY run if BOTH security AND bugs returned `NO_ISSUES` + - If fails: log error, continue to reporting + 4. Collect findings from each sub-agent (they return text format or `NO_ISSUES`) + 5. Filter out any issues where FILE matches patterns from memory + 6. Sort by SEVERITY (critical > high > medium) and select top 1-2 issues + 7. 
Add CATEGORY field to each finding based on source agent: + - From security agent → `CATEGORY: security` + - From bugs agent → `CATEGORY: bug` + - From documentation agent → `CATEGORY: documentation` + 8. Delegate to `reporter` with the augmented findings + 9. Use `add_memory` to store any new learnings + + ## Error handling + + - If a sub-agent fails (timeout, API error), log the error and continue with other agents + - If ALL sub-agents fail, output: `SCAN FAILED: All agents encountered errors` + - If SOME sub-agents fail, report findings from successful ones and note failures ## Sub-agent output format @@ -75,6 +89,21 @@ agents: --- ``` + ## Forwarding to reporter + + When forwarding to reporter, ADD the CATEGORY field: + ``` + CATEGORY: security + FILE: path/to/file.go + LINE: 123 + SEVERITY: high + TITLE: Brief description + CODE: exact code + PROBLEM: explanation + SUGGESTION: fix + --- + ``` + ## Memory updates If you discover patterns that should be remembered (e.g., "this codebase uses @@ -117,7 +146,7 @@ agents: ## Important: No findings is a valid outcome - If the codebase has no security issues, that's great! Return an empty array `[]`. + If the codebase has no security issues, that's great! Output `NO_ISSUES`. Do NOT manufacture issues just to have something to report. Quality over quantity. ## What to look for @@ -181,7 +210,7 @@ agents: ## Important: No findings is a valid outcome - If the codebase has no bugs, that's great! Return an empty array `[]`. + If the codebase has no bugs, that's great! Output `NO_ISSUES`. Do NOT manufacture issues just to have something to report. Quality over quantity. ## What to look for @@ -254,7 +283,7 @@ agents: ## Important: No findings is a valid outcome - If the documentation is already good, that's great! Return an empty array `[]`. + If the documentation is already good, that's great! Output `NO_ISSUES`. Do NOT manufacture issues just to have something to report. Quality over quantity. 
Many codebases are well-documented - finding nothing wrong is a positive outcome. @@ -321,14 +350,13 @@ agents: ## Workflow - 1. First, check for existing similar issues: + 1. For each finding, check if a similar issue already exists by searching: ```bash - gh issue list --label automated --state open --limit 100 + gh issue list --label automated --state open --search "in:body {filepath}" ``` + If results found for the same file, SKIP (log as SKIPPED). - 2. For each finding, skip if a similar issue already exists (matching title or file) - - 3. Create new issues with `gh issue create`: + 2. If no duplicate found, create the issue: ```bash gh issue create \ --title "[category] Title here" \ @@ -337,6 +365,8 @@ agents: --body "issue body here" ``` + 3. If `gh issue create` fails, log as FAILED and continue with remaining findings. + ## Issue format Title: `[security] Brief title` or `[bug] Brief title` or `[documentation] Brief title` @@ -380,6 +410,7 @@ agents: CREATED: #123 [security] Issue title CREATED: #456 [bug] Issue title SKIPPED: Similar issue already exists for path/to/file.go + FAILED: Could not create issue - API error message ``` Or if nothing to create: @@ -390,12 +421,14 @@ agents: ## Important - Maximum 2 issues per run - - Skip duplicates (check by title similarity and file path) + - Skip duplicates (search by file path in issue body) - Use exact code snippets from the findings + - If creation fails, log FAILED and continue with remaining findings toolsets: - type: shell permissions: allow: - - shell:cmd=gh * + - shell:cmd=gh issue list * + - shell:cmd=gh issue create * diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml index 14efd6473..0108dacd6 100644 --- a/.github/workflows/nightly-scan.yml +++ b/.github/workflows/nightly-scan.yml @@ -41,12 +41,12 @@ jobs: uses: docker/cagent-action@latest env: GH_TOKEN: ${{ github.token }} - GOOGLE_API_KEY: ${{ secrets.GEMINI_API_KEY }} with: agent: ${{ github.workspace 
}}/.github/agents/nightly-scanner.yaml prompt: ${{ inputs.dry-run && 'DRY RUN MODE: Do not create any issues. Just report what you would create.' || '' }} anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }} openai-api-key: ${{ secrets.OPENAI_API_KEY }} + google-api-key: ${{ secrets.GEMINI_API_KEY }} timeout: 600 - name: Save scanner memory From b147fd1efc4d36e570c911a6536811e5c5584707 Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Mon, 2 Feb 2026 22:09:12 -0500 Subject: [PATCH 09/10] refactor: PR feedback --- .github/agents/nightly-scanner.yaml | 52 ++++++++++++++++++++--------- .github/workflows/nightly-scan.yml | 10 +++--- 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml index e094f74bc..f3790a82d 100644 --- a/.github/agents/nightly-scanner.yaml +++ b/.github/agents/nightly-scanner.yaml @@ -23,7 +23,7 @@ models: # Documentation & reporting - faster model is sufficient claude-haiku: provider: anthropic - model: claude-haiku-3-5 + model: claude-haiku-4-5 max_tokens: 4096 temperature: 0.2 @@ -129,7 +129,7 @@ agents: toolsets: - type: filesystem - type: memory - path: scanner-memory.db + path: .github/agents/scanner-memory.db security: model: openai-o3 @@ -350,19 +350,30 @@ agents: ## Workflow - 1. For each finding, check if a similar issue already exists by searching: + **ENFORCE: Process at most 2 findings. If you receive more, only process the first 2.** + + For each finding (up to 2 maximum): + + 1. Check if a similar issue already exists by searching for the same file AND line: ```bash - gh issue list --label automated --state open --search "in:body {filepath}" + # Use environment variables to safely pass file paths (avoids shell injection) + FILE_PATH="path/to/file.go" + LINE_NUM="123" + gh issue list --label automated --state open --search "in:body \"$FILE_PATH\" \"line $LINE_NUM\"" ``` - If results found for the same file, SKIP (log as SKIPPED). 
+ If results found for the same file and line, SKIP (log as SKIPPED). - 2. If no duplicate found, create the issue: + 2. If no duplicate found, create the issue using a heredoc to handle special characters: ```bash - gh issue create \ - --title "[category] Title here" \ + # Store title in variable first to handle special characters safely + ISSUE_TITLE="[category] Title here" + cat << 'EOF' | gh issue create \ + --title "$ISSUE_TITLE" \ --label "automated" \ --label "kind/bug" \ - --body "issue body here" + --body-file - + Issue body content here... + EOF ``` 3. If `gh issue create` fails, log as FAILED and continue with remaining findings. @@ -377,9 +388,16 @@ agents: - `bug` category → add `kind/bug` - `documentation` category → add `kind/documentation` - Body template: - ```markdown - ## SEVERITY_EMOJI SEVERITY - CATEGORY + Body template (use heredoc to handle backticks and special characters): + ```bash + # Store values in variables to safely handle special characters + ISSUE_TITLE="[security] SQL injection in user query" + cat << 'EOF' | gh issue create \ + --title "$ISSUE_TITLE" \ + --label "automated" \ + --label "kind/bug" \ + --body-file - + ## 🔴 critical - security **File:** `path/to/file.go` (line 123) @@ -399,6 +417,7 @@ agents: --- *Found by nightly codebase scan* + EOF ``` Severity emojis: 🔴 critical, 🟠 high, 🟡 medium @@ -420,15 +439,16 @@ agents: ## Important - - Maximum 2 issues per run - - Skip duplicates (search by file path in issue body) + - **STRICT LIMIT: Maximum 2 issues per run** - Stop after creating 2 issues, even if more findings exist + - Skip duplicates (search by file path AND line number in issue body) - Use exact code snippets from the findings - If creation fails, log FAILED and continue with remaining findings + - Always quote file paths and line numbers in search queries to handle special characters toolsets: - type: shell permissions: allow: - - shell:cmd=gh issue list * - - shell:cmd=gh issue create * + - shell:cmd=gh issue list 
--* + - shell:cmd=gh issue create --* diff --git a/.github/workflows/nightly-scan.yml b/.github/workflows/nightly-scan.yml index 0108dacd6..72622e121 100644 --- a/.github/workflows/nightly-scan.yml +++ b/.github/workflows/nightly-scan.yml @@ -30,12 +30,12 @@ jobs: fetch-depth: 1 - name: Restore scanner memory - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 with: path: ${{ github.workspace }}/.github/agents/scanner-memory.db - key: scanner-memory-${{ github.repository }} + key: scanner-memory-${{ github.repository }}-${{ github.run_id }} restore-keys: | - scanner-memory-${{ github.repository }} + scanner-memory-${{ github.repository }}- - name: Run nightly scan uses: docker/cagent-action@latest @@ -47,11 +47,11 @@ jobs: anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }} openai-api-key: ${{ secrets.OPENAI_API_KEY }} google-api-key: ${{ secrets.GEMINI_API_KEY }} - timeout: 600 + timeout: 1200 - name: Save scanner memory uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 if: always() with: path: ${{ github.workspace }}/.github/agents/scanner-memory.db - key: scanner-memory-${{ github.repository }} + key: scanner-memory-${{ github.repository }}-${{ github.run_id }} From 0620d327c32da61557e01109df3c1704f30073c5 Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Tue, 3 Feb 2026 17:26:42 -0500 Subject: [PATCH 10/10] feat: alloy --- .github/agents/nightly-scanner.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/agents/nightly-scanner.yaml b/.github/agents/nightly-scanner.yaml index f3790a82d..ea740578c 100644 --- a/.github/agents/nightly-scanner.yaml +++ b/.github/agents/nightly-scanner.yaml @@ -132,8 +132,8 @@ agents: path: .github/agents/scanner-memory.db security: - model: openai-o3 - description: Deep security vulnerability analysis + model: openai-o3,claude-sonnet + description: Deep security vulnerability 
analysis (alloy of reasoning + broad knowledge) instruction: | You are a security expert scanning for vulnerabilities. Be thorough but precise. @@ -196,8 +196,8 @@ agents: - directory_tree bugs: - model: gemini-flash - description: Logic errors, resource leaks, and concurrency bugs + model: gemini-flash,claude-haiku + description: Logic errors, resource leaks, and concurrency bugs (alloy of speed + precision) instruction: | You are analyzing code for bugs that cause runtime errors or incorrect behavior.