From 2e91ca29d079dfdf6f85eee27d15bcf691688809 Mon Sep 17 00:00:00 2001 From: Sungblab <98207899+Sungblab@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:06:59 +0900 Subject: [PATCH] Add repeated mistake detection loop --- .devflow/mistakes.example.json | 12 ++ docs/contributing/commands.md | 45 +++++ docs/product-plan.md | 13 +- docs/roadmap.md | 3 +- packages/cli/README.md | 3 + packages/cli/src/index.js | 73 ++++++- packages/cli/test/cli-mvp.test.mjs | 121 ++++++++++++ packages/core/README.md | 6 +- packages/core/src/index.js | 242 ++++++++++++++++++++++++ packages/mcp/README.md | 12 ++ packages/mcp/src/index.js | 76 ++++++++ packages/mcp/test/mcp-contract.test.mjs | 50 +++++ 12 files changed, 648 insertions(+), 8 deletions(-) diff --git a/.devflow/mistakes.example.json b/.devflow/mistakes.example.json index f0917e2..664a15c 100644 --- a/.devflow/mistakes.example.json +++ b/.devflow/mistakes.example.json @@ -29,6 +29,18 @@ "symptom": "Agent used configuration or API patterns from an older major version of a framework or tool.", "correction": "Before editing framework configuration, inspect installed package versions and current primary docs; do not assume older major-version setup still applies.", "appliesTo": ["tooling-version-drift", "framework-config"] + }, + { + "id": "powershell-select-object-range-syntax", + "symptom": "Agent passed a PowerShell range expression as a string to Select-Object -Index.", + "correction": "Wrap PowerShell ranges in parentheses, for example Select-Object -Index (108..156).", + "appliesTo": ["windows-powershell", "shell-file-io"] + }, + { + "id": "playwright-module-unavailable", + "symptom": "Agent tried to run Playwright before the package or workspace runtime was available.", + "correction": "Inspect the repo package manager and installed dependencies before loading Playwright; install dependencies or use the bundled runtime path when the project expects it.", + "appliesTo": ["playwright", "browser-automation"] } ] } diff --git a/docs/contributing/commands.md b/docs/contributing/commands.md index 36a5cb6..c8bd87d 100644 --- a/docs/contributing/commands.md +++ b/docs/contributing/commands.md @@ -1004,6 +1004,51 @@ Outputs: Use this when work happened in a browser, terminal, IDE, meeting, or agent host that does not have a stable adapter yet. +## `devflow mistakes` + +Records and detects repo-local repeated agent mistake memory. This is the +mistake repair loop that feeds `devflow doctor`, start skills, and future +plugin hooks without making Devflow a persistent autonomous agent. + +Examples: + +```powershell +devflow mistakes add --repo C:\Projects\devflow-demo --id powershell-select-object-range-syntax --category shell-file-io-friction --symptom "Agent passed a PowerShell range expression as a string to Select-Object -Index." --correction "Wrap PowerShell ranges in parentheses, for example Select-Object -Index (108..156)." --applies-to windows-powershell --json +devflow mistakes list --repo C:\Projects\devflow-demo --json +devflow mistakes detect --repo C:\Projects\devflow-demo --platform windows-powershell --command 'Get-Content -LiteralPath docs\product-plan.md | Select-Object -Index 108..156' --stderr 'Cannot bind parameter ''Index''. Cannot convert value "108..156" to type "System.Int32".' --record --json +``` + +Subcommands: + +- `add`: writes a maintainer-approved correction into `.devflow/mistakes.json` +- `list`: renders the current project mistake memory +- `detect`: scans command output for known failure signatures and returns + candidate corrections + +Inputs: + +- repository path +- mistake id, category, symptom, correction, and applies-to tags for `add` +- platform, command text, stdout, stderr, and optional exit code for `detect` +- `--record` on `detect` to upsert detected candidates into + `.devflow/mistakes.json` + +Outputs: + +- `mistakes_add`, `mistakes_list`, or `mistakes_detect` JSON +- normalized mistake records with occurrence counts and bounded evidence text +- local `.devflow/mistakes.json` updates only when `add` or + `detect --record` is used + +Current detection signatures cover: + +- PowerShell `Select-Object -Index 108..156` range syntax mistakes +- Playwright package/runtime unavailable errors + +Detection records a candidate; it does not automatically edit `AGENTS.md` or +skill files. Promotion to durable instruction files should remain +confirmation-gated so project docs do not accumulate noisy one-off errors. + ## `devflow doctor` Inspects the local execution contract that agent hosts should respect before diff --git a/docs/product-plan.md b/docs/product-plan.md index 04c5ad4..b57d01c 100644 --- a/docs/product-plan.md +++ b/docs/product-plan.md @@ -113,9 +113,10 @@ beginner profile should translate those concepts into plain language. the context of my project. 9. As a beginner, I can turn vague intent into a better prompt without learning all implementation vocabulary upfront. -10. As a maintainer, I can capture repeated agent mistakes such as shell - mismatch, Windows path handling, encoding issues, unsafe commands, and - missing setup steps, then feed those lessons into future sessions. +10. As a maintainer, I can capture and detect repeated agent mistakes such as + shell mismatch, Windows path handling, encoding issues, unsafe commands, + missing setup steps, and unavailable tools, then feed those lessons into + future sessions. ## Product Shape @@ -152,6 +153,12 @@ Repeated-mistake memory should be layered: - private user memory can record maintainer-specific habits, paths, and historical failures +The repair loop should be explicit: command output or user correction becomes a +mistake candidate, confirmed candidates are stored in `.devflow/mistakes.json`, +`devflow doctor` injects the correction at session start, and repeated +candidates can be promoted to `AGENTS.md` or skill files only through a +confirmation-gated patch. + ## Supported Agents The product should treat coding tools as adapters, not as the product center. diff --git a/docs/roadmap.md b/docs/roadmap.md index 81e14c0..051e1c5 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -23,6 +23,7 @@ Build: - `devflow status` - `devflow finish` - `devflow doctor` +- `devflow mistakes add/list/detect` - `devflow prompt next` - `devflow prompt latest` - repo-local Codex/Claude plugin hooks for start, prompt intent, and finish @@ -30,7 +31,7 @@ Build: - local `.devflow/` state files - git dirty-file capture - gate evidence capture -- platform execution contract and repeated-mistake memory capture +- platform execution contract and repeated-mistake memory capture/detection - repo-local Codex plugin wrappers for the start/status/doctor and finish evidence loops - Markdown next-session handoff output and latest prompt projection diff --git a/packages/cli/README.md b/packages/cli/README.md index 620da82..f69ad9d 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -83,6 +83,9 @@ packages/cli/ - `devflow sessions attach` - `devflow sessions list` - `devflow sessions note` +- `devflow mistakes add` +- `devflow mistakes list` +- `devflow mistakes detect` `devflow init` currently renders a scaffold plan by default and writes the minimum project contract only when `--confirm` is provided. The first scaffold diff --git a/packages/cli/src/index.js b/packages/cli/src/index.js index fcf027f..c97dd06 100644 --- a/packages/cli/src/index.js +++ b/packages/cli/src/index.js @@ -19,6 +19,8 @@ import { } from "../../adapters/src/index.js"; import { createFinishSummary, + createMistakeDetection, + createMistakeListSummary, readHarnessInspect, readHarnessHealth, readHarnessPlan, @@ -43,6 +45,7 @@ import { readLatestHandoff, readMistakeMemory, recordFinishEvent, + recordMistakeMemory, recordManualSessionNoteEvent, recordReviewEvent, recordSessionAttachedEvent, @@ -97,6 +100,12 @@ try { await renderFinish(args.slice(1)); } else if (command === "doctor") { await renderDoctor(args.slice(1)); + } else if (command === "mistakes" && args[1] === "add") { + await renderMistakeAdd(args.slice(2)); + } else if (command === "mistakes" && args[1] === "list") { + await renderMistakeList(args.slice(2)); + } else if (command === "mistakes" && args[1] === "detect") { + await renderMistakeDetect(args.slice(2)); } else if (command === "gates" && args[1] === "run") { await renderGatesRun(args.slice(2)); } else if (command === "review" && args[1] === "record") { @@ -409,6 +418,60 @@ async function renderDoctor(argsForCommand) { render(summary, options.json); } +async function renderMistakeAdd(argsForCommand) { + const options = parseOptions(argsForCommand); + const repoPath = options.repo ?? cwd(); + const summary = await recordMistakeMemory(repoPath, { + id: options.id, + category: options.category, + scope: options.scope, + symptom: options.symptom, + correction: options.correction, + appliesTo: collectRepeated(options["applies-to"] ?? options.appliesTo), + confidence: options.confidence, + evidence: collectRepeated(options.evidence).map((text) => ({ + kind: "user-correction", + text, + })), + }); + + render(summary, options.json); +} + +async function renderMistakeList(argsForCommand) { + const options = parseOptions(argsForCommand); + const repoPath = options.repo ?? cwd(); + const memory = await readMistakeMemory(repoPath); + const summary = createMistakeListSummary({ + mistakes: memory.mistakes, + warnings: memory.warnings, + }); + + render(summary, options.json); +} + +async function renderMistakeDetect(argsForCommand) { + const options = parseOptions(argsForCommand); + const repoPath = options.repo ?? cwd(); + const detection = createMistakeDetection({ + platform: options.platform ?? defaultPlatformName(), + command: options.command, + stderr: options.stderr, + stdout: options.stdout, + exitCode: options["exit-code"], + }); + const recorded = []; + + if (options.record) { + for (const candidate of detection.candidates) { + const result = await recordMistakeMemory(repoPath, candidate); + recorded.push(result.mistake); + } + } + + render({ ...detection, recorded }, options.json); +} + async function renderGatesRun(argsForCommand) { const { options, positional } = parseOptionsAndPositionals(argsForCommand); const repoPath = options.repo ?? cwd(); @@ -967,6 +1030,11 @@ function renderHelp(group) { mcp: [ "devflow mcp stdio", ], + mistakes: [ + "devflow mistakes add --id --symptom --correction [--json]", + "devflow mistakes list [--json]", + "devflow mistakes detect --stderr [--command ] [--record] [--json]", + ], }; if (group && groups[group]) { @@ -998,6 +1066,7 @@ function renderHelp(group) { " init Plan or write a .devflow project scaffold", " health Check the project scaffold", " doctor Inspect local shell/tooling rules", + " mistakes Record and detect repeated agent mistake memory", " status Show repo, work, session, gate, and handoff state", " harness Inspect/install/verify Codex and Claude harness files", " mcp stdio Run the Devflow MCP stdio server", @@ -1019,6 +1088,7 @@ function renderHelp(group) { "Group help:", " devflow harness --help", " devflow mcp --help", + " devflow mistakes --help", " devflow work --help", " devflow prompt --help", "", @@ -1190,7 +1260,8 @@ function parseOptionsAndPositionals(rawArgs) { key === "once" || key === "dry-run" || key === "check" || - key === "repo-visible" + key === "repo-visible" || + key === "record" ) { options[key] = true; continue; diff --git a/packages/cli/test/cli-mvp.test.mjs b/packages/cli/test/cli-mvp.test.mjs index c2e3128..249ab8a 100644 --- a/packages/cli/test/cli-mvp.test.mjs +++ b/packages/cli/test/cli-mvp.test.mjs @@ -1997,6 +1997,127 @@ test("CLI doctor renders platform and mistake memory JSON", async () => { assert.match(parsed.recommendations[0].message, /Get-Content -LiteralPath/); }); +test("CLI mistakes add and list persist repo-local correction memory", async () => { + const repoPath = await createTempGitRepo(); + + const added = await execFileAsync("node", [ + "packages/cli/src/index.js", + "mistakes", + "add", + "--repo", + repoPath, + "--id", + "powershell-select-object-range-syntax", + "--category", + "shell-file-io-friction", + "--symptom", + "Agent passed a PowerShell range expression as a string to Select-Object -Index.", + "--correction", + "Wrap PowerShell ranges in parentheses, for example Select-Object -Index (108..156).", + "--applies-to", + "windows-powershell", + "--json", + ]); + const addedJson = JSON.parse(added.stdout); + + assert.equal(addedJson.command, "mistakes_add"); + assert.equal(addedJson.mistake.id, "powershell-select-object-range-syntax"); + assert.equal(addedJson.mistake.occurrences, 1); + + const listed = await execFileAsync("node", [ + "packages/cli/src/index.js", + "mistakes", + "list", + "--repo", + repoPath, + "--json", + ]); + const listJson = JSON.parse(listed.stdout); + + assert.equal(listJson.command, "mistakes_list"); + assert.equal(listJson.count, 1); + assert.equal(listJson.mistakes[0].category, "shell-file-io-friction"); + assert.match(listJson.mistakes[0].correction, /Select-Object -Index \(108\.\.156\)/); + + const doctor = await execFileAsync("node", [ + "packages/cli/src/index.js", + "doctor", + "--repo", + repoPath, + "--platform", + "windows-powershell", + "--json", + ]); + const doctorJson = JSON.parse(doctor.stdout); + + assert.equal(doctorJson.memory.repeatedMistakes[0].id, "powershell-select-object-range-syntax"); + assert.match( + doctorJson.recommendations.find((item) => item.source === "powershell-select-object-range-syntax") + .message, + /PowerShell ranges/, + ); +}); + +test("CLI mistakes detect records PowerShell and Playwright mistake candidates", async () => { + const repoPath = await createTempGitRepo(); + + const powershell = await execFileAsync("node", [ + "packages/cli/src/index.js", + "mistakes", + "detect", + "--repo", + repoPath, + "--platform", + "windows-powershell", + "--command", + "Get-Content -LiteralPath docs\\product-plan.md | Select-Object -Index 108..156", + "--stderr", + "Cannot bind parameter 'Index'. Cannot convert value \"108..156\" to type \"System.Int32\".", + "--record", + "--json", + ]); + const powershellJson = JSON.parse(powershell.stdout); + + assert.equal(powershellJson.command, "mistakes_detect"); + assert.equal(powershellJson.candidates[0].id, "powershell-select-object-range-syntax"); + assert.equal(powershellJson.recorded[0].id, "powershell-select-object-range-syntax"); + + const playwright = await execFileAsync("node", [ + "packages/cli/src/index.js", + "mistakes", + "detect", + "--repo", + repoPath, + "--platform", + "windows-powershell", + "--command", + "node smoke.mjs", + "--stderr", + "Error: Cannot find module 'playwright'", + "--record", + "--json", + ]); + const playwrightJson = JSON.parse(playwright.stdout); + + assert.equal(playwrightJson.candidates[0].id, "playwright-module-unavailable"); + assert.equal(playwrightJson.recorded[0].id, "playwright-module-unavailable"); + + const listed = await execFileAsync("node", [ + "packages/cli/src/index.js", + "mistakes", + "list", + "--repo", + repoPath, + "--json", + ]); + const listJson = JSON.parse(listed.stdout); + + assert.deepEqual( + listJson.mistakes.map((mistake) => mistake.id).sort(), + ["playwright-module-unavailable", "powershell-select-object-range-syntax"], + ); +}); + test("CLI sessions codex renders explicit read-only Codex discovery JSON", async () => { const repoPath = await createTempGitRepo(); const codexHome = await mkdtemp(join(tmpdir(), "devflow-cli-codex-home-")); diff --git a/packages/core/README.md b/packages/core/README.md index 265f15e..fe42d0a 100644 --- a/packages/core/README.md +++ b/packages/core/README.md @@ -46,7 +46,7 @@ packages/core/ ## Responsibilities - Define JSON-serializable contracts for `status`, `split`, `finish`, - `doctor`, and session attach planning. + `doctor`, mistake memory, and session attach planning. - Normalize paths to repo-relative POSIX-style paths internally while preserving platform metadata for command generation. - Store append-only events and rebuild derived status views from those events. @@ -56,8 +56,8 @@ packages/core/ research-harness consumers. - Propose session-to-work-item links without writing attach state. Low- or medium-confidence session matches must stay confirmation-gated. -- Render local execution contracts and repeated-mistake memory into agent-safe - recommendations. +- Render local execution contracts, repeated-mistake memory, and detected + mistake candidates into agent-safe recommendations. ## Non-Responsibilities diff --git a/packages/core/src/index.js b/packages/core/src/index.js index bf419f2..69a5e32 100644 --- a/packages/core/src/index.js +++ b/packages/core/src/index.js @@ -1608,6 +1608,107 @@ export async function readMistakeMemory(repoPath) { } } +export function createMistakeListSummary(input = {}) { + const mistakes = Array.isArray(input.mistakes) ? input.mistakes : []; + + return { + schemaVersion: "0.1", + command: "mistakes_list", + source: input.source ?? ".devflow/mistakes.json", + count: mistakes.length, + mistakes, + warnings: input.warnings ?? [], + }; +} + +export function createMistakeDetection(input = {}) { + const platform = input.platform ?? "unknown"; + const commandText = input.command ?? ""; + const stderr = input.stderr ?? ""; + const stdout = input.stdout ?? ""; + const combined = [commandText, stderr, stdout].filter(Boolean).join("\n"); + const candidates = []; + + if (detectPowerShellSelectObjectRange({ platform, commandText, combined })) { + candidates.push( + createMistakeRecord({ + id: "powershell-select-object-range-syntax", + category: "shell-file-io-friction", + scope: "project", + symptom: "Agent passed a PowerShell range expression as a string to Select-Object -Index.", + correction: "Wrap PowerShell ranges in parentheses, for example Select-Object -Index (108..156).", + appliesTo: ["windows-powershell"], + confidence: "high", + evidence: createMistakeEvidence({ commandText, stderr, stdout }), + }), + ); + } + + if (detectPlaywrightModuleUnavailable(combined)) { + candidates.push( + createMistakeRecord({ + id: "playwright-module-unavailable", + category: "setup-tool-availability", + scope: "project", + symptom: "Agent tried to run Playwright before the package or workspace runtime was available.", + correction: + "Inspect the repo package manager and installed dependencies before loading Playwright; install dependencies or use the bundled runtime path when the project expects it.", + appliesTo: ["playwright", "browser-automation"], + confidence: "high", + evidence: createMistakeEvidence({ commandText, stderr, stdout }), + }), + ); + } + + return { + schemaVersion: "0.1", + command: "mistakes_detect", + detection: { + platform, + command: commandText || null, + exitCode: input.exitCode ?? null, + }, + candidates, + recorded: input.recorded ?? [], + warnings: input.warnings ?? [], + }; +} + +export async function recordMistakeMemory(repoPath, input, options = {}) { + const observedAt = options.observedAt ?? new Date().toISOString(); + const memory = await readMistakeMemory(repoPath); + const incoming = createMistakeRecord(input, { observedAt }); + const mistakes = upsertMistake(memory.mistakes, incoming, observedAt); + + await writeMistakeMemory(repoPath, mistakes); + + return { + schemaVersion: "0.1", + command: "mistakes_add", + source: ".devflow/mistakes.json", + mistake: mistakes.find((mistake) => mistake.id === incoming.id), + count: mistakes.length, + warnings: memory.warnings, + }; +} + +export async function writeMistakeMemory(repoPath, mistakes) { + const target = join(repoPath, ".devflow", "mistakes.json"); + await mkdir(dirname(target), { recursive: true }); + await writeFile( + target, + `${JSON.stringify( + { + schemaVersion: "0.1", + mistakes, + }, + null, + 2, + )}\n`, + "utf8", + ); +} + export async function readDevflowConfig(repoPath) { let raw; try { @@ -2718,6 +2819,147 @@ function createDoctorRecommendations(platform, mistakes) { return recommendations; } +function createMistakeRecord(input = {}, options = {}) { + const id = normalizeMistakeId(input.id); + if (!id) { + throw new Error("Mistake id is required."); + } + + const symptom = normalizeRequiredText(input.symptom, "Mistake symptom is required."); + const correction = normalizeRequiredText(input.correction, "Mistake correction is required."); + const observedAt = options.observedAt ?? input.lastSeenAt ?? input.createdAt ?? new Date().toISOString(); + + return { + id, + category: normalizeOptionalText(input.category) ?? "agent-mistake", + scope: normalizeOptionalText(input.scope) ?? "project", + symptom, + correction, + appliesTo: normalizeStringList(input.appliesTo), + confidence: normalizeOptionalText(input.confidence) ?? "manual", + occurrences: Number.isFinite(Number(input.occurrences)) ? Number(input.occurrences) : 1, + firstSeenAt: input.firstSeenAt ?? input.createdAt ?? observedAt, + lastSeenAt: input.lastSeenAt ?? observedAt, + evidence: normalizeEvidence(input.evidence), + }; +} + +function upsertMistake(existingMistakes, incoming, observedAt) { + const mistakes = Array.isArray(existingMistakes) + ? existingMistakes.map((mistake) => createMistakeRecord(mistake)) + : []; + const index = mistakes.findIndex((mistake) => mistake.id === incoming.id); + + if (index === -1) { + return [...mistakes, incoming]; + } + + const existing = mistakes[index]; + const merged = { + ...existing, + ...incoming, + occurrences: (Number(existing.occurrences) || 1) + 1, + firstSeenAt: existing.firstSeenAt ?? incoming.firstSeenAt, + lastSeenAt: observedAt, + evidence: mergeEvidence(existing.evidence, incoming.evidence), + }; + + return [ + ...mistakes.slice(0, index), + merged, + ...mistakes.slice(index + 1), + ]; +} + +function detectPowerShellSelectObjectRange({ platform, commandText, combined }) { + if (platform !== "windows-powershell" && !/powershell|pwsh/i.test(combined)) { + return false; + } + + return ( + /Select-Object\s+-Index\s+\d+\.\.\d+/i.test(commandText) || + (/Cannot bind parameter 'Index'/i.test(combined) && + /Cannot convert value "\d+\.\.\d+" to type "System\.Int32"/i.test(combined)) + ); +} + +function detectPlaywrightModuleUnavailable(combined) { + return ( + /Cannot find (module|package) ['"]@?playwright(?:\/test)?['"]/i.test(combined) || + /ERR_MODULE_NOT_FOUND[\s\S]*@?playwright(?:\/test)?/i.test(combined) + ); +} + +function createMistakeEvidence({ commandText, stderr, stdout }) { + return [ + commandText ? { kind: "command", text: truncateMistakeText(commandText) } : null, + stderr ? { kind: "stderr", text: truncateMistakeText(stderr) } : null, + stdout ? { kind: "stdout", text: truncateMistakeText(stdout) } : null, + ].filter(Boolean); +} + +function normalizeMistakeId(value) { + return typeof value === "string" ? value.trim().toLowerCase() : ""; +} + +function normalizeRequiredText(value, errorMessage) { + const text = normalizeOptionalText(value); + if (!text) { + throw new Error(errorMessage); + } + + return text; +} + +function normalizeOptionalText(value) { + return typeof value === "string" && value.trim() ? value.trim() : null; +} + +function normalizeStringList(value) { + if (value === undefined || value === null) { + return []; + } + + return (Array.isArray(value) ? value : [value]) + .flatMap((item) => String(item).split(",")) + .map((item) => item.trim()) + .filter(Boolean); +} + +function normalizeEvidence(value) { + if (!Array.isArray(value)) { + return []; + } + + return value + .map((item) => { + if (typeof item === "string") { + return { kind: "note", text: truncateMistakeText(item) }; + } + + const text = normalizeOptionalText(item?.text); + if (!text) { + return null; + } + + return { + kind: normalizeOptionalText(item.kind) ?? "note", + text: truncateMistakeText(text), + }; + }) + .filter(Boolean) + .slice(-8); +} + +function mergeEvidence(existing, incoming) { + return [...normalizeEvidence(existing), ...normalizeEvidence(incoming)].slice(-8); +} + +function truncateMistakeText(value) { + const text = String(value).replace(/\s+/g, " ").trim(); + return text.length > 500 ? `${text.slice(0, 497)}...` : text; +} + function deriveStateFromEvents(events, warnings = []) { const completedWork = events.filter( diff --git a/packages/mcp/README.md b/packages/mcp/README.md index dc882d6..17f15e5 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -34,6 +34,9 @@ JSON-RPC transport for MCP-capable hosts. - `devflow.work_list` - `devflow.review_request` - `devflow.review_record` +- `devflow.mistakes_add` +- `devflow.mistakes_list` +- `devflow.mistakes_detect` - `devflow.finish` - `devflow.record_gate` - `devflow.gates_run` @@ -129,6 +132,15 @@ item. Use it after a separate reviewer agent or reviewer persona has inspected the work. It records reviewer, status, summary, and source; it does not perform the review itself. +`devflow.mistakes_add`, `devflow.mistakes_list`, and +`devflow.mistakes_detect` expose the repeated-mistake repair loop to MCP +hosts. `mistakes_detect` accepts command text plus stdout/stderr and returns +known mistake candidates such as PowerShell range syntax and unavailable +Playwright runtime errors. With `record: true`, it upserts the candidates into +`.devflow/mistakes.json`; `devflow.doctor` then injects those corrections into +future session context. These tools do not edit `AGENTS.md` or skill files; +promotion to durable instructions remains a confirmation-gated follow-up. + `devflow.finish` returns the same false-completion guard contract as the CLI. It reads configured gates, recorded `gate.finished` events, and configured review requirements before returning `canClaimDone`, `doneBlockers`, gate diff --git a/packages/mcp/src/index.js b/packages/mcp/src/index.js index bcacad2..c760391 100644 --- a/packages/mcp/src/index.js +++ b/packages/mcp/src/index.js @@ -15,6 +15,8 @@ import { import { createDoctorSummary, createFinishSummary, + createMistakeDetection, + createMistakeListSummary, createNextPrompt, createPromptRewrite, createReviewRequest, @@ -37,6 +39,7 @@ import { readMistakeMemory, recordFinishEvent, recordGateEvent, + recordMistakeMemory, recordManualSessionNoteEvent, recordReviewEvent, recordSessionAttachedEvent, @@ -165,6 +168,18 @@ const tools = [ name: "devflow.doctor", description: "Inspect local execution rules and repeated-mistake memory.", }, + { + name: "devflow.mistakes_add", + description: "Record a repo-local repeated agent mistake correction.", + }, + { + name: "devflow.mistakes_list", + description: "List repo-local repeated agent mistake memory.", + }, + { + name: "devflow.mistakes_detect", + description: "Detect known repeated agent mistake candidates from command output.", + }, { name: "devflow.finish", description: "Record completion evidence and generate a next-session prompt.", @@ -304,6 +319,18 @@ export async function callTool(name, args = {}) { return callDoctor(args); } + if (name === "devflow.mistakes_add") { + return callMistakesAdd(args); + } + + if (name === "devflow.mistakes_list") { + return callMistakesList(args); + } + + if (name === "devflow.mistakes_detect") { + return callMistakesDetect(args); + } + if (name === "devflow.finish") { return callFinish(args); } @@ -878,6 +905,55 @@ async function callDoctor(args) { return toolResult(summary, `devflow doctor: ${summary.platform.name}`); } +async function callMistakesAdd(args) { + const repoPath = args.repo ?? process.cwd(); + const summary = await recordMistakeMemory(repoPath, { + id: args.id, + category: args.category, + scope: args.scope, + symptom: args.symptom, + correction: args.correction, + appliesTo: args.appliesTo ?? args.applies_to ?? [], + confidence: args.confidence, + evidence: args.evidence ?? [], + }); + + return toolResult(summary, `devflow mistakes_add: ${summary.mistake.id}`); +} + +async function callMistakesList(args) { + const repoPath = args.repo ?? process.cwd(); + const memory = await readMistakeMemory(repoPath); + const summary = createMistakeListSummary({ + mistakes: memory.mistakes, + warnings: memory.warnings, + }); + + return toolResult(summary, `devflow mistakes_list: ${summary.count}`); +} + +async function callMistakesDetect(args) { + const repoPath = args.repo ?? process.cwd(); + const detection = createMistakeDetection({ + platform: args.platform ?? "windows-powershell", + command: args.command, + stderr: args.stderr, + stdout: args.stdout, + exitCode: args.exitCode ?? args.exit_code, + }); + const recorded = []; + + if (args.record) { + for (const candidate of detection.candidates) { + const result = await recordMistakeMemory(repoPath, candidate); + recorded.push(result.mistake); + } + } + + const summary = { ...detection, recorded }; + return toolResult(summary, `devflow mistakes_detect: ${summary.candidates.length}`); +} + async function callFinish(args) { const repoPath = args.repo ?? process.cwd(); const config = await readDevflowConfig(repoPath); diff --git a/packages/mcp/test/mcp-contract.test.mjs b/packages/mcp/test/mcp-contract.test.mjs index 70a8a58..f9294dd 100644 --- a/packages/mcp/test/mcp-contract.test.mjs +++ b/packages/mcp/test/mcp-contract.test.mjs @@ -44,6 +44,9 @@ test("MCP lists initial devflow tools", () => { assert.ok(names.includes("devflow.work_list")); assert.ok(names.includes("devflow.review_record")); assert.ok(names.includes("devflow.review_request")); + assert.ok(names.includes("devflow.mistakes_add")); + assert.ok(names.includes("devflow.mistakes_list")); + assert.ok(names.includes("devflow.mistakes_detect")); }); test("MCP harness tools inspect, plan, and health-check native setup", async () => { @@ -339,6 +342,53 @@ test("MCP doctor returns the same structured execution contract", async () => { assert.match(result.content[0].text, /doctor/); }); +test("MCP mistakes tools record detected candidates for doctor memory", async () => { + const repoPath = await mkdtemp(join(tmpdir(), "devflow-mcp-mistakes-")); + + const detected = await callTool("devflow.mistakes_detect", { + repo: repoPath, + platform: "windows-powershell", + command: "Get-Content -LiteralPath docs\\product-plan.md | Select-Object -Index 108..156", + stderr: "Cannot bind parameter 'Index'. Cannot convert value \"108..156\" to type \"System.Int32\".", + record: true, + }); + + assert.equal(detected.structuredContent.command, "mistakes_detect"); + assert.equal(detected.structuredContent.candidates[0].id, "powershell-select-object-range-syntax"); + assert.equal(detected.structuredContent.recorded[0].id, "powershell-select-object-range-syntax"); + + const listed = await callTool("devflow.mistakes_list", { + repo: repoPath, + }); + + assert.equal(listed.structuredContent.command, "mistakes_list"); + assert.equal(listed.structuredContent.count, 1); + + const added = await callTool("devflow.mistakes_add", { + repo: repoPath, + id: "playwright-module-unavailable", + category: "setup-tool-availability", + symptom: "Agent tried to run Playwright before the package or workspace runtime was available.", + correction: "Inspect package manager state before loading Playwright.", + appliesTo: ["playwright"], + }); + + assert.equal(added.structuredContent.command, "mistakes_add"); + assert.equal(added.structuredContent.mistake.id, "playwright-module-unavailable"); + + const doctor = await callTool("devflow.doctor", { + repo: repoPath, + platform: "windows-powershell", + }); + + assert.equal(doctor.structuredContent.memory.repeatedMistakes.length, 2); + assert.ok( + doctor.structuredContent.recommendations.some( + (item) => item.source === "playwright-module-unavailable", + ), + ); +}); + test("MCP finish records evidence into local state", async () => { const repoPath = await mkdtemp(join(tmpdir(), "devflow-mcp-finish-")); const result = await callTool("devflow.finish", {