diff --git a/CHANGELOG.md b/CHANGELOG.md index 71c6175..d10e383 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## 0.6.0 — 2026-06-18 + +### Added + +- **TUI Planner Cockpit (`/plan `)** — the TUI is no longer just chat + a read-only monitor. `/plan` runs the Planner to preview a decomposition, gates on human approval (`y` / `n` / `edit` to drop sub-tasks), then dispatches into the daemon loop; progress shows in the Tasks tab. Available in both the blessed TUI (`chat`) and the readline chat. (INT-1572) +- `POST /api/plan/dispatch` — dual-path dispatch: with Linear configured it creates a parent issue + dependency-wired sub-issues and triggers a heartbeat (reusing the autonomous decomposition engine); otherwise it falls back to running each sub-task through the exec pipeline. +- **Web tools in the agentic loop** — `web_fetch` (keyless: URL → readable text) and `web_search` (pluggable backend: Tavily/Brave when `TAVILY_KEY`/`BRAVE_SEARCH_KEY` is set, else a keyless DuckDuckGo fallback) are now exposed to every adapter (openrouter/gpt/local), restoring the web capability the `claude -p` harness used to provide. Enabled by default (`webTools` option); disabled for the SWE-bench harness to keep the benchmark honest. (INT-1573) + +### Changed + +- **Planner migrated off `claude -p`** — `runPlanner` now runs through the OpenSwarm agentic loop via the configured adapter (read-only, multi-turn) instead of shelling out to `claude -p --max-turns 1`. Completes the INT-1420 `claude -p` removal, drops the claude-binary dependency, and lets the planner read the codebase before decomposing. `PlannerResult` contract unchanged. +- Extracted `createSubIssuesWithDependencies()` from the autonomous runner so the `/plan` endpoint and `decomposeTask` share one sub-issue/dependency engine (no logic fork). +- Extracted `startExecTask()` in the web server so `POST /api/exec` and the `/plan` fallback share one exec-task lifecycle. + ## 0.5.0 — 2026-06-11 ### Added diff --git a/benchmarks/sweBench.ts b/benchmarks/sweBench.ts index 65a6c37..37c8e1a 100644 --- a/benchmarks/sweBench.ts +++ b/benchmarks/sweBench.ts @@ -174,6 +174,9 @@ async function solveOne(inst: SweInstance, model: string): Promise<{ pred: Recor model: diagModel, timeoutMs: 900_000, maxTurns: 50, + // Benchmark integrity — no web tools, or the model could just search up + // the instance's GitHub issue / gold patch instead of diagnosing. + webTools: false, onLog: process.env.SWE_VERBOSE ? (l) => console.log(` [diag] ${l}`) : () => {}, }); // 진단자가 실수로 수정했어도 구현 단계는 깨끗한 베이스에서 시작 @@ -214,6 +217,8 @@ async function solveOne(inst: SweInstance, model: string): Promise<{ pred: Recor // run_tests.sh = docker cp + in-container pytest — the 30s default times // out into a silent no-output failure the model reads as a broken env. bashTimeoutMs: 240_000, + // Benchmark integrity — no web search of the gold patch / GitHub issue. + webTools: false, onLog: process.env.SWE_VERBOSE ? (l) => console.log(` ${l}`) : () => {}, }); diff --git a/package.json b/package.json index 3a746ea..2bfa0a0 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@intrect/openswarm", - "version": "0.5.0", + "version": "0.6.0", "description": "Autonomous AI agent orchestrator — Claude, GPT, Codex, and local models (Ollama/LMStudio/llama.cpp)", "license": "GPL-3.0", "type": "module", diff --git a/src/adapters/agenticLoop.ts b/src/adapters/agenticLoop.ts index a2349b9..4dd5d4e 100644 --- a/src/adapters/agenticLoop.ts +++ b/src/adapters/agenticLoop.ts @@ -7,6 +7,7 @@ // ============================================ import { TOOL_DEFINITIONS, executeToolCalls, createReadCache, type ToolCall, type ToolResult, type ToolDefinition } from './tools.js'; +import { WEB_TOOL_DEFINITIONS } from './webTools.js'; import type { CliRunResult } from './types.js'; // ============ 토큰 카운팅 (VEGA token_count.py 이식) ============ @@ -115,6 +116,8 @@ export interface AgenticLoopOptions { protectedFiles?: string[]; /** bash tool timeout — docker-based tests need minutes (default 30s) */ bashTimeoutMs?: number; + /** Expose web_fetch + web_search tools (default true). Disabled e.g. for SWE-bench integrity. */ + webTools?: boolean; } /** 루프 실행 결과 */ @@ -160,6 +163,7 @@ export async function runAgenticLoop(options: AgenticLoopOptions): Promise { + vi.unstubAllGlobals(); + vi.unstubAllEnvs(); + vi.clearAllMocks(); +}); + +describe('WEB_TOOL_DEFINITIONS', () => { + it('exposes exactly web_fetch and web_search', () => { + expect(WEB_TOOL_DEFINITIONS.map((t) => t.function.name)).toEqual(['web_fetch', 'web_search']); + }); +}); + +describe('webFetch', () => { + it('strips HTML to readable text', async () => { + vi.stubGlobal('fetch', vi.fn(async () => + new Response( + '

Hi

world & co

', + { status: 200, headers: { 'content-type': 'text/html' } }, + ), + )); + const out = await webFetch('https://example.com'); + expect(out).toContain('Hi'); + expect(out).toContain('world & co'); + expect(out).not.toContain('<'); + expect(out).not.toContain('bad()'); + }); + + it('rejects a non-http URL without fetching', async () => { + const f = vi.fn(); + vi.stubGlobal('fetch', f); + const out = await webFetch('ftp://x'); + expect(out).toContain('Invalid URL'); + expect(f).not.toHaveBeenCalled(); + }); + + it('reports an HTTP error rather than throwing', async () => { + vi.stubGlobal('fetch', vi.fn(async () => new Response('nope', { status: 404, statusText: 'Not Found' }))); + const out = await webFetch('https://example.com/x'); + expect(out).toContain('404'); + }); +}); + +describe('webSearch — backend selection', () => { + it('defaults to duckduckgo with no keys', () => { + expect(searchBackend()).toBe('duckduckgo'); + }); + + it('prefers Tavily when TAVILY_KEY is set', async () => { + vi.stubEnv('TAVILY_KEY', 'tk'); + expect(searchBackend()).toBe('tavily'); + const f = vi.fn(async () => + new Response(JSON.stringify({ results: [{ title: 'T', url: 'https://t', content: 'snip' }] }), { status: 200 }), + ); + vi.stubGlobal('fetch', f); + const out = await webSearch('q', 3); + expect(String(f.mock.calls[0][0])).toContain('api.tavily.com'); + expect(out).toContain('T'); + expect(out).toContain('https://t'); + }); + + it('uses Brave when BRAVE_SEARCH_KEY is set', async () => { + vi.stubEnv('BRAVE_SEARCH_KEY', 'bk'); + expect(searchBackend()).toBe('brave'); + const f = vi.fn(async () => + new Response(JSON.stringify({ web: { results: [{ title: 'B', url: 'https://b', description: 'd' }] } }), { status: 200 }), + ); + vi.stubGlobal('fetch', f); + const out = await webSearch('q'); + expect(String(f.mock.calls[0][0])).toContain('brave.com'); + expect(out).toContain('B'); + }); + + it('parses keyless DuckDuckGo HTML results', async () => { + const html = + 'Result A' + + 'snippet a'; + vi.stubGlobal('fetch', vi.fn(async () => new Response(html, { status: 200 }))); + const out = await webSearch('q', 5); + expect(out).toContain('Result A'); + expect(out).toContain('https://ex.com/a'); + expect(out).toContain('snippet a'); + }); + + it('returns an error string (does not throw) on backend failure', async () => { + vi.stubGlobal('fetch', vi.fn(async () => { throw new Error('net down'); })); + const out = await webSearch('q'); + expect(out).toContain('Search failed'); + expect(out).toContain('TAVILY_KEY'); // keyless hint + }); +}); diff --git a/src/adapters/webTools.ts b/src/adapters/webTools.ts new file mode 100644 index 0000000..b49c9a5 --- /dev/null +++ b/src/adapters/webTools.ts @@ -0,0 +1,201 @@ +// ============================================ +// OpenSwarm - Web tools (web_fetch + web_search) +// ============================================ +// +// First-class web capability for the agentic loop, shared by every adapter +// (openrouter/gpt/local) — the `claude -p` harness used to provide this for +// free (INT-1573). The model calls these deliberately, like `bash`. +// +// web_fetch is keyless. web_search has a pluggable backend: Tavily or Brave +// when a key is set, else a keyless (and fragile) DuckDuckGo fallback. + +import type { ToolDefinition } from './tools.js'; + +export const WEB_TOOL_DEFINITIONS: ToolDefinition[] = [ + { + type: 'function', + function: { + name: 'web_fetch', + description: + 'Fetch a URL and return its readable text (HTML stripped to text). Use when you already have a URL (docs, a page) and want its content.', + parameters: { + type: 'object', + properties: { + url: { type: 'string', description: 'The http(s) URL to fetch' }, + }, + required: ['url'], + }, + }, + }, + { + type: 'function', + function: { + name: 'web_search', + description: + 'Search the web and return ranked results (title, url, snippet). Use to find documentation, API usage, library versions, or current facts.', + parameters: { + type: 'object', + properties: { + query: { type: 'string', description: 'Search query' }, + max_results: { type: 'number', description: 'Max results to return (default 5, max 10)' }, + }, + required: ['query'], + }, + }, + }, +]; + +const FETCH_TIMEOUT_MS = 15_000; +const MAX_FETCH_CHARS = 20_000; +const USER_AGENT = 'OpenSwarm/0.6 (+https://github.com/unohee/openswarm)'; + +async function fetchWithTimeout(url: string, init: RequestInit = {}): Promise { + const ac = new AbortController(); + const timer = setTimeout(() => ac.abort(), FETCH_TIMEOUT_MS); + try { + return await fetch(url, { + ...init, + signal: ac.signal, + headers: { 'User-Agent': USER_AGENT, ...init.headers }, + }); + } finally { + clearTimeout(timer); + } +} + +function stripTags(s: string): string { + return s + .replace(/<[^>]+>/g, '') + .replace(/ /g, ' ') + .replace(/&/g, '&') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/"/g, '"') + .replace(/&#(?:39|x27);/g, "'") + .replace(/\s+/g, ' ') + .trim(); +} + +function htmlToText(html: string): string { + return html + .replace(//gi, ' ') + .replace(//gi, ' ') + .replace(//g, ' ') + .replace(/<[^>]+>/g, ' ') + .replace(/ /g, ' ') + .replace(/&/g, '&') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/"/g, '"') + .replace(/&#(?:39|x27);/g, "'") + .replace(/[ \t]+/g, ' ') + .replace(/\n\s*\n\s*\n+/g, '\n\n') + .trim(); +} + +/** Fetch a URL → readable text (HTML stripped). Returns an error string on failure (never throws). */ +export async function webFetch(url: string): Promise { + if (typeof url !== 'string' || !/^https?:\/\//i.test(url)) { + return `Invalid URL: ${url} (must start with http:// or https://)`; + } + let res: Response; + try { + res = await fetchWithTimeout(url); + } catch (err) { + return `Fetch failed for ${url}: ${err instanceof Error ? err.message : String(err)}`; + } + if (!res.ok) return `Fetch ${url} → HTTP ${res.status} ${res.statusText}`; + const ctype = res.headers.get('content-type') ?? ''; + const body = await res.text(); + const text = ctype.includes('html') || /^\s* MAX_FETCH_CHARS + ? `${text.slice(0, MAX_FETCH_CHARS)}\n... (truncated, ${text.length} chars total)` + : text || '(empty response)'; +} + +interface SearchResult { title: string; url: string; snippet: string } + +/** Which search backend is active (for diagnostics). */ +export function searchBackend(): 'tavily' | 'brave' | 'duckduckgo' { + if (process.env.TAVILY_KEY) return 'tavily'; + if (process.env.BRAVE_SEARCH_KEY) return 'brave'; + return 'duckduckgo'; +} + +/** Search the web → formatted result list. Returns an error string on failure (never throws). */ +export async function webSearch(query: string, maxResults = 5): Promise { + if (typeof query !== 'string' || !query.trim()) return 'Invalid query: a non-empty search query is required.'; + const n = Math.min(Math.max(Number(maxResults) || 5, 1), 10); + try { + const backend = searchBackend(); + const results = + backend === 'tavily' ? await tavilySearch(query, n) + : backend === 'brave' ? await braveSearch(query, n) + : await ddgSearch(query, n); + if (results.length === 0) return `No results for "${query}".`; + return results.map((r, i) => `${i + 1}. ${r.title}\n ${r.url}${r.snippet ? `\n ${r.snippet}` : ''}`).join('\n\n'); + } catch (err) { + const keyed = process.env.TAVILY_KEY || process.env.BRAVE_SEARCH_KEY; + const hint = keyed ? '' : ' (the keyless DuckDuckGo backend is fragile — set TAVILY_KEY or BRAVE_SEARCH_KEY for reliable search)'; + return `Search failed for "${query}": ${err instanceof Error ? err.message : String(err)}${hint}`; + } +} + +async function tavilySearch(query: string, n: number): Promise { + const res = await fetchWithTimeout('https://api.tavily.com/search', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ api_key: process.env.TAVILY_KEY, query, max_results: n }), + }); + if (!res.ok) throw new Error(`Tavily HTTP ${res.status}`); + const data = (await res.json()) as { results?: Array<{ title?: string; url?: string; content?: string }> }; + return (data.results ?? []).slice(0, n).map((r) => ({ + title: r.title ?? '', + url: r.url ?? '', + snippet: (r.content ?? '').slice(0, 300), + })); +} + +async function braveSearch(query: string, n: number): Promise { + const url = `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent(query)}&count=${n}`; + const res = await fetchWithTimeout(url, { + headers: { 'X-Subscription-Token': process.env.BRAVE_SEARCH_KEY ?? '', Accept: 'application/json' }, + }); + if (!res.ok) throw new Error(`Brave HTTP ${res.status}`); + const data = (await res.json()) as { web?: { results?: Array<{ title?: string; url?: string; description?: string }> } }; + return (data.web?.results ?? []).slice(0, n).map((r) => ({ + title: r.title ?? '', + url: r.url ?? '', + snippet: stripTags(r.description ?? '').slice(0, 300), + })); +} + +async function ddgSearch(query: string, n: number): Promise { + const res = await fetchWithTimeout(`https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`); + if (!res.ok) throw new Error(`DuckDuckGo HTTP ${res.status}`); + const html = await res.text(); + + const results: SearchResult[] = []; + const linkRe = /]*class="result__a"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/gi; + let m: RegExpExecArray | null; + while ((m = linkRe.exec(html)) !== null && results.length < n) { + results.push({ title: stripTags(m[2]), url: decodeDdgUrl(m[1]), snippet: '' }); + } + + const snipRe = /]*class="result__snippet"[^>]*>([\s\S]*?)<\/a>/gi; + let i = 0; + let sm: RegExpExecArray | null; + while ((sm = snipRe.exec(html)) !== null && i < results.length) { + results[i].snippet = stripTags(sm[1]).slice(0, 300); + i++; + } + return results; +} + +function decodeDdgUrl(href: string): string { + const m = href.match(/[?&]uddg=([^&]+)/); + if (m) { + try { return decodeURIComponent(m[1]); } catch { /* fall through */ } + } + return href.startsWith('//') ? `https:${href}` : href; +} diff --git a/src/agents/worker.ts b/src/agents/worker.ts index edab3f2..c58a53d 100644 --- a/src/agents/worker.ts +++ b/src/agents/worker.ts @@ -34,6 +34,8 @@ export interface WorkerOptions { protectedFiles?: string[]; /** bash tool timeout in ms — raise for slow verification such as docker-based tests */ bashTimeoutMs?: number; + /** Expose web_fetch + web_search tools (default true). Set false for SWE-bench integrity. */ + webTools?: boolean; } // Prompts @@ -86,6 +88,7 @@ export async function runWorker(options: WorkerOptions): Promise { nudgeMaxOnNoEdit: options.nudgeMaxOnNoEdit, protectedFiles: options.protectedFiles, bashTimeoutMs: options.bashTimeoutMs, + webTools: options.webTools, }); // Parse result via adapter diff --git a/src/automation/runnerExecution.ts b/src/automation/runnerExecution.ts index 7537dbe..94003c3 100644 --- a/src/automation/runnerExecution.ts +++ b/src/automation/runnerExecution.ts @@ -16,6 +16,7 @@ import * as reviewerAgent from '../agents/reviewer.js'; import * as projectMapper from '../support/projectMapper.js'; import * as linear from '../linear/index.js'; import * as planner from '../support/planner.js'; +import type { SubTask } from '../support/planner.js'; import { analyzeIssue } from '../knowledge/index.js'; import { runDraftAnalysis, type DraftAnalysis } from '../agents/draftAnalyzer.js'; import { t } from '../locale/index.js'; @@ -229,140 +230,23 @@ export async function isValidProjectPath(path: string): Promise { // Task Decomposition -export async function decomposeTask( - ctx: ExecutionContext, - task: TaskItem, - projectPath: string, - targetMinutes: number, - draftAnalysis?: DraftAnalysis, -): Promise { - console.log(`[AutonomousRunner] Decomposing task: ${task.title}`); - - const taskId = task.issueId || task.id; - const maxDepth = ctx.decompositionMaxDepth ?? 2; - const maxChildren = ctx.decompositionMaxChildren ?? 5; - const dailyLimit = ctx.decompositionDailyLimit ?? 20; - const autoBacklog = ctx.decompositionAutoBacklog ?? true; - - // ============================================ - // Pre-checks: Depth, Children, Daily Limit - // ============================================ - - // Check decomposition depth limit - if (task.issueId) { - const currentDepth = getDecompositionDepth(task.issueId); - if (currentDepth >= maxDepth) { - console.log(`[AutonomousRunner] Decomposition depth limit reached: ${currentDepth}/${maxDepth}`); - if (autoBacklog && task.issueId) { - try { - await linear.updateIssueState(task.issueId, 'Backlog'); - await linear.addComment(task.issueId, - `⚠️ **Auto-moved to Backlog**\n\n` + - `Reason: Decomposition depth limit reached (${currentDepth}/${maxDepth})\n\n` + - `This task has been nested too deeply. Please review and simplify the task structure, ` + - `or handle it manually.` - ); - console.log(`[AutonomousRunner] Task moved to backlog (depth limit)`); - } catch (err) { - console.error(`[AutonomousRunner] Failed to move to backlog:`, err); - } - } - return false; - } - - // Check children count limit - const childrenCount = getChildrenCount(task.issueId); - if (childrenCount >= maxChildren) { - console.log(`[AutonomousRunner] Children count limit reached: ${childrenCount}/${maxChildren}`); - if (autoBacklog) { - try { - await linear.updateIssueState(task.issueId, 'Backlog'); - await linear.addComment(task.issueId, - `⚠️ **Auto-moved to Backlog**\n\n` + - `Reason: Too many sub-issues already created (${childrenCount}/${maxChildren})\n\n` + - `This task has generated too many sub-issues. Please review the decomposition strategy, ` + - `or handle it manually.` - ); - console.log(`[AutonomousRunner] Task moved to backlog (children limit)`); - } catch (err) { - console.error(`[AutonomousRunner] Failed to move to backlog:`, err); - } - } - return false; - } - } - - // Check daily creation limit - // NOTE: Don't move to Backlog on daily limit — it resets tomorrow. - // Moving to Backlog would permanently exclude the task from future heartbeats. - // Instead, skip decomposition and fall through to direct execution. - if (!canCreateMoreIssues(dailyLimit)) { - const currentCount = getDailyCreationCount(); - console.log(`[AutonomousRunner] Daily issue creation limit reached: ${currentCount}/${dailyLimit} — skipping decomposition (will retry tomorrow)`); - return false; - } - - broadcastEvent({ type: 'pipeline:stage', data: { taskId, stage: 'decompose', status: 'start' } }); - - await ctx.reportToDiscord(t('runner.decomposition.starting', { - title: task.title, - estimated: String(planner.estimateTaskDuration(task)), - threshold: String(targetMinutes), - })); - - // Periodic progress log while planner runs (fallback if stdout isn't streaming) - let elapsed = 0; - const progressTimer = setInterval(() => { - elapsed += 30; - broadcastEvent({ type: 'log', data: { taskId, stage: 'decompose', line: `⏱ Planner running... ${elapsed}s` } }); - }, 30000); - - // KG 영향 분석 — Draft가 이미 가지고 있으면 재사용 - const impactAnalysis = draftAnalysis?.impactAnalysis - ?? await analyzeIssue(projectPath, task.title, task.description).catch(() => null); - - let result: Awaited>; - try { - result = await planner.runPlanner({ - taskTitle: task.title, - taskDescription: task.description || '', - projectPath, - projectName: task.linearProject?.name, - targetMinutes, - model: ctx.plannerModel ?? 'claude-opus-4-7', - timeoutMs: ctx.plannerTimeoutMs ?? 600000, - onLog: (line: string) => broadcastEvent({ type: 'log', data: { taskId, stage: 'decompose', line } }), - impactAnalysis: impactAnalysis ?? undefined, - draftAnalysis: draftAnalysis ? { - taskType: draftAnalysis.taskType, - intentSummary: draftAnalysis.intentSummary, - relevantFiles: draftAnalysis.relevantFiles, - suggestedApproach: draftAnalysis.suggestedApproach, - projectStats: draftAnalysis.projectStats, - } : undefined, - }); - } finally { - clearInterval(progressTimer); - } - - await ctx.reportToDiscord(planner.formatPlannerResult(result)); - - if (!result.success) { - console.error(`[AutonomousRunner] Planner failed: ${result.error}`); - broadcastEvent({ type: 'pipeline:stage', data: { taskId, stage: 'decompose', status: 'fail' } }); - return false; - } - - if (!result.needsDecomposition || result.subTasks.length === 0) { - console.log('[AutonomousRunner] Planner determined no decomposition needed'); - return 'no-decomp'; - } - - if (!task.issueId) { - console.error('[AutonomousRunner] Cannot create sub-issues: no parent issueId'); - return false; - } - +/** + * Create Linear sub-issues from an (approved) decomposition: create each + * sub-issue, register tracking for limits, wire dependencies + * (ready→Todo / blocked→Backlog), sync state comments, and trigger an immediate + * heartbeat. Shared by the autonomous `decomposeTask` path and the TUI `/plan` + * dispatch endpoint so both behave identically (no logic fork). The caller must + * have already created the parent issue (`parentIssueId`). + */ +export async function createSubIssuesWithDependencies( + parentIssueId: string, + task: { title: string; issueIdentifier?: string; parentId?: string; linearProject?: { id?: string; name?: string } }, + subTasks: SubTask[], + totalEstimatedMinutes: number, + ctx: { reportToDiscord: (msg: string) => Promise | void; scheduleNextHeartbeat?: () => void }, + taskId: string, + dailyLimit: number, +): Promise { const createdSubIssues: Array<{ id: string; identifier: string; @@ -372,7 +256,7 @@ export async function decomposeTask( estimatedMinutes: number; }> = []; - for (const [index, subTask] of result.subTasks.entries()) { + for (const [index, subTask] of subTasks.entries()) { const depsStr = subTask.dependencies?.length ? `\n\n${t('runner.decomposition.prerequisite', { deps: subTask.dependencies.join(', ') })}` : ''; @@ -382,7 +266,7 @@ export async function decomposeTask( t('runner.decomposition.autoDecomposed', { parentTitle: task.title }); const subResult = await linear.createSubIssue( - task.issueId, + parentIssueId, subTask.title, subDescription, { @@ -417,20 +301,20 @@ export async function decomposeTask( // Register decomposition in tracking (for limits) registerDecomposition( - task.issueId, + parentIssueId, task.parentId, // Parent ID if this task is also a sub-issue createdSubIssues.map(s => s.id) ); - console.log(`[AutonomousRunner] Registered decomposition: parent=${task.issueId}, children=${createdSubIssues.length}, daily=${getDailyCreationCount()}/${dailyLimit}`); + console.log(`[AutonomousRunner] Registered decomposition: parent=${parentIssueId}, children=${createdSubIssues.length}, daily=${getDailyCreationCount()}/${dailyLimit}`); await linear.markAsDecomposed( - task.issueId, + parentIssueId, createdSubIssues.length, - result.totalEstimatedMinutes + totalEstimatedMinutes ); const childIdByTitle = new Map(createdSubIssues.map((subIssue) => [subIssue.title, subIssue.id])); - const parentState = markTaskDecomposed(task.issueId, { + const parentState = markTaskDecomposed(parentIssueId, { issueIdentifier: task.issueIdentifier, title: task.title, projectId: task.linearProject?.id, @@ -440,7 +324,7 @@ export async function decomposeTask( }); await linear.addComment( - task.issueId, + parentIssueId, buildTaskStateSyncComment(parentState, 'Parent task decomposed') ); @@ -449,10 +333,10 @@ export async function decomposeTask( .join('\n'); await ctx.reportToDiscord(t('runner.decomposition.completed', { - original: task.issueIdentifier || task.issueId || '', + original: task.issueIdentifier || parentIssueId || '', count: String(createdSubIssues.length), list: subIssueList, - totalMinutes: String(result.totalEstimatedMinutes), + totalMinutes: String(totalEstimatedMinutes), })); broadcastEvent({ type: 'pipeline:stage', data: { taskId, stage: 'decompose', status: 'complete' } }); @@ -473,7 +357,7 @@ export async function decomposeTask( title: subIssue.title, projectId: task.linearProject?.id, projectName: task.linearProject?.name, - parentIssueId: task.issueId, + parentIssueId: parentIssueId, dependencyIssueIds, dependencyTitles: subIssue.dependencies, topoRank: subIssue.topoRank, @@ -513,6 +397,153 @@ export async function decomposeTask( return true; } +export async function decomposeTask( + ctx: ExecutionContext, + task: TaskItem, + projectPath: string, + targetMinutes: number, + draftAnalysis?: DraftAnalysis, +): Promise { + console.log(`[AutonomousRunner] Decomposing task: ${task.title}`); + + const taskId = task.issueId || task.id; + const maxDepth = ctx.decompositionMaxDepth ?? 2; + const maxChildren = ctx.decompositionMaxChildren ?? 5; + const dailyLimit = ctx.decompositionDailyLimit ?? 20; + const autoBacklog = ctx.decompositionAutoBacklog ?? true; + + // ============================================ + // Pre-checks: Depth, Children, Daily Limit + // ============================================ + + // Check decomposition depth limit + if (task.issueId) { + const currentDepth = getDecompositionDepth(task.issueId); + if (currentDepth >= maxDepth) { + console.log(`[AutonomousRunner] Decomposition depth limit reached: ${currentDepth}/${maxDepth}`); + if (autoBacklog && task.issueId) { + try { + await linear.updateIssueState(task.issueId, 'Backlog'); + await linear.addComment(task.issueId, + `⚠️ **Auto-moved to Backlog**\n\n` + + `Reason: Decomposition depth limit reached (${currentDepth}/${maxDepth})\n\n` + + `This task has been nested too deeply. Please review and simplify the task structure, ` + + `or handle it manually.` + ); + console.log(`[AutonomousRunner] Task moved to backlog (depth limit)`); + } catch (err) { + console.error(`[AutonomousRunner] Failed to move to backlog:`, err); + } + } + return false; + } + + // Check children count limit + const childrenCount = getChildrenCount(task.issueId); + if (childrenCount >= maxChildren) { + console.log(`[AutonomousRunner] Children count limit reached: ${childrenCount}/${maxChildren}`); + if (autoBacklog) { + try { + await linear.updateIssueState(task.issueId, 'Backlog'); + await linear.addComment(task.issueId, + `⚠️ **Auto-moved to Backlog**\n\n` + + `Reason: Too many sub-issues already created (${childrenCount}/${maxChildren})\n\n` + + `This task has generated too many sub-issues. Please review the decomposition strategy, ` + + `or handle it manually.` + ); + console.log(`[AutonomousRunner] Task moved to backlog (children limit)`); + } catch (err) { + console.error(`[AutonomousRunner] Failed to move to backlog:`, err); + } + } + return false; + } + } + + // Check daily creation limit + // NOTE: Don't move to Backlog on daily limit — it resets tomorrow. + // Moving to Backlog would permanently exclude the task from future heartbeats. + // Instead, skip decomposition and fall through to direct execution. + if (!canCreateMoreIssues(dailyLimit)) { + const currentCount = getDailyCreationCount(); + console.log(`[AutonomousRunner] Daily issue creation limit reached: ${currentCount}/${dailyLimit} — skipping decomposition (will retry tomorrow)`); + return false; + } + + broadcastEvent({ type: 'pipeline:stage', data: { taskId, stage: 'decompose', status: 'start' } }); + + await ctx.reportToDiscord(t('runner.decomposition.starting', { + title: task.title, + estimated: String(planner.estimateTaskDuration(task)), + threshold: String(targetMinutes), + })); + + // Periodic progress log while planner runs (fallback if stdout isn't streaming) + let elapsed = 0; + const progressTimer = setInterval(() => { + elapsed += 30; + broadcastEvent({ type: 'log', data: { taskId, stage: 'decompose', line: `⏱ Planner running... ${elapsed}s` } }); + }, 30000); + + // KG 영향 분석 — Draft가 이미 가지고 있으면 재사용 + const impactAnalysis = draftAnalysis?.impactAnalysis + ?? await analyzeIssue(projectPath, task.title, task.description).catch(() => null); + + let result: Awaited>; + try { + result = await planner.runPlanner({ + taskTitle: task.title, + taskDescription: task.description || '', + projectPath, + projectName: task.linearProject?.name, + targetMinutes, + // Planner runs through the configured adapter loop now (not claude -p); + // leave model unset to use the adapter default when no planner model is configured. + model: ctx.plannerModel, + timeoutMs: ctx.plannerTimeoutMs ?? 600000, + onLog: (line: string) => broadcastEvent({ type: 'log', data: { taskId, stage: 'decompose', line } }), + impactAnalysis: impactAnalysis ?? undefined, + draftAnalysis: draftAnalysis ? { + taskType: draftAnalysis.taskType, + intentSummary: draftAnalysis.intentSummary, + relevantFiles: draftAnalysis.relevantFiles, + suggestedApproach: draftAnalysis.suggestedApproach, + projectStats: draftAnalysis.projectStats, + } : undefined, + }); + } finally { + clearInterval(progressTimer); + } + + await ctx.reportToDiscord(planner.formatPlannerResult(result)); + + if (!result.success) { + console.error(`[AutonomousRunner] Planner failed: ${result.error}`); + broadcastEvent({ type: 'pipeline:stage', data: { taskId, stage: 'decompose', status: 'fail' } }); + return false; + } + + if (!result.needsDecomposition || result.subTasks.length === 0) { + console.log('[AutonomousRunner] Planner determined no decomposition needed'); + return 'no-decomp'; + } + + if (!task.issueId) { + console.error('[AutonomousRunner] Cannot create sub-issues: no parent issueId'); + return false; + } + + return createSubIssuesWithDependencies( + task.issueId, + task, + result.subTasks, + result.totalEstimatedMinutes, + ctx, + taskId, + dailyLimit, + ); +} + // Pipeline Execution export async function executePipeline( diff --git a/src/support/chat.ts b/src/support/chat.ts index 901b258..17fde19 100644 --- a/src/support/chat.ts +++ b/src/support/chat.ts @@ -13,6 +13,7 @@ import { existsSync } from 'node:fs'; import { loadConfig } from '../core/config.js'; import { getDefaultAdapterName, type AdapterName } from '../adapters/index.js'; import { getDefaultChatModel, resolveChatModel, runChatCompletion, shortenChatModel } from './chatBackend.js'; +import { runPlanCommand, type PlanIO } from './planCommand.js'; const CHAT_DIR = resolve(homedir(), '.openswarm', 'chat'); @@ -106,6 +107,7 @@ async function chat(session: Session, userMessage: string): Promise { async function handleCommand( cmd: string, session: Session, + rl: readline.Interface, ): Promise<'exit' | 'handled'> { const [command, ...args] = cmd.slice(1).split(' '); @@ -190,6 +192,24 @@ async function handleCommand( return 'handled'; } + case 'plan': { + const goal = args.join(' ').trim(); + if (!goal) { + console.log(`${DIM}Usage: /plan ${RESET}`); + return 'handled'; + } + const io: PlanIO = { + print: (line: string) => console.log(line), + confirm: async (p: string): Promise<'yes' | 'no' | 'edit'> => { + const a = (await rl.question(`${CYAN}${p}${RESET} `)).trim().toLowerCase(); + return a === 'y' || a === 'yes' ? 'yes' : a === 'e' || a === 'edit' ? 'edit' : 'no'; + }, + promptText: async (p: string): Promise => (await rl.question(`${CYAN}${p}${RESET} `)).trim(), + }; + await runPlanCommand(goal, io, { projectPath: process.cwd() }); + return 'handled'; + } + case 'info': case 'status': console.log(`${BOLD}Session:${RESET} ${session.id}`); @@ -203,6 +223,7 @@ async function handleCommand( case '?': console.log(` ${BOLD}Commands:${RESET} + ${CYAN}/plan ${RESET} Decompose a goal & dispatch it to the loop ${CYAN}/clear${RESET} Clear conversation ${CYAN}/save [name]${RESET} Save session ${CYAN}/load [name]${RESET} List/load sessions @@ -279,7 +300,7 @@ async function main(): Promise { } if (trimmed.startsWith('/')) { - if (await handleCommand(trimmed, session) === 'exit') break; + if (await handleCommand(trimmed, session, rl) === 'exit') break; continue; } diff --git a/src/support/chatTui.ts b/src/support/chatTui.ts index 0af493c..3d21f4e 100644 --- a/src/support/chatTui.ts +++ b/src/support/chatTui.ts @@ -10,6 +10,7 @@ import { existsSync } from 'node:fs'; import { loadConfig } from '../core/config.js'; import { getDefaultAdapterName, type AdapterName } from '../adapters/index.js'; import { getDefaultChatModel, resolveChatModel, runChatCompletion, shortenChatModel } from './chatBackend.js'; +import { runPlanCommand, type PlanIO } from './planCommand.js'; // Constants const CHAT_DIR = resolve(homedir(), '.openswarm', 'chat'); @@ -60,6 +61,8 @@ type AppState = { avgTokensPerSec: number; totalRequests: number; }; + /** When set, the next submitted input line is routed here (e.g. /plan approval) instead of chat. */ + pendingInput?: (value: string) => void; }; // Session Management async function ensureChatDir(): Promise { @@ -1028,12 +1031,45 @@ async function handleCommand( break; } + case 'plan': { + const goal = args.join(' ').trim(); + if (!goal) { + ui.chatLog.log(''); + ui.chatLog.log(' {#fbbf24-fg}Usage: /plan {/}'); + ui.chatLog.log(''); + safeRender(); + break; + } + const io: PlanIO = { + print: (line: string) => { + ui.chatLog.log(line ? ` ${line}` : ''); + safeRender(); + }, + confirm: (prompt: string) => new Promise<'yes' | 'no' | 'edit'>((resolve) => { + ui.chatLog.log(` {#fbbf24-fg}${prompt}{/}`); + safeRender(); + state.pendingInput = (v: string) => { + const a = v.trim().toLowerCase(); + resolve(a === 'y' || a === 'yes' ? 'yes' : a === 'e' || a === 'edit' ? 'edit' : 'no'); + }; + }), + promptText: (prompt: string) => new Promise((resolve) => { + ui.chatLog.log(` {#fbbf24-fg}${prompt}{/}`); + safeRender(); + state.pendingInput = (v: string) => resolve(v); + }), + }; + await runPlanCommand(goal, io, { projectPath: process.cwd() }); + break; + } + case 'help': case 'h': case '?': ui.chatLog.log(''); ui.chatLog.log(' {bold}Available Commands{/bold}'); ui.chatLog.log(''); + ui.chatLog.log(' {#60a5fa-fg}/plan{/} Decompose a goal & dispatch it to the loop'); ui.chatLog.log(' {#60a5fa-fg}/clear{/} Clear conversation'); ui.chatLog.log(' {#60a5fa-fg}/provider{/} [id] Change provider {#718096-fg}(claude/codex){/}'); ui.chatLog.log(' {#60a5fa-fg}/model{/} [name] Change model {#718096-fg}(sonnet/haiku/opus){/}'); @@ -1214,6 +1250,14 @@ export async function main(): Promise { ui.inputBox.focus(); safeRender(); + // An in-progress /plan approval consumes the next line, not chat/commands. + if (state.pendingInput) { + const resolve = state.pendingInput; + state.pendingInput = undefined; + resolve(trimmed); + return; + } + if (trimmed.startsWith('/')) { await handleCommand(trimmed, state, ui); } else { diff --git a/src/support/planCommand.test.ts b/src/support/planCommand.test.ts new file mode 100644 index 0000000..687a1ea --- /dev/null +++ b/src/support/planCommand.test.ts @@ -0,0 +1,136 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { runPlanCommand, type PlanIO } from './planCommand.js'; +import * as planner from './planner.js'; + +vi.mock('./planner.js', () => ({ runPlanner: vi.fn() })); +const mockedRunPlanner = vi.mocked(planner.runPlanner); + +interface FakeSub { + title: string; + description: string; + estimatedMinutes: number; + priority: number; + dependencies?: string[]; +} + +/** A scripted PlanIO: returns queued confirm answers / edit texts, records prints. */ +function makeIO(answers: Array<'yes' | 'no' | 'edit'>, texts: string[] = []) { + const out: string[] = []; + let ai = 0; + let ti = 0; + const io: PlanIO = { + print: (l) => { out.push(l); }, + confirm: async () => answers[ai++] ?? 'no', + promptText: async () => texts[ti++] ?? '', + }; + return { io, out }; +} + +function plannerResult(subTasks: FakeSub[], needsDecomposition = true) { + return { + success: true, + originalIssue: 'g', + needsDecomposition, + subTasks, + totalEstimatedMinutes: subTasks.reduce((s, t) => s + (t.estimatedMinutes || 0), 0), + }; +} + +function bodyOf(fetchMock: ReturnType, call = 0) { + return JSON.parse((fetchMock.mock.calls[call][1] as { body: string }).body); +} + +afterEach(() => { + vi.unstubAllGlobals(); + vi.clearAllMocks(); +}); + +describe('runPlanCommand', () => { + it('dispatches the approved sub-tasks on yes', async () => { + mockedRunPlanner.mockResolvedValue(plannerResult([ + { title: 'A', description: 'da', estimatedMinutes: 10, priority: 2 }, + { title: 'B', description: 'db', estimatedMinutes: 15, priority: 3, dependencies: ['A'] }, + ]) as never); + const fetchMock = vi.fn(async () => + new Response(JSON.stringify({ mode: 'linear', parentIssue: { identifier: 'INT-9' } }), { status: 200 }), + ); + vi.stubGlobal('fetch', fetchMock); + + const { io, out } = makeIO(['yes']); + await runPlanCommand('build X', io, { projectPath: '/tmp/proj' }); + + expect(fetchMock).toHaveBeenCalledOnce(); + expect(String(fetchMock.mock.calls[0][0])).toContain('/api/plan/dispatch'); + const body = bodyOf(fetchMock); + expect(body.goal).toBe('build X'); + expect(body.projectPath).toBe('/tmp/proj'); + expect(body.subTasks).toHaveLength(2); + expect(out.join('\n')).toContain('INT-9'); + }); + + it('does not dispatch on no', async () => { + mockedRunPlanner.mockResolvedValue( + plannerResult([{ title: 'A', description: 'd', estimatedMinutes: 5, priority: 2 }]) as never, + ); + const fetchMock = vi.fn(); + vi.stubGlobal('fetch', fetchMock); + + const { io, out } = makeIO(['no']); + await runPlanCommand('g', io, {}); + + expect(fetchMock).not.toHaveBeenCalled(); + expect(out.join('\n')).toContain('Cancelled'); + }); + + it('drops a sub-task on edit, then dispatches the remainder', async () => { + mockedRunPlanner.mockResolvedValue(plannerResult([ + { title: 'A', description: 'da', estimatedMinutes: 10, priority: 2 }, + { title: 'B', description: 'db', estimatedMinutes: 15, priority: 3 }, + ]) as never); + const fetchMock = vi.fn(async () => + new Response(JSON.stringify({ mode: 'exec', taskIds: ['t1'] }), { status: 202 }), + ); + vi.stubGlobal('fetch', fetchMock); + + // edit → drop #2 → yes + const { io } = makeIO(['edit', 'yes'], ['2']); + await runPlanCommand('g', io, {}); + + const body = bodyOf(fetchMock); + expect(body.subTasks).toHaveLength(1); + expect(body.subTasks[0].title).toBe('A'); + }); + + it('uses the single-task path when no decomposition is needed', async () => { + mockedRunPlanner.mockResolvedValue(plannerResult([], false) as never); + const fetchMock = vi.fn(async () => + new Response(JSON.stringify({ mode: 'linear', parentIssue: { identifier: 'INT-1' } }), { status: 200 }), + ); + vi.stubGlobal('fetch', fetchMock); + + const { io } = makeIO(['yes']); + await runPlanCommand('small task', io, {}); + + expect(fetchMock).toHaveBeenCalledOnce(); + expect(bodyOf(fetchMock).subTasks).toEqual([]); + }); + + it('reports a planner failure without dispatching', async () => { + mockedRunPlanner.mockResolvedValue({ + success: false, + error: 'boom', + originalIssue: 'g', + needsDecomposition: false, + subTasks: [], + totalEstimatedMinutes: 0, + } as never); + const fetchMock = vi.fn(); + vi.stubGlobal('fetch', fetchMock); + + const { io, out } = makeIO([]); + await runPlanCommand('g', io, {}); + + expect(fetchMock).not.toHaveBeenCalled(); + expect(out.join('\n')).toContain('boom'); + }); +}); diff --git a/src/support/planCommand.ts b/src/support/planCommand.ts new file mode 100644 index 0000000..d9dc79f --- /dev/null +++ b/src/support/planCommand.ts @@ -0,0 +1,153 @@ +// ============================================ +// OpenSwarm - Shared `/plan` cockpit logic +// ============================================ +// +// The TUI's planner cockpit: turn a goal into a previewed decomposition, gate on +// human approval, then dispatch to the daemon loop. Shared by the readline +// (chat.ts) and blessed (chatTui.ts) front-ends — each supplies its own I/O via +// the `PlanIO` interface, so the planning/approval/dispatch logic lives in one +// place (in keeping with the adapter-parser dedup, INT-1441). +// +// The Planner itself (runPlanner) and the dispatch engine (POST /api/plan/dispatch +// → createSubIssuesWithDependencies / exec pipeline) already exist; this only adds +// the human-in-the-loop approval surface. + +import { runPlanner, type SubTask } from './planner.js'; + +const DAEMON_BASE = 'http://127.0.0.1:3847'; + +export interface PlanIO { + /** Print one line to the surface (chatLog / stdout). */ + print(line: string): void; + /** Ask the user to approve the plan. Returns the chosen action. */ + confirm(prompt: string): Promise<'yes' | 'no' | 'edit'>; + /** Optional free-text prompt — used by 'edit' to drop sub-tasks. If absent, 'edit' cancels. */ + promptText?(prompt: string): Promise; +} + +export interface PlanCommandOptions { + projectPath?: string; + model?: string; +} + +/** + * Run the `/plan ` flow: plan → render → approve/edit → dispatch. + */ +export async function runPlanCommand( + goal: string, + io: PlanIO, + opts: PlanCommandOptions = {}, +): Promise { + const trimmed = goal.trim(); + if (!trimmed) { + io.print('Usage: /plan '); + return; + } + const projectPath = opts.projectPath ?? process.cwd(); + + io.print(`🧭 Planning: ${trimmed}`); + io.print(' (running the Planner — may take up to ~2 min; requires the claude CLI)'); + + const result = await runPlanner({ + taskTitle: trimmed, + taskDescription: trimmed, + projectPath, + model: opts.model, + }); + + if (!result.success) { + io.print(`✖ Planner failed: ${result.error ?? 'unknown error'}`); + return; + } + + // No decomposition → dispatch the goal itself as a single task. + if (!result.needsDecomposition || result.subTasks.length === 0) { + io.print(`• No decomposition needed — single task (~${result.totalEstimatedMinutes || '?'} min).`); + const decision = await io.confirm('Dispatch this goal as one task? [y/n]'); + if (decision !== 'yes') { + io.print('Cancelled.'); + return; + } + await dispatch(trimmed, [], projectPath, io); + return; + } + + // Render + approval loop. 'edit' drops sub-tasks by number, then re-renders. + let subTasks: SubTask[] = [...result.subTasks]; + for (;;) { + renderPlan(trimmed, subTasks, io); + const decision = await io.confirm('Approve & dispatch this plan? [y/n/edit]'); + + if (decision === 'no') { + io.print('Cancelled.'); + return; + } + + if (decision === 'edit') { + if (!io.promptText) { + io.print('Editing is not available on this surface — cancelling.'); + return; + } + const raw = await io.promptText('Sub-task numbers to DROP (comma-separated), or blank to keep all:'); + const drop = new Set( + raw.split(',').map((s) => parseInt(s.trim(), 10)).filter((n) => !Number.isNaN(n)), + ); + if (drop.size > 0) { + subTasks = subTasks.filter((_, i) => !drop.has(i + 1)); + if (subTasks.length === 0) { + io.print('All sub-tasks dropped — cancelling.'); + return; + } + } + continue; + } + + // 'yes' + await dispatch(trimmed, subTasks, projectPath, io); + return; + } +} + +function renderPlan(goal: string, subTasks: SubTask[], io: PlanIO): void { + const total = subTasks.reduce((sum, t) => sum + (t.estimatedMinutes || 0), 0); + io.print(''); + io.print(`📋 Plan for: ${goal}`); + io.print(` ${subTasks.length} sub-task(s), ~${total} min total`); + subTasks.forEach((st, i) => { + const deps = st.dependencies?.length ? ` ⮑ after: ${st.dependencies.join(', ')}` : ''; + io.print(` ${i + 1}. [P${st.priority} · ${st.estimatedMinutes}m] ${st.title}${deps}`); + }); + io.print(''); +} + +async function dispatch( + goal: string, + subTasks: SubTask[], + projectPath: string, + io: PlanIO, +): Promise { + try { + const res = await fetch(`${DAEMON_BASE}/api/plan/dispatch`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ goal, projectPath, subTasks }), + }); + if (!res.ok) { + const err = (await res.json().catch(() => ({ error: `HTTP ${res.status}` }))) as { error?: string }; + io.print(`✖ Dispatch failed: ${err.error ?? res.status}`); + return; + } + const data = (await res.json()) as { + mode: string; + parentIssue?: { identifier?: string }; + taskIds?: string[]; + }; + if (data.mode === 'linear') { + io.print(`✅ Dispatched to Linear (${data.parentIssue?.identifier ?? 'parent issue'}). The daemon will pick it up — watch the Tasks tab.`); + } else { + io.print(`✅ Dispatched ${data.taskIds?.length ?? 0} task(s) to the exec pipeline — watch the Tasks tab.`); + } + } catch { + io.print('✖ Could not reach the daemon at :3847. Start it with `openswarm start`, then retry.'); + } +} diff --git a/src/support/planner.test.ts b/src/support/planner.test.ts new file mode 100644 index 0000000..6fd42c6 --- /dev/null +++ b/src/support/planner.test.ts @@ -0,0 +1,61 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest'; +import { initLocale } from '../locale/index.js'; + +// Mock the adapter layer so runPlanner exercises the agentic-loop path without a real model. +vi.mock('../adapters/index.js', () => ({ + getAdapter: vi.fn(() => ({ name: 'mock' })), + spawnCli: vi.fn(), +})); + +import { runPlanner } from './planner.js'; +import * as adapters from '../adapters/index.js'; + +const mockedSpawnCli = vi.mocked(adapters.spawnCli); + +function cliResult(stdout: string, exitCode = 0) { + return { exitCode, stdout, stderr: exitCode === 0 ? '' : 'boom', durationMs: 1 }; +} + +const PLAN_JSON = + '```json\n{"needsDecomposition":true,"subTasks":[{"title":"A","description":"d","estimatedMinutes":20,"priority":2}],"totalEstimatedMinutes":20}\n```'; + +beforeAll(() => { initLocale('en'); }); +afterEach(() => { vi.clearAllMocks(); }); + +describe('runPlanner — agentic loop migration', () => { + it('parses a decomposition from the loop plain-text JSON output', async () => { + mockedSpawnCli.mockResolvedValue(cliResult(`Here is the plan:\n${PLAN_JSON}`) as never); + const res = await runPlanner({ taskTitle: 'big task', taskDescription: 'do it', projectPath: '/tmp/x' }); + expect(res.success).toBe(true); + expect(res.needsDecomposition).toBe(true); + expect(res.subTasks).toHaveLength(1); + expect(res.subTasks[0].title).toBe('A'); + }); + + it('runs read-only and multi-turn (guard appended, maxTurns > 1)', async () => { + mockedSpawnCli.mockResolvedValue(cliResult(PLAN_JSON) as never); + await runPlanner({ taskTitle: 't', taskDescription: 'd', projectPath: '/tmp/x' }); + const opts = mockedSpawnCli.mock.calls[0][1]; + expect(opts.prompt).toContain('PLANNING ONLY'); + expect(opts.maxTurns).toBeGreaterThan(1); + }); + + it('drops a Claude-CLI model id (claude-*) → adapter default', async () => { + mockedSpawnCli.mockResolvedValue(cliResult(PLAN_JSON) as never); + await runPlanner({ taskTitle: 't', taskDescription: 'd', projectPath: '/tmp/x', model: 'claude-opus-4-7' }); + expect(mockedSpawnCli.mock.calls[0][1].model).toBeUndefined(); + }); + + it('keeps an OpenRouter-style Claude model id (org-prefixed)', async () => { + mockedSpawnCli.mockResolvedValue(cliResult(PLAN_JSON) as never); + await runPlanner({ taskTitle: 't', taskDescription: 'd', projectPath: '/tmp/x', model: 'anthropic/claude-opus-4' }); + expect(mockedSpawnCli.mock.calls[0][1].model).toBe('anthropic/claude-opus-4'); + }); + + it('returns failure when the adapter errors with no output', async () => { + mockedSpawnCli.mockResolvedValue(cliResult('', 1) as never); + const res = await runPlanner({ taskTitle: 't', taskDescription: 'd', projectPath: '/tmp/x' }); + expect(res.success).toBe(false); + expect(res.error).toBeTruthy(); + }); +}); diff --git a/src/support/planner.ts b/src/support/planner.ts index c68c97c..9ff7eb8 100644 --- a/src/support/planner.ts +++ b/src/support/planner.ts @@ -3,13 +3,13 @@ // Decompose large issues into 30-min sub-tasks // ============================================ -import { spawn } from 'node:child_process'; -import { writeFileSync, unlinkSync } from 'node:fs'; import type { TaskItem } from '../orchestration/decisionEngine.js'; -import { type CostInfo, extractCostFromStreamJson, formatCost } from './costTracker.js'; +import type { CostInfo } from './costTracker.js'; import { t, getPrompts } from '../locale/index.js'; import type { ImpactAnalysis } from '../knowledge/types.js'; -import { buildWorkerEnv } from '../adapters/envPath.js'; +import type { AdapterName } from '../adapters/types.js'; +import { getAdapter, spawnCli } from '../adapters/index.js'; +import { expandPath } from '../core/config.js'; // Types @@ -20,6 +20,8 @@ export interface PlannerOptions { projectName?: string; timeoutMs?: number; model?: string; + adapterName?: AdapterName; // CLI adapter (default: configured default) + maxTurns?: number; // Max agentic turns for read-only exploration (default 15) targetMinutes?: number; // Target time per sub-task (default 25 min) onLog?: (line: string) => void; // Stream planner stdout to dashboard impactAnalysis?: ImpactAnalysis; // KG 영향 분석 (파일 분리 유도) @@ -68,27 +70,57 @@ function buildPlannerPrompt(options: PlannerOptions): string { // Planner Execution /** - * Run Planner agent + * Read-only guard appended to the planner prompt. The planner runs through the + * same agentic loop as the worker (no built-in read-only mode), so the prompt + * keeps it from mutating the repo — it should explore (read_file/search_files) + * and emit the JSON plan, nothing else. + */ +const READ_ONLY_GUARD = + '\n\n---\n' + + 'IMPORTANT — you are PLANNING ONLY. Do NOT edit, write, or create any files. ' + + 'Use read_file and search_files to understand the codebase first, then output your ' + + 'decomposition as the required JSON (in a ```json block) as your final message.'; + +/** + * Run the Planner through the OpenSwarm agentic loop (the same path as the + * Worker), read-only and multi-turn. Replaces the former `claude -p --max-turns 1` + * shell-out — removes the claude-binary dependency and lets the planner read the + * code before decomposing. The `PlannerResult` contract is unchanged. */ export async function runPlanner(options: PlannerOptions): Promise { - const prompt = buildPlannerPrompt(options); + const prompt = buildPlannerPrompt(options) + READ_ONLY_GUARD; try { - const output = await runClaudeCli( + const adapter = getAdapter(options.adapterName); + const cwd = expandPath(options.projectPath); + // Drop Claude-CLI-style model ids (e.g. `claude-opus-4-7`) — they are not + // valid on the openrouter/local adapters; fall back to the adapter default. + // OpenRouter Claude ids carry an org prefix (`anthropic/claude-...`) and pass. + const model = options.model && !options.model.startsWith('claude-') ? options.model : undefined; + + const raw = await spawnCli(adapter, { prompt, - options.timeoutMs ?? 120000, // 2 min timeout - options.model, - options.onLog - ); - - const costInfo = extractCostFromStreamJson(output); - if (costInfo) { - console.log(`[Planner] Cost: ${formatCost(costInfo)}`); + cwd, + timeoutMs: options.timeoutMs ?? 600_000, + model, + maxTurns: options.maxTurns ?? 15, + onLog: options.onLog ? (line: string) => options.onLog!(humanizePlannerOutput(line)) : undefined, + systemPrompt: getPrompts().systemPrompt, + // Planner is a judgment role — keep reasoning ON (unlike the worker). + }); + + if (raw.exitCode !== 0 && !raw.stdout.trim()) { + return { + success: false, + originalIssue: options.taskTitle, + needsDecomposition: false, + subTasks: [], + totalEstimatedMinutes: 0, + error: raw.stderr.slice(0, 500) || `Planner adapter exited with code ${raw.exitCode}`, + }; } - const result = parsePlannerOutput(output, options.taskTitle); - result.costInfo = costInfo; - return result; + return parsePlannerOutput(raw.stdout, options.taskTitle); } catch (error) { return { success: false, @@ -130,117 +162,6 @@ function humanizePlannerOutput(text: string): string { return trimmed; } -/** - * Run Claude CLI from /tmp to avoid project-specific MCP servers and hooks. - * STONKS has session-start.sh + playwright/pykis/linear MCP servers that - * cause >10min startup when claude runs from the project directory. - */ -async function runClaudeCli( - prompt: string, - timeoutMs: number, - model?: string, - onLog?: (line: string) => void -): Promise { - const tmpFile = `/tmp/planner-prompt-${Date.now()}.txt`; - writeFileSync(tmpFile, prompt); - - const args = ['--output-format', 'stream-json', '--verbose', '--max-turns', '1']; - if (model) { - args.push('--model', model); - } - args.push('-p', prompt); - - return new Promise((resolve, reject) => { - const proc = spawn( - 'claude', - args, - { - shell: false, - cwd: '/tmp', // Neutral dir — no project .claude/ settings loaded - env: buildWorkerEnv(process.env), - stdio: ['ignore', 'pipe', 'pipe'], - } - ); - - let output = ''; - let stderrOutput = ''; - - proc.stdout?.on('data', (chunk: Buffer) => { - const text = chunk.toString(); - output += text; - if (onLog) { - // Stream assistant text lines to dashboard - for (const line of text.split('\n').filter((l: string) => l.trim())) { - try { - const event = JSON.parse(line); - if (event.type === 'assistant' && event.message?.content) { - for (const block of event.message.content) { - if (block.type === 'text') { - // Convert planner JSON result to human-readable summary - const humanized = humanizePlannerOutput(block.text); - onLog(humanized); - } - } - } - } catch { - // Not a JSON line — skip raw stream noise (tool calls, etc) - if (!line.startsWith('{') && !line.startsWith('[')) { - onLog(line); - } - } - } - } - }); - - proc.stderr?.on('data', (chunk: Buffer) => { - const text = chunk.toString(); - stderrOutput += text; - // Log stderr in real-time for debugging - console.error('[Planner stderr]', text.slice(0, 500)); - }); - - const timer = setTimeout(() => { - proc.kill('SIGTERM'); - reject(new Error(`Planner timeout after ${timeoutMs}ms`)); - }, timeoutMs); - - proc.on('close', (code: number | null) => { - clearTimeout(timer); - try { unlinkSync(tmpFile); } catch { /* ignore */ } - - // Success: exit code 0, or non-zero but we got parseable output - if (code === 0) { - resolve(output); - return; - } - - // Non-zero exit: check if we got usable output anyway - if (output.trim()) { - console.warn(`[Planner] Non-zero exit (${code}) but got output, attempting to parse`); - if (stderrOutput.trim()) { - console.warn('[Planner] stderr:', stderrOutput.slice(0, 500)); - } - resolve(output); - return; - } - - // Complete failure: no output and non-zero exit - const errorMsg = stderrOutput.trim() || 'No error output captured'; - const truncatedStderr = errorMsg.length > 1000 ? errorMsg.slice(0, 1000) + '... (truncated)' : errorMsg; - console.error('[Planner] Process exited with code', code); - console.error('[Planner] Full stderr:', errorMsg); - console.error('[Planner] stdout length:', output.length); - reject(new Error(`Claude CLI exited with code ${code}. stderr: ${truncatedStderr}`)); - }); - - proc.on('error', (err: Error) => { - clearTimeout(timer); - try { unlinkSync(tmpFile); } catch { /* ignore */ } - reject(err); - }); - }); -} - /** * Parse Planner output (plain text or stream-json fallback) */ diff --git a/src/support/web.ts b/src/support/web.ts index b47808e..7554325 100644 --- a/src/support/web.ts +++ b/src/support/web.ts @@ -30,6 +30,9 @@ import { initLocale } from '../locale/index.js'; import { runChatCompletion, getDefaultChatModel } from './chatBackend.js'; import { handleGraphQL, isGraphQLRequest } from '../issues/graphql/server.js'; import { ISSUE_BOARD_HTML } from '../issues/issueBoardHtml.js'; +import * as linear from '../linear/index.js'; +import { createSubIssuesWithDependencies } from '../automation/runnerExecution.js'; +import type { SubTask } from './planner.js'; let server: ReturnType | null = null; let runnerRef: AutonomousRunner | undefined; @@ -88,6 +91,81 @@ function cleanupExecTask(taskId: string): void { setTimeout(() => { execTasks.delete(taskId); }, 3600000); // 1 hour } +/** + * Create an in-memory exec task and run it through PairPipeline asynchronously. + * Shared by `POST /api/exec` and the `/api/plan/dispatch` fallback (Path B) so a + * fix to the exec lifecycle applies to both. Returns the taskId immediately; + * status is pollable via GET /api/exec/:taskId. + */ +function startExecTask( + prompt: string, + opts: { projectPath?: string; pipeline?: boolean; workerOnly?: boolean; model?: string } = {}, +): string { + const taskId = `exec-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; + const resolvedPath = opts.projectPath ?? process.cwd(); + + const entry: ExecTaskEntry = { taskId, status: 'queued', createdAt: Date.now() }; + execTasks.set(taskId, entry); + + // Run pipeline asynchronously + (async () => { + try { + initLocale('en'); + entry.status = 'running'; + + let stages: PipelineStage[]; + if (opts.workerOnly) { + stages = ['worker']; + } else if (opts.pipeline) { + stages = ['worker', 'reviewer', 'tester', 'documenter']; + } else { + stages = ['worker', 'reviewer']; + } + + const roles: Record = {}; + if (opts.model) { + roles.worker = { enabled: true, model: opts.model, timeoutMs: 0 }; + } + + const task: TaskItem = { + id: taskId, + source: 'local', + title: prompt, + description: prompt, + priority: 3, + projectPath: resolvedPath, + createdAt: Date.now(), + }; + + const pipelineInstance = new PairPipeline({ + stages, + maxIterations: 3, + roles: Object.keys(roles).length > 0 ? roles as any : undefined, + }); + + pipelineInstance.on('stage:start', ({ stage }: { stage: string }) => { + entry.currentStage = stage; + }); + + const result: PipelineResult = await pipelineInstance.run(task, resolvedPath); + + entry.status = 'completed'; + entry.result = { + success: result.success, + summary: result.workerResult?.summary, + finalStatus: result.finalStatus, + }; + } catch (err) { + entry.status = 'failed'; + entry.error = err instanceof Error ? err.message : String(err); + } finally { + cleanupExecTask(taskId); + } + })(); + + return taskId; +} + // Pinned + enabled repos persistence const REPOS_FILE = join(homedir(), '.claude', 'openswarm-repos.json'); @@ -947,80 +1025,97 @@ export async function startWebServer(port: number = 3847): Promise { return; } - const taskId = `exec-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; - const resolvedPath = projectPath ?? process.cwd(); - - const entry: ExecTaskEntry = { - taskId, - status: 'queued', - createdAt: Date.now(), - }; - execTasks.set(taskId, entry); + const taskId = startExecTask(prompt, { projectPath, pipeline, workerOnly, model }); res.writeHead(202, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ taskId, status: 'queued' })); - // Run pipeline asynchronously - (async () => { - try { - initLocale('en'); - entry.status = 'running'; - - // Determine stages - let stages: PipelineStage[]; - if (workerOnly) { - stages = ['worker']; - } else if (pipeline) { - stages = ['worker', 'reviewer', 'tester', 'documenter']; - } else { - stages = ['worker', 'reviewer']; - } - - // Build role config - const roles: Record = {}; - if (model) { - roles.worker = { enabled: true, model, timeoutMs: 0 }; - } - - const task: TaskItem = { - id: taskId, - source: 'local', - title: prompt, - description: prompt, - priority: 3, - projectPath: resolvedPath, - createdAt: Date.now(), - }; - - const pipelineInstance = new PairPipeline({ - stages, - maxIterations: 3, - roles: Object.keys(roles).length > 0 ? roles as any : undefined, - }); + } catch { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: 'Invalid JSON' })); + } - pipelineInstance.on('stage:start', ({ stage }: { stage: string }) => { - entry.currentStage = stage; - }); + // ---- Plan dispatch: TUI /plan cockpit → daemon loop ---- + } else if (url === '/api/plan/dispatch' && req.method === 'POST') { + const body = await readBody(req); + try { + const { goal, projectPath, subTasks } = JSON.parse(body) as { + goal: string; + projectPath?: string; + subTasks?: SubTask[]; + }; + if (!goal?.trim()) { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: 'Missing goal' })); + return; + } + const resolvedPath = projectPath ?? process.cwd(); + const tasks = Array.isArray(subTasks) ? subTasks : []; + const triggerHeartbeat = () => { + runnerRef?.heartbeat().catch((e: Error) => console.error('[Web] plan heartbeat error:', e)); + }; + + // Path A — Linear configured: create a parent issue + sub-issues with + // dependency wiring (reusing the autonomous engine), then heartbeat. + if (linear.isLinearInitialized()) { + const parent = await linear.createIssue( + goal, + `Planned via the \`/plan\` cockpit.\n\n${tasks.length} sub-task(s) dispatched.`, + [], + ); + if ('error' in parent) { + res.writeHead(502, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: `Linear: ${parent.error}` })); + return; + } - const result: PipelineResult = await pipelineInstance.run(task, resolvedPath); - - entry.status = 'completed'; - entry.result = { - success: result.success, - summary: result.workerResult?.summary, - finalStatus: result.finalStatus, - }; - } catch (err) { - entry.status = 'failed'; - entry.error = err instanceof Error ? err.message : String(err); - } finally { - cleanupExecTask(taskId); + if (tasks.length === 0) { + // Planner saw no decomposition — run the goal itself as one task. + await linear.updateIssueState(parent.id, 'Todo').catch(() => {}); + triggerHeartbeat(); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ + mode: 'linear', + parentIssue: { id: parent.id, identifier: parent.identifier }, + subIssues: [], + })); + return; } - })(); - } catch { + const totalMinutes = tasks.reduce((sum, t) => sum + (t.estimatedMinutes || 0), 0); + await createSubIssuesWithDependencies( + parent.id, + { title: goal }, + tasks, + totalMinutes, + { reportToDiscord: () => {}, scheduleNextHeartbeat: triggerHeartbeat }, + parent.id, + 20, + ); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ + mode: 'linear', + parentIssue: { id: parent.id, identifier: parent.identifier }, + })); + return; + } + + // Path B (fallback) — no Linear: run each sub-task via the exec pipeline. + const items: SubTask[] = tasks.length > 0 + ? tasks + : [{ title: goal, description: goal, estimatedMinutes: 0, priority: 3 }]; + const taskIds = items.map((st) => + startExecTask(`${st.title}\n\n${st.description ?? ''}`.trim(), { + projectPath: resolvedPath, + pipeline: true, + }), + ); + res.writeHead(202, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ mode: 'exec', taskIds })); + + } catch (err) { res.writeHead(400, { 'Content-Type': 'application/json' }); - res.end(JSON.stringify({ error: 'Invalid JSON' })); + res.end(JSON.stringify({ error: err instanceof Error ? err.message : 'Invalid JSON' })); } // ---- Exec: task status ----