/**
 * Select the goal-start permission menu for the given mode.
 *
 * @param mode - The mode the menu is being shown for.
 * @returns The YOLO option list when `mode` is `'yolo'`; otherwise the manual
 *   option list. The shared readonly array is returned as-is, not copied.
 */
export function goalStartOptions(mode: 'manual' | 'yolo'): readonly StartPermissionOption[] {
  if (mode === 'yolo') {
    return GOAL_START_YOLO_OPTIONS;
  }
  return GOAL_START_MANUAL_OPTIONS;
}
/**
 * Build the approval-panel choices for a `goal_start` display.
 *
 * The choices are derived one-to-one from `goalStartOptions(display.mode)`, so
 * the panel offers the same labels and descriptions as that option list.
 *
 * @param display - The `goal_start` variant of the tool input display; only
 *   its `mode` is read here.
 * @returns One choice per option. The option whose value is `'cancel'` maps to
 *   a `cancelled` response with `selected_label: 'cancel'`; every other option
 *   maps to an `approved` response whose `selected_label` is the option value.
 */
function adaptGoalStartChoices(
  display: Extract<ToolInputDisplay, { kind: 'goal_start' }>,
): ApprovalPanelChoice[] {
  // Reuse the exact options the /goal start menu shows. Each mode option starts
  // the goal under that permission mode (the policy reads selected_label); "Do
  // not start" declines so no goal is created.
  return goalStartOptions(display.mode).map((option) =>
    option.value === 'cancel'
      ? {
          label: option.label,
          response: 'cancelled',
          selected_label: 'cancel',
          description: option.description,
        }
      : {
          label: option.label,
          response: 'approved',
          selected_label: option.value,
          description: option.description,
        },
  );
}
+ description?: string | undefined; } // ── Approval / Question view payloads ──────────────────────────────── diff --git a/apps/kimi-code/test/tui/components/dialogs/approval-panel.test.ts b/apps/kimi-code/test/tui/components/dialogs/approval-panel.test.ts index 473f3261e..612ec152f 100644 --- a/apps/kimi-code/test/tui/components/dialogs/approval-panel.test.ts +++ b/apps/kimi-code/test/tui/components/dialogs/approval-panel.test.ts @@ -61,6 +61,33 @@ describe('ApprovalPanelComponent', () => { expect(out).not.toContain('y/a/n/f'); }); + it('renders choice descriptions beneath the label when present', () => { + const pending: PendingApproval = { + data: { + id: 'approval_goal', + tool_call_id: 'tool_goal', + tool_name: 'CreateGoal', + action: 'Creating a goal', + description: '', + display: [], + choices: [ + { + label: 'Switch to Auto and start', + response: 'approved', + selected_label: 'auto', + description: 'Tools are approved automatically, and questions are skipped.', + }, + { label: 'Do not start', response: 'cancelled', selected_label: 'cancel' }, + ], + }, + }; + const out = strip(new ApprovalPanelComponent(pending, () => {}).render(80).join('\n')); + expect(out).toContain('1. Switch to Auto and start'); + expect(out).toContain('Tools are approved automatically, and questions are skipped.'); + // A choice without a description stays label-only — no stray blank helper line. + expect(out).toContain('2. 
Do not start'); + }); + it('renders dangerous shell warnings with simple copy and no icon', () => { const pending: PendingApproval = { data: { diff --git a/apps/kimi-code/test/tui/reverse-rpc/approval-adapter.test.ts b/apps/kimi-code/test/tui/reverse-rpc/approval-adapter.test.ts index 997230fec..cdc8709c3 100644 --- a/apps/kimi-code/test/tui/reverse-rpc/approval-adapter.test.ts +++ b/apps/kimi-code/test/tui/reverse-rpc/approval-adapter.test.ts @@ -211,6 +211,97 @@ describe('approval adapter', () => { ]); }); + it('renders the /goal start menu for a CreateGoal approval in manual mode', () => { + const adapted = adaptApprovalRequest({ + toolCallId: 'tc-goal', + toolName: 'CreateGoal', + action: 'Creating a goal', + display: { + kind: 'goal_start', + objective: 'Fix the failing auth tests', + completionCriterion: 'npm test -- auth exits 0', + mode: 'manual', + }, + }); + + // Objective + criterion are previewed as a brief block. + expect(adapted.display).toEqual([ + { + type: 'brief', + text: 'Start goal: Fix the failing auth tests\nDone when: npm test -- auth exits 0', + }, + ]); + // Choices mirror the manual-mode /goal start menu; mode options approve and + // carry the mode in selected_label, "Do not start" cancels. Each keeps the + // /goal menu's description. + expect(adapted.choices).toEqual([ + { + label: 'Switch to Auto and start', + response: 'approved', + selected_label: 'auto', + description: + 'Best if you want Kimi Code to keep working while you are away. Tools are approved automatically, and questions are skipped.', + }, + { + label: 'Switch to YOLO and start', + response: 'approved', + selected_label: 'yolo', + description: + 'Tools and plan changes are approved automatically. Kimi Code may still ask you questions.', + }, + { + label: 'Start in Manual', + response: 'approved', + selected_label: 'manual', + description: + 'Keep approvals on. 
Kimi Code will ask before risky actions, so the goal may stop and wait for you.', + }, + { + label: 'Do not start', + response: 'cancelled', + selected_label: 'cancel', + description: 'Return to the input box with your goal command.', + }, + ]); + }); + + it('renders the yolo-mode /goal start menu for a CreateGoal approval', () => { + const adapted = adaptApprovalRequest({ + toolCallId: 'tc-goal-yolo', + toolName: 'CreateGoal', + action: 'Creating a goal', + display: { + kind: 'goal_start', + objective: 'Ship the feature', + mode: 'yolo', + }, + }); + + expect(adapted.display).toEqual([{ type: 'brief', text: 'Start goal: Ship the feature' }]); + expect(adapted.choices).toEqual([ + { + label: 'Switch to Auto and start', + response: 'approved', + selected_label: 'auto', + description: + 'Best if you want Kimi Code to keep working while you are away. Tools are approved automatically, and questions are skipped.', + }, + { + label: 'Keep YOLO and start', + response: 'approved', + selected_label: 'yolo', + description: + 'Tools and plan changes stay approved automatically. 
/**
 * Permission policy that turns a `CreateGoal` tool call into an "ask".
 *
 * The policy only applies when all three hold: the tool call is `CreateGoal`,
 * the agent's current permission mode is not `auto`, and the execution carries
 * a `goal_start` display. In every other case it abstains (returns
 * `undefined`). When the ask is approved, the label of the selected option is
 * interpreted as a permission mode and applied to the agent.
 */
export class GoalStartReviewAskPermissionPolicy implements PermissionPolicy {
  readonly name = 'goal-start-review-ask';

  constructor(private readonly agent: Agent) {}

  /**
   * @param context - The tool call and its resolved execution.
   * @returns An `ask` result wired to {@link resolveGoalStart}, or `undefined`
   *   when this policy does not apply.
   */
  evaluate(context: PermissionPolicyContext): PermissionPolicyResult | undefined {
    // Checked in this order and short-circuited, so nothing past the tool name
    // is read for unrelated tools.
    const needsGoalStartReview =
      context.toolCall.name === 'CreateGoal' &&
      this.agent.permission.mode !== 'auto' &&
      context.execution.display?.kind === 'goal_start';
    if (!needsGoalStartReview) {
      return undefined;
    }

    return {
      kind: 'ask',
      resolveApproval: (result) => this.resolveGoalStart(result),
    };
  }

  /**
   * Apply the permission mode chosen in the approval, if any.
   *
   * Always returns `undefined`. The only side effect is a `setMode` call, made
   * when the decision is `'approved'` and the selected label names a mode
   * different from the current one.
   */
  private resolveGoalStart(result: ApprovalResponse): undefined {
    if (result.decision === 'approved') {
      // The selected option's label names the mode to run the goal under.
      const requestedMode = toPermissionMode(result.selectedLabel);
      if (requestedMode !== undefined && requestedMode !== this.agent.permission.mode) {
        this.agent.permission.setMode(requestedMode);
      }
    }
    // Any other decision leaves the mode untouched.
    return undefined;
  }
}

/**
 * Map an approval's selected label to a permission mode.
 *
 * @returns The label itself when it is `'auto'`, `'yolo'` or `'manual'`;
 *   `undefined` for anything else, including a missing label.
 */
function toPermissionMode(label: string | undefined): PermissionMode | undefined {
  switch (label) {
    case 'auto':
    case 'yolo':
    case 'manual':
      return label;
    default:
      return undefined;
  }
}
+ new GoalStartReviewAskPermissionPolicy(agent), // EnterPlanMode, Write/Edit on the plan file, or ExitPlanMode with no actionable plan_review → approve. new PlanModeToolApprovePermissionPolicy(agent), // Access touches a sensitive file (.env, SSH key, credentials) → ask. diff --git a/packages/agent-core/src/agent/turn/index.ts b/packages/agent-core/src/agent/turn/index.ts index 3fe647fb7..ce035e0dd 100644 --- a/packages/agent-core/src/agent/turn/index.ts +++ b/packages/agent-core/src/agent/turn/index.ts @@ -301,12 +301,14 @@ export class TurnFlow { return await this.driveGoal(firstTurnId, input, origin, signal); } const end = await this.runOneTurn(firstTurnId, input, origin, signal, true); - const resumedFromPausedOrBlocked = - initialGoalStatus === 'paused' || initialGoalStatus === 'blocked'; - const currentGoalStatus = this.agent.goal.getGoal().goal?.status; + // A goal can become active during an ordinary turn: the model creates one + // with CreateGoal, or resumes a paused/blocked goal via UpdateGoal. Either + // way, hand the now-active goal to the driver so it is actually pursued, + // instead of stopping after the turn that merely started it. (The + // already-active case took the early return above.) 
+ const goalBecameActive = this.agent.goal.getGoal().goal?.status === 'active'; if ( - resumedFromPausedOrBlocked && - currentGoalStatus === 'active' && + goalBecameActive && end.event.reason !== 'cancelled' && end.event.reason !== 'failed' ) { diff --git a/packages/agent-core/src/skill/builtin/index.ts b/packages/agent-core/src/skill/builtin/index.ts index e73204c06..a5150815e 100644 --- a/packages/agent-core/src/skill/builtin/index.ts +++ b/packages/agent-core/src/skill/builtin/index.ts @@ -8,12 +8,14 @@ import { SUB_SKILL_REVIEW, } from './sub-skill'; import { UPDATE_CONFIG_SKILL } from './update-config'; +import { WRITE_GOAL_SKILL } from './write-goal'; export function registerBuiltinSkills(registry: SessionSkillRegistry): void { registry.registerBuiltinSkill(MCP_CONFIG_SKILL); registry.registerBuiltinSkill(IMPORT_FROM_CC_CODEX_SKILL); registry.registerBuiltinSkill(UPDATE_CONFIG_SKILL); registry.registerBuiltinSkill(CUSTOM_THEME_SKILL); + registry.registerBuiltinSkill(WRITE_GOAL_SKILL); registry.registerBuiltinSkill(SUB_SKILL_PARENT); registry.registerBuiltinSkill(SUB_SKILL_REVIEW); registry.registerBuiltinSkill(SUB_SKILL_CONSOLIDATE); @@ -27,4 +29,5 @@ export { SUB_SKILL_PARENT, SUB_SKILL_REVIEW, UPDATE_CONFIG_SKILL, + WRITE_GOAL_SKILL, }; diff --git a/packages/agent-core/src/skill/builtin/write-goal.md b/packages/agent-core/src/skill/builtin/write-goal.md new file mode 100644 index 000000000..818834075 --- /dev/null +++ b/packages/agent-core/src/skill/builtin/write-goal.md @@ -0,0 +1,85 @@ +--- +name: write-goal +description: Help the user craft a well-specified `/goal` objective for goal mode — turn a rough intention into a completion contract with a clear finish line, proof, boundaries, and stop rule. Use when the user asks for help writing, refining, or improving a goal. +--- + +# Write a good goal (write-goal) + +Help the user turn a rough intention into a `/goal` objective that goal mode can pursue across many turns without supervision. 
A goal is not a task description — it is a completion contract. It says what must become *true*, how that truth is *proven*, where the work may and may not *reach*, and when to *stop and report* instead of grinding on. + +This skill is about authoring the objective text together with the user. Drafting and starting are separate steps: you settle the wording first, and only once the user has approved it do you start the goal by calling `CreateGoal`. The user still gets a final confirmation before it runs. + +## Rules of engagement + +- **Only help when the user has asked for it.** Never volunteer to wrap an ordinary request in a goal, and never start one on your own. A normal "fix this test" is a normal request; treat it as a goal only when the user says they want a goal. If a task looks like it would suit goal mode, you may mention that once — but wait for the user to choose. +- **Write in the user's language.** Draft the objective in whatever language the user is writing to you in. If the project configuration or a saved memory names a preferred language, honor that instead. Keep the surrounding discussion in the same language. +- **Show before you start.** Always present the full drafted goal back to the user and get their agreement before anything runs. The user should read the exact text that will become the objective, not a paraphrase of it. +- **Draft with the user, not for them.** Goal-writing is a conversation. Offer a draft, explain the choices you made, invite changes, and fold the feedback in. Expect more than one round. +- **Respect the user's final call.** If, after you have pointed out what is vague or risky, the user still wants a looser or thinner goal, write the goal they asked for. Note the trade-off once; do not keep relitigating it or quietly "improve" the wording against their wishes. + +## What makes a goal good + +The strongest goals share one shape: they define **proof, not effort**. "Keep improving the code" describes effort and never ends. 
"Done when `npm test` exits 0 and no file outside `src/auth` changed" describes proof and is checkable. Aim for a contract with these parts: + +1. **End state** — the condition that must become true. Name the finish line concretely: a passing suite, an empty queue, a search that returns zero matches, a deployed artifact. +2. **Proof** — the observable evidence that the end state holds. Prefer things the agent can run and you can inspect afterward: a command's exit code, a test count, a `grep`/`rg` with no hits, a file that now exists, a metric over a threshold. +3. **Boundaries** — what the work may and may not touch. Name the scope (which module, which directory) and the off-limits actions (do not edit the spec, do not change unrelated files, do not make destructive data changes). +4. **The loop** — when the work is iterative, say how to iterate: rerun the check after each change, work through the queue item by item, replay the failing cases until they pass. +5. **The stop rule** — how to end honestly when "done" is not reachable. A "stop and ask before widening scope" clause and an explicit blocked path ("if an external service is down, record it and move on") let the agent report instead of faking a pass or looping forever. This is about *honesty*, not a spending limit — keep it separate from any budget (see below). + +Two habits make almost any goal better: + +- **Make it queue-shaped.** Goals that shrink a list work best: failing tests, open issues, error traces, files to migrate, rows to process. A queue gives the agent a worklist and gives you a countable definition of done. +- **Lean on existing verification.** Tests, CI, type-checks, lint, eval suites, browser audits, and zero-match searches are leverage — they are what let a goal run unattended and still be trusted. If a task has no way to prove completion, help the user add one or reconsider whether goal mode fits. + +Longer runs are not better runs. 
A tight contract that finishes in a handful of turns beats an open-ended one that burns hours re-running the whole suite after every edit. + +## Budgets are opt-in + +Goal mode can run under a turn or token budget, but **do not set one by default, and never bake a turn cap into the objective text.** A well-specified goal already stops on its own — when the proof passes or a blocker is hit — so an arbitrary cap usually does nothing except risk cutting off work midway. + +When a budget is genuinely useful — typically an open-ended or exploratory goal that could run long unattended — you may suggest one, framed around the number users actually feel: token cost. Let the user choose the value, and sanity-check it against the work. A cap far larger than the task needs (say a thousand turns for a goal that will finish in a few) is not a safety net; it just invites wasted tokens. If the user asks for a value that looks oversized, say so and offer a smaller one, but respect their final call. + +## Workflow + +1. **Understand the intention.** Ask what outcome the user actually wants and what would prove it is done. If a finish line or a check is missing, that gap is the first thing to resolve together. +2. **Draft the goal.** Write a concrete objective in the user's language, covering as many parts of the contract above as the task warrants. Keep it readable — one or a few sentences for simple work, a short structured block (end state, checks, boundaries, stop rule) for larger work. +3. **Show it and explain.** Present the draft in full and walk through the choices: what you picked as the finish line, what proves it, what you fenced off, when it stops. Point out anything still soft. +4. **Revise together.** Take the user's edits and produce a new draft. Repeat until they are satisfied. If they want it looser than you would recommend, say so once, then write their version. +5. 
**Start it.** Once the user approves the wording, start the goal by calling `CreateGoal` with the agreed objective (and a `completionCriterion` if you settled on one). Do not just print the text for the user to paste, and do not start before they have approved. Starting still surfaces a final confirmation, so the user keeps the last word on whether it runs. + +## A reusable shape + +For a non-trivial goal, this fill-in-the-blanks structure covers the contract: + +``` +<end state that must become true> +Done when <observable proof>. +Scope: only <allowed area>; do not <off-limits actions>. +Loop: <how to iterate>. +If <blocker>, stop and report instead of forcing a pass. +``` + +Not every goal needs every line, and none of them is a turn cap — the goal stops when the proof passes or a blocker is hit. A small, well-scoped task can be a single clear sentence. Add structure as the work grows or the cost of a wrong autonomous run rises. + +## Weak to strong + +- Weak: `Find all bugs in this codebase.` — no finish line, no proof, no stop. The agent may block at once or run far past what you wanted. + Strong: `Fix every test in test/auth that currently fails, rerun npm test until it exits 0, change no file outside test/ or src/auth, and report anything you cannot fix with its location and why.` +- Weak: `Optimize the project.` — no scope, no measure. 
// Placeholder location for the builtin skill; used for both `path` and `dir`.
const PSEUDO_PATH = 'builtin://write-goal';

// Parse the bundled markdown (front matter + body) into a skill definition.
const parsedSkill = parseSkillText({
  skillMdPath: '/builtin/skills/write-goal.md',
  skillDirName: 'write-goal',
  source: 'builtin',
  text: WRITE_GOAL_BODY,
});

// Keep whatever the markdown declared, defaulting the skill type to 'inline'.
const writeGoalMetadata = {
  ...parsedSkill.metadata,
  type: parsedSkill.metadata.type ?? 'inline',
};

/** Builtin `write-goal` skill, built from the bundled `write-goal.md` text. */
export const WRITE_GOAL_SKILL: SkillDefinition = {
  ...parsedSkill,
  path: PSEUDO_PATH,
  dir: PSEUDO_PATH,
  metadata: writeGoalMetadata,
};
/**
 * Return a copy of a goal snapshot without its `goalId`.
 *
 * The goalId is a random UUID with no user-facing meaning, and no goal tool
 * takes one (there is only ever one goal at a time). Keep it out of what the
 * model sees so it never echoes the id back to the user as if it mattered.
 *
 * @param goal - The snapshot to serialize for the model; it is not mutated.
 * @returns A new object with every property of `goal` except `goalId`.
 */
export function goalForModel(goal: GoalSnapshot): Omit<GoalSnapshot, 'goalId'> {
  // `_goalId` is destructured only to drop it from `rest`.
  const { goalId: _goalId, ...rest } = goal;
  return rest;
}

/**
 * Apply {@link goalForModel} to a goal tool result.
 *
 * @param result - Tool result whose `goal` is a snapshot or `null`.
 * @returns `{ goal: null }` when there is no goal; otherwise the snapshot with
 *   `goalId` removed.
 */
export function goalResultForModel(
  result: GoalToolResult,
): { goal: Omit<GoalSnapshot, 'goalId'> | null } {
  return { goal: result.goal === null ? null : goalForModel(result.goal) };
}
return { + turnId: '0', + stepNumber: 1, + signal, + llm: {}, + args: {}, + toolCall: { + type: 'function', + id: `call_${toolName}`, + name: toolName, + arguments: '{}', + } satisfies ToolCall, + execution: { + accesses: ToolAccesses.none(), + approvalRule: toolName, + display, + execute: async () => ({ output: '' }), + }, + } as unknown as PermissionPolicyContext; +} + +const GOAL_DISPLAY: ToolInputDisplay = { + kind: 'goal_start', + objective: 'Fix the failing auth tests', + mode: 'manual', +}; + +describe('GoalStartReviewAskPermissionPolicy', () => { + it('ignores tools other than CreateGoal', () => { + const { agent } = fakeAgent('manual'); + const policy = new GoalStartReviewAskPermissionPolicy(agent); + expect(policy.evaluate(policyContext('Bash', undefined))).toBeUndefined(); + }); + + it('does not ask in auto mode (the goal is auto-approved upstream)', () => { + const { agent } = fakeAgent('auto'); + const policy = new GoalStartReviewAskPermissionPolicy(agent); + expect(policy.evaluate(policyContext('CreateGoal', GOAL_DISPLAY))).toBeUndefined(); + }); + + it('does not ask without a goal_start display', () => { + const { agent } = fakeAgent('manual'); + const policy = new GoalStartReviewAskPermissionPolicy(agent); + expect(policy.evaluate(policyContext('CreateGoal', undefined))).toBeUndefined(); + }); + + it('asks with the start menu for a CreateGoal in manual mode', () => { + const { agent } = fakeAgent('manual'); + const policy = new GoalStartReviewAskPermissionPolicy(agent); + const result = policy.evaluate(policyContext('CreateGoal', GOAL_DISPLAY)); + expect(result?.kind).toBe('ask'); + }); + + it('switches to the chosen mode on approval, then lets the goal be created', () => { + const { agent, permission } = fakeAgent('manual'); + const policy = new GoalStartReviewAskPermissionPolicy(agent); + const result = policy.evaluate(policyContext('CreateGoal', GOAL_DISPLAY)); + if (result?.kind !== 'ask') throw new Error('expected ask'); + // Returning 
undefined lets CreateGoal.execute run and create the goal. + expect(result.resolveApproval?.({ decision: 'approved', selectedLabel: 'auto' })).toBeUndefined(); + expect(permission.mode).toBe('auto'); + }); + + it('keeps the current mode when the user starts in manual', () => { + const { agent, permission } = fakeAgent('manual'); + const policy = new GoalStartReviewAskPermissionPolicy(agent); + const result = policy.evaluate(policyContext('CreateGoal', GOAL_DISPLAY)); + if (result?.kind !== 'ask') throw new Error('expected ask'); + expect(result.resolveApproval?.({ decision: 'approved', selectedLabel: 'manual' })).toBeUndefined(); + expect(permission.mode).toBe('manual'); + }); + + it('creates no goal and changes no mode when the user declines', () => { + const { agent, permission } = fakeAgent('manual'); + const policy = new GoalStartReviewAskPermissionPolicy(agent); + const result = policy.evaluate(policyContext('CreateGoal', GOAL_DISPLAY)); + if (result?.kind !== 'ask') throw new Error('expected ask'); + // A cancel resolves to undefined; the manager then blocks the tool call. 
+ expect(result.resolveApproval?.({ decision: 'cancelled', selectedLabel: 'cancel' })).toBeUndefined(); + expect(permission.mode).toBe('manual'); + }); +}); diff --git a/packages/agent-core/test/harness/goal-session.test.ts b/packages/agent-core/test/harness/goal-session.test.ts index 84e0edffe..6376e906e 100644 --- a/packages/agent-core/test/harness/goal-session.test.ts +++ b/packages/agent-core/test/harness/goal-session.test.ts @@ -239,6 +239,48 @@ describe('goal session end-to-end', () => { expect((await api.getGoal({ agentId: 'main' })).goal).toBeNull(); }); + it('drives a goal the model creates mid-turn with CreateGoal', async () => { + const sessionDir = await makeTempDir(); + const events: Array> = []; + const { session, agent, scripted } = await setupSession(sessionDir, events, [ + 'CreateGoal', + 'GetGoal', + 'UpdateGoal', + ]); + const api = new SessionAPIImpl(session); + + // No goal exists at launch. The model creates one mid-turn via CreateGoal; + // the driver must then pursue it across continuation turns instead of + // stopping after the ordinary turn that merely started it. + scripted.mockNextResponse({ + type: 'function', + id: 'create', + name: 'CreateGoal', + arguments: JSON.stringify({ objective: 'work' }), + }); + scripted.mockNextResponse({ type: 'text', text: 'Goal created and active.' }); + scripted.mockNextResponse({ + type: 'function', + id: 'complete', + name: 'UpdateGoal', + arguments: JSON.stringify({ status: 'complete' }), + }); + scripted.mockNextResponse({ type: 'text', text: 'I completed the goal.' }); + + agent.turn.prompt([{ type: 'text', text: 'Please start a goal to do the work' }]); + await agent.turn.waitForCurrentTurn(); + + // The driver ran a continuation turn after the goal became active, reaching + // the UpdateGoal('complete') the standalone turn never would have. + expect(scripted.calls.length).toBeGreaterThanOrEqual(4); + expect(JSON.stringify(scripted.calls[2]?.history ?? 
[])).toContain( + 'Continue working toward the active goal', + ); + const turnStarts = events.filter((e) => e['type'] === 'turn.started').length; + expect(turnStarts).toBeGreaterThanOrEqual(2); + expect((await api.getGoal({ agentId: 'main' })).goal).toBeNull(); + }); + it('asks the model to explain why it marked a goal blocked', async () => { const sessionDir = await makeTempDir(); const events: Array> = []; diff --git a/packages/agent-core/test/tools/goal.test.ts b/packages/agent-core/test/tools/goal.test.ts index 2568be66c..f47bb709f 100644 --- a/packages/agent-core/test/tools/goal.test.ts +++ b/packages/agent-core/test/tools/goal.test.ts @@ -29,6 +29,7 @@ function fakeAgent(opts: { type?: 'main' | 'sub'; goal?: GoalMode } = {}): Agent emitEvent: () => {}, telemetry: { track: () => {} }, context: { appendSystemReminder: () => {} }, + permission: { mode: 'manual' }, } as unknown as Agent; (agent as { goal: GoalMode }).goal = opts.goal ?? new GoalMode(agent); return agent; @@ -47,6 +48,15 @@ describe('CreateGoalTool', () => { expect(store.getGoal().goal?.objective).toBe('Ship feature X'); }); + it('omits the internal goalId from the model-facing output', async () => { + const store = makeStore(); + const tool = new CreateGoalTool(fakeAgent({ goal: store })); + const result = await executeTool(tool, ctx({ objective: 'Ship feature X' })); + expect(store.getGoal().goal?.goalId).toBeTruthy(); + expect(result.output).not.toContain('goalId'); + expect(result.output).not.toContain(store.getGoal().goal?.goalId ?? 
'no-id'); + }); + it('passes completionCriterion and replace', async () => { const store = makeStore(); const tool = new CreateGoalTool(fakeAgent({ goal: store })); diff --git a/packages/protocol/src/display.ts b/packages/protocol/src/display.ts index 223eef0ec..8628fbcf5 100644 --- a/packages/protocol/src/display.ts +++ b/packages/protocol/src/display.ts @@ -75,6 +75,15 @@ export const ToolInputDisplaySchema = z.discriminatedUnion('kind', [ .readonly() .optional(), }), + z.object({ + kind: z.literal('goal_start'), + objective: z.string(), + completionCriterion: z.string().optional(), + // Current permission mode at approval time. The client uses it to pick the + // start menu (manual vs yolo); `auto` never reaches this display because it + // auto-approves the goal without a prompt. + mode: z.enum(['manual', 'yolo']), + }), z.object({ kind: z.literal('generic'), summary: z.string(),