From c7a80556d5cfd08835394ac3e16d8d719f8f26dc Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Fri, 8 May 2026 17:49:11 +0800 Subject: [PATCH 01/15] =?UTF-8?q?pref(runtime)=EF=BC=9A=E9=AA=8C=E6=94=B6?= =?UTF-8?q?=E9=87=8D=E5=BB=BA=E7=AC=AC=E4=B8=80=E6=AD=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/cli/gateway_runtime_bridge.go | 11 +- internal/cli/gateway_runtime_bridge_test.go | 1 - internal/config/runtime_test.go | 4 - internal/config/verification.go | 16 -- internal/config/verification_test.go | 13 +- internal/context/accept_checks_test.go | 11 + internal/context/builder_test.go | 4 +- internal/context/source_plan_mode.go | 2 +- internal/context/source_plan_mode_test.go | 2 +- internal/context/source_todos.go | 6 - internal/runtime/accept_checks_test.go | 11 + internal/runtime/acceptance/policy.go | 8 +- internal/runtime/acceptance/policy_test.go | 10 +- internal/runtime/acceptance_service.go | 9 - internal/runtime/acceptance_service_test.go | 4 +- internal/runtime/acceptgate/checks.go | 233 ++++++++++++++++++ internal/runtime/acceptgate/gate.go | 121 +++++++++ internal/runtime/acceptgate/gate_test.go | 150 +++++++++++ internal/runtime/acceptgate_runtime.go | 110 +++++++++ internal/runtime/controlplane/progress.go | 10 + internal/runtime/controlplane/stop_reason.go | 8 + internal/runtime/events.go | 3 + internal/runtime/final_acceptance.go | 7 +- internal/runtime/final_acceptance_test.go | 2 +- internal/runtime/hooks_integration_test.go | 8 +- internal/runtime/planning.go | 12 +- internal/runtime/planning_test.go | 22 +- internal/runtime/run.go | 158 +++++------- internal/runtime/runtime_progress_test.go | 86 +++---- internal/runtime/runtime_snapshot.go | 34 +-- internal/runtime/runtime_snapshot_test.go | 4 - internal/runtime/runtime_test.go | 168 +++++++++---- internal/runtime/state.go | 2 + .../runtime/thinking_callprovider_test.go | 3 +- internal/runtime/todo_run_boundary.go | 49 +--- internal/runtime/todo_run_boundary_test.go | 67 ++--- internal/runtime/verify/git_diff.go | 94 ------- internal/runtime/verify/git_diff_test.go | 120 --------- internal/session/plan.go | 211 ++++++++++++++-- internal/session/plan_test.go | 28 ++- internal/session/store_test.go | 4 +- 41 files changed, 1187 insertions(+), 639 deletions(-) create mode 100644 internal/context/accept_checks_test.go create mode 100644 internal/runtime/accept_checks_test.go create mode 100644 internal/runtime/acceptgate/checks.go create mode 100644 internal/runtime/acceptgate/gate.go create mode 100644 internal/runtime/acceptgate/gate_test.go create mode 100644 internal/runtime/acceptgate_runtime.go delete mode 100644 internal/runtime/verify/git_diff.go delete mode 100644 internal/runtime/verify/git_diff_test.go diff --git a/internal/cli/gateway_runtime_bridge.go b/internal/cli/gateway_runtime_bridge.go index 1193038d..3e290906 100644 --- a/internal/cli/gateway_runtime_bridge.go +++ b/internal/cli/gateway_runtime_bridge.go @@ -1509,19 +1509,16 @@ func convertRuntimeSnapshot(snapshot agentruntime.RuntimeSnapshot) gateway.Runti RunID: strings.TrimSpace(snapshot.RunID), SessionID: strings.TrimSpace(snapshot.SessionID), Phase: strings.TrimSpace(snapshot.Phase), - TaskKind: strings.TrimSpace(snapshot.TaskKind), UpdatedAt: snapshot.UpdatedAt, Todos: convertRuntimeTodoSnapshot(snapshot.Todos), Facts: map[string]any{ "runtime_facts": snapshot.Facts.RuntimeFacts, }, Decision: map[string]any{ - "status": strings.TrimSpace(snapshot.Decision.Status), - "stop_reason": strings.TrimSpace(snapshot.Decision.StopReason), - "missing_facts": snapshot.Decision.MissingFacts, - "required_next_actions": snapshot.Decision.RequiredNextActions, - "user_visible_summary": strings.TrimSpace(snapshot.Decision.UserVisibleSummary), - "internal_summary": strings.TrimSpace(snapshot.Decision.InternalSummary), + "status": strings.TrimSpace(snapshot.Decision.Status), + "stop_reason": strings.TrimSpace(snapshot.Decision.StopReason), + "summary": strings.TrimSpace(snapshot.Decision.Summary), + "details": append([]string(nil), snapshot.Decision.Details...), }, SubAgents: map[string]any{ "started_count": snapshot.SubAgents.StartedCount, diff --git a/internal/cli/gateway_runtime_bridge_test.go b/internal/cli/gateway_runtime_bridge_test.go index 91a82bd3..303fcab1 100644 --- a/internal/cli/gateway_runtime_bridge_test.go +++ b/internal/cli/gateway_runtime_bridge_test.go @@ -965,7 +965,6 @@ func TestGatewayRuntimePortBridgeListSessionTodosAndSnapshot(t *testing.T) { RunID: "run-1", SessionID: "session-2", Phase: "acceptance", - TaskKind: "workspace_write", Decision: agentruntime.DecisionSnapshot{Status: "continue", StopReason: "unverified_write"}, SubAgents: agentruntime.SubAgentSnapshot{StartedCount: 1, CompletedCount: 1, FailedCount: 0}, }, diff --git a/internal/config/runtime_test.go b/internal/config/runtime_test.go index 0c67477a..5918c034 100644 --- a/internal/config/runtime_test.go +++ b/internal/config/runtime_test.go @@ -17,9 +17,6 @@ func TestRuntimeConfigCloneAndDefaults(t *testing.T) { if zero.MaxNoProgressStreak != defaults.MaxNoProgressStreak { t.Fatalf("MaxNoProgressStreak = %d, want %d", zero.MaxNoProgressStreak, defaults.MaxNoProgressStreak) } - if zero.Verification.MaxNoProgress != defaults.Verification.MaxNoProgress { - t.Fatalf("Verification.MaxNoProgress = %d, want %d", zero.Verification.MaxNoProgress, defaults.Verification.MaxNoProgress) - } if len(zero.Verification.Verifiers) == 0 { t.Fatal("expected default verifiers to be populated") } @@ -46,7 +43,6 @@ func TestRuntimeConfigValidate(t *testing.T) { MaxRepeatCycleStreak: 1, MaxTurns: 1, Verification: VerificationConfig{ - MaxNoProgress: 1, Verifiers: map[string]VerifierConfig{ "": {}, }, diff --git a/internal/config/verification.go b/internal/config/verification.go index f578a58f..b6d46dc1 100644 --- a/internal/config/verification.go +++ b/internal/config/verification.go @@ -11,7 +11,6 @@ const ( verifierFileExists = "file_exists" verifierContentMatch = "content_match" verifierCommandSuccess = "command_success" - verifierGitDiff = "git_diff" verifierBuild = "build" verifierTest = "test" verifierLint = "lint" @@ -51,7 +50,6 @@ var defaultVerificationDeniedCommands = []string{ // VerificationConfig 定义 runtime final 验收阶段的 verifier 执行配置。 type VerificationConfig struct { - MaxNoProgress int `yaml:"max_no_progress,omitempty"` Verifiers map[string]VerifierConfig `yaml:"verifiers,omitempty"` ExecutionPolicy VerificationExecutionPolicyConfig `yaml:"execution_policy,omitempty"` } @@ -77,7 +75,6 @@ type VerificationExecutionPolicyConfig struct { // defaultVerificationConfig 返回验证引擎默认策略。 func defaultVerificationConfig() VerificationConfig { return VerificationConfig{ - MaxNoProgress: 2, Verifiers: map[string]VerifierConfig{ verifierTodoConvergence: { TimeoutSec: 5, @@ -99,12 +96,6 @@ func defaultVerificationConfig() VerificationConfig { OutputCapBytes: 128 * 1024, Scope: verificationScopeProject, }, - verifierGitDiff: { - Command: []string{"git", "status", "--porcelain", "--untracked-files=normal"}, - TimeoutSec: 15, - OutputCapBytes: 64 * 1024, - Scope: verificationScopeProject, - }, verifierBuild: { TimeoutSec: 300, OutputCapBytes: 256 * 1024, @@ -145,7 +136,6 @@ func defaultVerificationExecutionPolicyConfig() VerificationExecutionPolicyConfi // Clone 复制 verification 配置,避免 map/slice 共享底层数据。 func (c VerificationConfig) Clone() VerificationConfig { cloned := VerificationConfig{ - MaxNoProgress: c.MaxNoProgress, ExecutionPolicy: c.ExecutionPolicy.Clone(), } if len(c.Verifiers) > 0 { @@ -162,9 +152,6 @@ func (c *VerificationConfig) ApplyDefaults(defaults VerificationConfig) { if c == nil { return } - if c.MaxNoProgress <= 0 { - c.MaxNoProgress = defaults.MaxNoProgress - } if c.Verifiers == nil { c.Verifiers = make(map[string]VerifierConfig, len(defaults.Verifiers)) } @@ -182,9 +169,6 @@ func (c *VerificationConfig) ApplyDefaults(defaults VerificationConfig) { // Validate 校验 verification 配置合法性。 func (c VerificationConfig) Validate() error { - if c.MaxNoProgress <= 0 { - return errors.New("runtime.verification.max_no_progress must be greater than 0") - } for name, verifier := range c.Verifiers { if strings.TrimSpace(name) == "" { return errors.New("runtime.verification.verifiers has empty name") diff --git a/internal/config/verification_test.go b/internal/config/verification_test.go index 4ce76a22..bd73b5e9 100644 --- a/internal/config/verification_test.go +++ b/internal/config/verification_test.go @@ -9,14 +9,11 @@ func TestVerificationConfigApplyDefaultsAndValidate(t *testing.T) { cfg := VerificationConfig{} cfg.ApplyDefaults(defaults) - if cfg.MaxNoProgress != defaults.MaxNoProgress { - t.Fatalf("MaxNoProgress = %d, want %d", cfg.MaxNoProgress, defaults.MaxNoProgress) - } if len(cfg.Verifiers) != len(defaults.Verifiers) { t.Fatalf("verifier count = %d, want %d", len(cfg.Verifiers), len(defaults.Verifiers)) } - if cfg.Verifiers[verifierGitDiff].Command[0] != "git" { - t.Fatalf("expected git_diff default argv, got %#v", cfg.Verifiers[verifierGitDiff].Command) + if _, ok := cfg.Verifiers["git_diff"]; ok { + t.Fatal("git_diff verifier should not be configured by default") } if err := cfg.Validate(); err != nil { t.Fatalf("Validate() error = %v", err) @@ -27,12 +24,6 @@ func TestVerificationConfigValidateRejectsBadFields(t *testing.T) { t.Parallel() cfg := defaultVerificationConfig() - cfg.MaxNoProgress = 0 - if err := cfg.Validate(); err == nil { - t.Fatal("expected max_no_progress validation error") - } - - cfg = defaultVerificationConfig() cfg.Verifiers[" "] = VerifierConfig{} if err := cfg.Validate(); err == nil { t.Fatal("expected empty verifier name validation error") diff --git a/internal/context/accept_checks_test.go b/internal/context/accept_checks_test.go new file mode 100644 index 00000000..d3c88a4d --- /dev/null +++ b/internal/context/accept_checks_test.go @@ -0,0 +1,11 @@ +package context + +import agentsession "neo-code/internal/session" + +func acceptText(items ...string) agentsession.AcceptChecks { + out := make(agentsession.AcceptChecks, 0, len(items)) + for _, item := range items { + out = append(out, agentsession.AcceptCheck{Kind: agentsession.AcceptCheckOutputOnly, Target: item}) + } + return out +} diff --git a/internal/context/builder_test.go b/internal/context/builder_test.go index f0273319..5e45b0cc 100644 --- a/internal/context/builder_test.go +++ b/internal/context/builder_test.go @@ -159,13 +159,13 @@ func TestDefaultBuilderBuildIncludesPlanSections(t *testing.T) { Goal: "引入 plan/build 模式", Steps: []string{"扩展 session", "扩展 runtime"}, Constraints: []string{"保持 tools 边界"}, - Verify: []string{"go test ./internal/..."}, + Verify: acceptText("go test ./internal/..."), }, Summary: agentsession.SummaryView{ Goal: "引入 plan/build 模式", KeySteps: []string{"扩展 session", "扩展 runtime"}, Constraints: []string{"保持 tools 边界"}, - Verify: []string{"go test ./internal/..."}, + Verify: acceptText("go test ./internal/..."), ActiveTodoIDs: []string{"todo-1"}, }, }, diff --git a/internal/context/source_plan_mode.go b/internal/context/source_plan_mode.go index 5ca40fe9..eb727669 100644 --- a/internal/context/source_plan_mode.go +++ b/internal/context/source_plan_mode.go @@ -80,7 +80,7 @@ func renderCurrentPlanSection(plan *agentsession.PlanArtifact, injectFull bool) } if len(plan.Summary.Verify) > 0 { lines = append(lines, "verify:") - for _, check := range plan.Summary.Verify { + for _, check := range plan.Summary.Verify.RenderLines() { lines = append(lines, "- "+check) } } diff --git a/internal/context/source_plan_mode_test.go b/internal/context/source_plan_mode_test.go index ae0d7c12..39adf98e 100644 --- a/internal/context/source_plan_mode_test.go +++ b/internal/context/source_plan_mode_test.go @@ -111,7 +111,7 @@ func TestRenderCurrentPlanSectionInjectsFullPlan(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "完整计划", Steps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), OpenQuestions: []string{"问题一"}, }, Summary: agentsession.SummaryView{ diff --git a/internal/context/source_todos.go b/internal/context/source_todos.go index cb58cbf2..2fa8db1c 100644 --- a/internal/context/source_todos.go +++ b/internal/context/source_todos.go @@ -80,12 +80,6 @@ func (todosSource) Sections(ctx context.Context, input BuildInput) ([]promptSect } } - lines = append(lines, "", - "stale_todo_reminder: If any todo above is no longer relevant to the current task,", - "or the user clearly switches to a different task, use todo_write to mark it completed", - "only if the work is actually done; otherwise set_status=canceled before moving on.", - ) - return []promptSection{ { Title: "Todo State", diff --git a/internal/runtime/accept_checks_test.go b/internal/runtime/accept_checks_test.go new file mode 100644 index 00000000..2758e1be --- /dev/null +++ b/internal/runtime/accept_checks_test.go @@ -0,0 +1,11 @@ +package runtime + +import agentsession "neo-code/internal/session" + +func acceptText(items ...string) agentsession.AcceptChecks { + out := make(agentsession.AcceptChecks, 0, len(items)) + for _, item := range items { + out = append(out, agentsession.AcceptCheck{Kind: agentsession.AcceptCheckOutputOnly, Target: item}) + } + return out +} diff --git a/internal/runtime/acceptance/policy.go b/internal/runtime/acceptance/policy.go index 784bada9..a5f82062 100644 --- a/internal/runtime/acceptance/policy.go +++ b/internal/runtime/acceptance/policy.go @@ -48,8 +48,6 @@ func (p DefaultPolicy) buildVerifier(name string) verify.FinalVerifier { return verify.ContentMatchVerifier{} case "command_success": return verify.CommandSuccessVerifier{VerifierName: "command_success", Executor: p.Executor} - case "git_diff": - return verify.GitDiffVerifier{Executor: p.Executor} case "build": return verify.NewBuildVerifier(p.Executor) case "test": @@ -73,11 +71,11 @@ func mappedVerifierNames(profile agentsession.VerificationProfile) []string { case agentsession.VerificationProfileConfig: return []string{"todo_convergence", "file_exists", "content_match", "command_success"} case agentsession.VerificationProfileEditCode: - return []string{"todo_convergence", "git_diff", "build", "test", "typecheck"} + return []string{"todo_convergence", "build", "test", "typecheck"} case agentsession.VerificationProfileFixBug: - return []string{"todo_convergence", "git_diff", "test", "build", "typecheck"} + return []string{"todo_convergence", "test", "build", "typecheck"} case agentsession.VerificationProfileRefactor: - return []string{"todo_convergence", "git_diff", "build", "test", "lint", "typecheck"} + return []string{"todo_convergence", "build", "test", "lint", "typecheck"} default: return nil } diff --git a/internal/runtime/acceptance/policy_test.go b/internal/runtime/acceptance/policy_test.go index 96c5cf1a..df81c5b4 100644 --- a/internal/runtime/acceptance/policy_test.go +++ b/internal/runtime/acceptance/policy_test.go @@ -17,8 +17,8 @@ func TestMappedVerifierNames(t *testing.T) { {profile: agentsession.VerificationProfileTaskOnly, want: []string{"todo_convergence"}}, {profile: agentsession.VerificationProfileCreateFile, want: []string{"todo_convergence", "file_exists", "content_match"}}, {profile: agentsession.VerificationProfileConfig, want: []string{"todo_convergence", "file_exists", "content_match", "command_success"}}, - {profile: agentsession.VerificationProfileEditCode, want: []string{"todo_convergence", "git_diff", "build", "test", "typecheck"}}, - {profile: agentsession.VerificationProfileRefactor, want: []string{"todo_convergence", "git_diff", "build", "test", "lint", "typecheck"}}, + {profile: agentsession.VerificationProfileEditCode, want: []string{"todo_convergence", "build", "test", "typecheck"}}, + {profile: agentsession.VerificationProfileRefactor, want: []string{"todo_convergence", "build", "test", "lint", "typecheck"}}, } for _, tc := range cases { @@ -43,10 +43,10 @@ func TestDefaultPolicyResolveVerifiers(t *testing.T) { if err != nil { t.Fatalf("ResolveVerifiers() error = %v", err) } - if len(verifiers) != 5 { - t.Fatalf("ResolveVerifiers() len = %d, want 5", len(verifiers)) + if len(verifiers) != 4 { + t.Fatalf("ResolveVerifiers() len = %d, want 4", len(verifiers)) } - if verifiers[0].Name() != "todo_convergence" || verifiers[1].Name() != "git_diff" { + if verifiers[0].Name() != "todo_convergence" || verifiers[1].Name() != "build" { t.Fatalf("unexpected verifier order: %s, %s", verifiers[0].Name(), verifiers[1].Name()) } } diff --git a/internal/runtime/acceptance_service.go b/internal/runtime/acceptance_service.go index b8ffe6ab..98c4ee9e 100644 --- a/internal/runtime/acceptance_service.go +++ b/internal/runtime/acceptance_service.go @@ -5,7 +5,6 @@ import ( "fmt" "strings" - "neo-code/internal/config" "neo-code/internal/runtime/acceptance" "neo-code/internal/runtime/controlplane" "neo-code/internal/runtime/decider" @@ -356,11 +355,3 @@ func toSessionTodos(snapshot decider.TodoSnapshot) []agentsession.TodoItem { } return out } - -func resolveAcceptanceMaxNoProgress(cfg config.VerificationConfig) int { - limit := cfg.MaxNoProgress - if limit <= 0 { - return 3 - } - return limit -} diff --git a/internal/runtime/acceptance_service_test.go b/internal/runtime/acceptance_service_test.go index 77d2b5d4..c78cb6e4 100644 --- a/internal/runtime/acceptance_service_test.go +++ b/internal/runtime/acceptance_service_test.go @@ -420,9 +420,9 @@ func TestNoProgressThresholdProducesIncomplete(t *testing.T) { service := &Service{events: make(chan RuntimeEvent, 16)} state := newRunState("run-no-progress", agentsession.New("no-progress")) state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - state.finalInterceptStreak = 3 + state.finalInterceptStreak = config.DefaultMaxNoProgressStreak state.mustUseToolAfterFinalContinue = true - state.noToolAfterFinalContinueStreak = 3 + state.noToolAfterFinalContinueStreak = config.DefaultMaxNoProgressStreak snapshot := TurnBudgetSnapshot{Config: config.StaticDefaults().Clone(), Workdir: t.TempDir()} decision, err := service.beforeAcceptFinal( diff --git a/internal/runtime/acceptgate/checks.go b/internal/runtime/acceptgate/checks.go new file mode 100644 index 00000000..60031537 --- /dev/null +++ b/internal/runtime/acceptgate/checks.go @@ -0,0 +1,233 @@ +package acceptgate + +import ( + "path/filepath" + "strings" + + agentsession "neo-code/internal/session" +) + +func checkRequiredTodoFailures(todos []agentsession.TodoItem) CheckResult { + for _, todo := range todos { + if !todo.RequiredValue() { + continue + } + if todo.Status == agentsession.TodoStatusFailed { + return CheckResult{ + Passed: false, + Name: "required_todo_failed", + Reason: "required todo failed: " + strings.TrimSpace(todo.ID), + } + } + } + return CheckResult{Passed: true, Name: "required_todo_failed"} +} + +func checkRequiredTodoConvergence(todos []agentsession.TodoItem) CheckResult { + for _, todo := range todos { + if !todo.RequiredValue() { + continue + } + if !todo.Status.IsTerminal() { + return CheckResult{ + Passed: false, + Name: "required_todo_convergence", + Reason: "required todo is not terminal: " + strings.TrimSpace(todo.ID), + } + } + } + return CheckResult{Passed: true, Name: "required_todo_convergence"} +} + +func evaluateAcceptCheck(input Input, check agentsession.AcceptCheck) CheckResult { + check.Kind = strings.TrimSpace(check.Kind) + check.Target = strings.TrimSpace(check.Target) + switch check.Kind { + case agentsession.AcceptCheckOutputOnly: + return checkOutputOnly(input, check) + case agentsession.AcceptCheckWorkspaceChange: + return checkWorkspaceChange(input, check) + case agentsession.AcceptCheckCommandSuccess: + return checkCommandSuccess(input, check) + case agentsession.AcceptCheckFileExists: + return checkFileExists(input, check) + case agentsession.AcceptCheckContentContains: + return checkContentContains(input, check) + case agentsession.AcceptCheckToolFact: + return checkToolFact(input, check) + default: + return CheckResult{ + Passed: false, + Name: checkName(check), + Kind: check.Kind, + Target: check.Target, + Reason: "unknown required accept check kind", + } + } +} + +func checkOutputOnly(input Input, check agentsession.AcceptCheck) CheckResult { + if strings.TrimSpace(input.LastAssistantText) != "" { + return pass(check) + } + return fail(check, "assistant output is empty") +} + +func checkWorkspaceChange(input Input, check agentsession.AcceptCheck) CheckResult { + if len(input.Facts.Files.Written) > 0 { + return pass(check) + } + for _, item := range input.Facts.Files.Exists { + switch strings.TrimSpace(item.Source) { + case "filesystem_write_file", "filesystem_write_file_noop", "filesystem_edit", "bash", "workspace_write": + return pass(check) + } + } + return fail(check, "missing workspace change evidence") +} + +func checkCommandSuccess(input Input, check agentsession.AcceptCheck) CheckResult { + target := normalizeCommand(check.Target) + if target == "" { + return fail(check, "command target is empty") + } + for _, fact := range input.Facts.Commands.Executed { + if !fact.Succeeded { + continue + } + if commandMatches(normalizeCommand(fact.Command), target, check.Match) { + return pass(check) + } + } + return fail(check, "missing successful command evidence") +} + +func checkFileExists(input Input, check agentsession.AcceptCheck) CheckResult { + target := normalizePath(check.Target) + if target == "" { + return fail(check, "file target is empty") + } + for _, fact := range input.Facts.Files.Exists { + if normalizePath(fact.Path) == target { + return pass(check) + } + } + for _, fact := range input.Facts.Files.Written { + if normalizePath(fact.Path) == target { + return pass(check) + } + } + return fail(check, "missing file existence evidence") +} + +func checkContentContains(input Input, check agentsession.AcceptCheck) CheckResult { + target := normalizePath(check.Target) + if target == "" { + return fail(check, "content target is empty") + } + for _, fact := range input.Facts.Files.ContentMatch { + if normalizePath(fact.Path) != target || !fact.VerificationPassed { + continue + } + if expected := strings.TrimSpace(check.Params["contains"]); expected != "" { + if !containsString(fact.ExpectedContains, expected) { + continue + } + } + return pass(check) + } + return fail(check, "missing content match evidence") +} + +func checkToolFact(input Input, check agentsession.AcceptCheck) CheckResult { + scope := strings.TrimSpace(firstNonEmpty(check.Params["scope"], check.Target)) + tool := strings.TrimSpace(check.Params["tool"]) + for _, fact := range input.Facts.Verification.Passed { + if tool != "" && !strings.EqualFold(strings.TrimSpace(fact.Tool), tool) { + continue + } + if scope != "" && strings.TrimSpace(fact.Scope) != scope { + continue + } + return pass(check) + } + return fail(check, "missing tool verification fact") +} + +func pass(check agentsession.AcceptCheck) CheckResult { + return CheckResult{Passed: true, Name: checkName(check), Kind: check.Kind, Target: check.Target} +} + +func fail(check agentsession.AcceptCheck, reason string) CheckResult { + return CheckResult{Passed: false, Name: checkName(check), Kind: check.Kind, Target: check.Target, Reason: reason} +} + +func checkName(check agentsession.AcceptCheck) string { + if id := strings.TrimSpace(check.ID); id != "" { + return id + } + if kind := strings.TrimSpace(check.Kind); kind != "" { + return kind + } + return "accept_check" +} + +func commandMatches(actual, target, mode string) bool { + switch strings.TrimSpace(strings.ToLower(mode)) { + case "exact": + return actual == target + case "prefix": + return strings.HasPrefix(actual, target) + case "contains", "normalized_contains", "": + return actual == target || strings.Contains(actual, target) + default: + return actual == target || strings.Contains(actual, target) + } +} + +func normalizeCommand(value string) string { + value = strings.TrimSpace(value) + if value == "" { + return "" + } + fields := strings.Fields(value) + out := make([]string, 0, len(fields)) + for _, field := range fields { + if strings.Contains(field, "=") && !strings.Contains(field, "/") && !strings.Contains(field, "\\") { + continue + } + if strings.HasPrefix(strings.ToLower(field), "$env:") { + continue + } + out = append(out, field) + } + return strings.ToLower(strings.Join(out, " ")) +} + +func normalizePath(value string) string { + value = strings.TrimSpace(value) + if value == "" { + return "" + } + cleaned := filepath.ToSlash(filepath.Clean(value)) + cleaned = strings.TrimPrefix(cleaned, "./") + return strings.ToLower(cleaned) +} + +func containsString(values []string, target string) bool { + for _, value := range values { + if strings.TrimSpace(value) == target { + return true + } + } + return false +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + if strings.TrimSpace(value) != "" { + return value + } + } + return "" +} diff --git a/internal/runtime/acceptgate/gate.go b/internal/runtime/acceptgate/gate.go new file mode 100644 index 00000000..1cb3870f --- /dev/null +++ b/internal/runtime/acceptgate/gate.go @@ -0,0 +1,121 @@ +package acceptgate + +import ( + "context" + "fmt" + "strings" + + "neo-code/internal/runtime/controlplane" + runtimefacts "neo-code/internal/runtime/facts" + agentsession "neo-code/internal/session" +) + +// Outcome 表示 Accept Gate 的二元终态结果。 +type Outcome string + +const ( + // OutcomeAccepted 表示所有必需验收项均已满足。 + OutcomeAccepted Outcome = "accepted" + // OutcomeFailed 表示至少一个必需验收项缺少运行期证据或状态未收敛。 + OutcomeFailed Outcome = "failed" +) + +// Input 汇总最终验收所需的运行期事实和 plan 状态。 +type Input struct { + PlanVerify agentsession.AcceptChecks + Facts runtimefacts.RuntimeFacts + Todos []agentsession.TodoItem + LastAssistantText string +} + +// CheckResult 描述单个验收项的判定结果。 +type CheckResult struct { + Passed bool `json:"passed"` + Name string `json:"name"` + Kind string `json:"kind,omitempty"` + Target string `json:"target,omitempty"` + Reason string `json:"reason,omitempty"` +} + +// Report 描述 Accept Gate 的完整判定报告。 +type Report struct { + Outcome Outcome `json:"status"` + StopReason controlplane.StopReason `json:"stop_reason,omitempty"` + Summary string `json:"summary,omitempty"` + Results []CheckResult `json:"results,omitempty"` +} + +// Evaluate 按固定顺序检查 plan-owned todo 与 Plan.Verify 运行期证据。 +func Evaluate(ctx context.Context, input Input) Report { + if err := ctx.Err(); err != nil { + return Report{ + Outcome: OutcomeFailed, + StopReason: controlplane.StopReasonFatalError, + Summary: err.Error(), + } + } + + report := Report{ + Outcome: OutcomeAccepted, + StopReason: controlplane.StopReasonAccepted, + } + + report.add(checkRequiredTodoFailures(input.Todos)) + report.add(checkRequiredTodoConvergence(input.Todos)) + + checks := input.PlanVerify.Normalize() + if len(checks) == 0 { + checks = agentsession.AcceptChecks{{Kind: agentsession.AcceptCheckOutputOnly, Required: true}} + } + for _, check := range checks { + report.add(evaluateAcceptCheck(input, check)) + } + report.finalize() + return report +} + +func (r *Report) add(result CheckResult) { + if strings.TrimSpace(result.Name) == "" { + return + } + r.Results = append(r.Results, result) + if result.Passed { + return + } + r.Outcome = OutcomeFailed + switch result.Name { + case "required_todo_failed": + r.StopReason = controlplane.StopReasonRequiredTodoFailed + case "required_todo_convergence": + if r.StopReason != controlplane.StopReasonRequiredTodoFailed { + r.StopReason = controlplane.StopReasonTodoNotConverged + } + default: + if r.StopReason == "" || r.StopReason == controlplane.StopReasonAccepted { + r.StopReason = controlplane.StopReasonAcceptCheckFailed + } + } +} + +func (r *Report) finalize() { + if r.Outcome == OutcomeAccepted { + r.StopReason = controlplane.StopReasonAccepted + r.Summary = "acceptance checks passed" + return + } + if r.StopReason == "" || r.StopReason == controlplane.StopReasonAccepted { + r.StopReason = controlplane.StopReasonAcceptCheckFailed + } + failures := make([]string, 0, len(r.Results)) + for _, result := range r.Results { + if result.Passed { + continue + } + reason := strings.TrimSpace(result.Reason) + if reason == "" { + reason = "failed" + } + failures = append(failures, fmt.Sprintf("%s: %s", result.Name, reason)) + } + r.Summary = strings.Join(failures, "; ") +} diff --git a/internal/runtime/acceptgate/gate_test.go b/internal/runtime/acceptgate/gate_test.go new file mode 100644 index 00000000..d22b617b --- /dev/null +++ b/internal/runtime/acceptgate/gate_test.go @@ -0,0 +1,150 @@ +package acceptgate + +import ( + "context" + "testing" + + "neo-code/internal/runtime/controlplane" + runtimefacts "neo-code/internal/runtime/facts" + agentsession "neo-code/internal/session" +) + +func TestEvaluateFallbackOutputOnly(t *testing.T) { + t.Parallel() + + report := Evaluate(context.Background(), Input{LastAssistantText: "done"}) + if report.Outcome != OutcomeAccepted || report.StopReason != controlplane.StopReasonAccepted { + t.Fatalf("report = %+v, want accepted", report) + } + + report = Evaluate(context.Background(), Input{}) + if report.Outcome != OutcomeFailed || report.StopReason != controlplane.StopReasonAcceptCheckFailed { + t.Fatalf("report = %+v, want accept_check_failed", report) + } +} + +func TestEvaluateCommandSuccess(t *testing.T) { + t.Parallel() + + input := Input{ + PlanVerify: agentsession.AcceptChecks{{Kind: agentsession.AcceptCheckCommandSuccess, Target: "go test ./..."}}, + Facts: runtimefacts.RuntimeFacts{ + Commands: runtimefacts.CommandFacts{Executed: []runtimefacts.CommandFact{ + {Tool: "bash", Command: "GOFLAGS=-count=1 go test ./...", Succeeded: true}, + }}, + }, + LastAssistantText: "done", + } + if report := Evaluate(context.Background(), input); report.Outcome != OutcomeAccepted { + t.Fatalf("report = %+v, want accepted", report) + } + + input.Facts.Commands.Executed[0].Succeeded = false + if report := Evaluate(context.Background(), input); report.Outcome != OutcomeFailed { + t.Fatalf("report = %+v, want failed", report) + } +} + +func TestEvaluateWorkspaceChangeUsesRuntimeFactsOnly(t *testing.T) { + t.Parallel() + + input := Input{ + PlanVerify: agentsession.AcceptChecks{{Kind: agentsession.AcceptCheckWorkspaceChange}}, + Facts: runtimefacts.RuntimeFacts{ + Files: runtimefacts.FileFacts{Written: []runtimefacts.FileWriteFact{{Path: "internal/foo.go"}}}, + }, + LastAssistantText: "done", + } + if report := Evaluate(context.Background(), input); report.Outcome != OutcomeAccepted { + t.Fatalf("written fact report = %+v, want accepted", report) + } + + input.Facts.Files.Written = nil + input.Facts.Files.Exists = []runtimefacts.FileExistFact{{Path: "internal/foo.go", Source: "filesystem_write_file"}} + if report := Evaluate(context.Background(), input); report.Outcome != OutcomeAccepted { + t.Fatalf("write-source exists report = %+v, want accepted", report) + } + + input.Facts.Files.Exists = []runtimefacts.FileExistFact{{Path: "internal/foo.go", Source: "filesystem_read_file"}} + if report := Evaluate(context.Background(), input); report.Outcome != OutcomeFailed { + t.Fatalf("read-only fact report = %+v, want failed", report) + } +} + +func TestEvaluateFileAndContentFacts(t *testing.T) { + t.Parallel() + + input := Input{ + PlanVerify: agentsession.AcceptChecks{ + {Kind: agentsession.AcceptCheckFileExists, Target: "./README.md"}, + {Kind: agentsession.AcceptCheckContentContains, Target: "README.md", Params: map[string]string{"contains": "NeoCode"}}, + }, + Facts: runtimefacts.RuntimeFacts{ + Files: runtimefacts.FileFacts{ + Exists: []runtimefacts.FileExistFact{{Path: "README.md", Source: "filesystem_read_file"}}, + ContentMatch: []runtimefacts.FileContentMatchFact{{ + Path: "README.md", + ExpectedContains: []string{"NeoCode"}, + VerificationPassed: true, + }}, + }, + }, + LastAssistantText: "done", + } + if report := Evaluate(context.Background(), input); report.Outcome != OutcomeAccepted { + t.Fatalf("report = %+v, want accepted", report) + } + + input.Facts.Files.ContentMatch[0].VerificationPassed = false + report := Evaluate(context.Background(), input) + if report.Outcome != OutcomeFailed || len(report.Results) != 4 { + t.Fatalf("report = %+v, want failed with all results", report) + } +} + +func TestEvaluateToolFactAndUnknownKind(t *testing.T) { + t.Parallel() + + input := Input{ + PlanVerify: agentsession.AcceptChecks{ + {Kind: agentsession.AcceptCheckToolFact, Params: map[string]string{"tool": "bash", "scope": "test"}}, + {Kind: "future_check"}, + }, + Facts: runtimefacts.RuntimeFacts{ + Verification: runtimefacts.VerificationFacts{ + Passed: []runtimefacts.VerificationFact{{Tool: "bash", Scope: "test"}}, + }, + }, + LastAssistantText: "done", + } + report := Evaluate(context.Background(), input) + if report.Outcome != OutcomeFailed || report.StopReason != controlplane.StopReasonAcceptCheckFailed { + t.Fatalf("report = %+v, want unknown kind failure", report) + } + if report.Results[len(report.Results)-1].Reason != "unknown required accept check kind" { + t.Fatalf("last result = %+v, want unknown kind reason", report.Results[len(report.Results)-1]) + } +} + +func TestEvaluateTodoPriority(t *testing.T) { + t.Parallel() + + required := true + input := Input{ + PlanVerify: agentsession.AcceptChecks{{Kind: agentsession.AcceptCheckOutputOnly}}, + LastAssistantText: "done", + Todos: []agentsession.TodoItem{ + {ID: "todo-1", Status: agentsession.TodoStatusFailed, Required: &required}, + }, + } + report := Evaluate(context.Background(), input) + if report.Outcome != OutcomeFailed || report.StopReason != controlplane.StopReasonRequiredTodoFailed { + t.Fatalf("failed todo report = %+v, want required_todo_failed", report) + } + + input.Todos[0].Status = agentsession.TodoStatusPending + report = Evaluate(context.Background(), input) + if report.Outcome != OutcomeFailed || report.StopReason != controlplane.StopReasonTodoNotConverged { + t.Fatalf("pending todo report = %+v, want todo_not_converged", report) + } +} diff --git a/internal/runtime/acceptgate_runtime.go b/internal/runtime/acceptgate_runtime.go new file mode 100644 index 00000000..4ba00269 --- /dev/null +++ b/internal/runtime/acceptgate_runtime.go @@ -0,0 +1,110 @@ +package runtime + +import ( + "context" + "strings" + + providertypes "neo-code/internal/provider/types" + "neo-code/internal/runtime/acceptance" + "neo-code/internal/runtime/acceptgate" + runtimefacts "neo-code/internal/runtime/facts" + agentsession "neo-code/internal/session" +) + +const completionProtocolReminder = "[Runtime Control]\n你当前没有调用工具,也没有输出 task_completion。若任务已完成,请按结构化完成信号结束;否则继续调用工具推进。" + +// evaluateAcceptGate 从运行态提取事实快照,并执行最终 Accept Gate。 +func (s *Service) evaluateAcceptGate(ctx context.Context, state *runState, assistantMessage providertypes.Message) acceptgate.Report { + if state == nil { + return acceptgate.Evaluate(ctx, acceptgate.Input{}) + } + state.mu.Lock() + var planVerify agentsession.AcceptChecks + var currentPlan *agentsession.PlanArtifact + if state.session.CurrentPlan != nil { + currentPlan = state.session.CurrentPlan.Clone() + planVerify = currentPlan.Summary.Verify.Clone() + if len(planVerify) == 0 { + planVerify = currentPlan.Spec.Verify.Clone() + } + } + todos := selectPlanOwnedTodos(currentPlan, cloneTodosForPersistence(state.session.Todos)) + factsSnapshot := runtimefacts.RuntimeFacts{} + if state.factsCollector != nil { + factsSnapshot = state.factsCollector.Snapshot() + } + state.mu.Unlock() + + return acceptgate.Evaluate(ctx, acceptgate.Input{ + PlanVerify: planVerify, + Facts: factsSnapshot, + Todos: todos, + LastAssistantText: renderAssistantTextWithoutCompletion(assistantMessage), + }) +} + +// selectPlanOwnedTodos 只把当前计划显式拥有的 todo 交给终态验收,避免无 plan 的 chat/read-only 被旧 todo 污染。 +func selectPlanOwnedTodos(plan *agentsession.PlanArtifact, todos []agentsession.TodoItem) []agentsession.TodoItem { + if plan == nil || len(todos) == 0 { + return nil + } + owned := make(map[string]struct{}) + for _, id := range plan.Summary.ActiveTodoIDs { + id = strings.TrimSpace(id) + if id != "" { + owned[id] = struct{}{} + } + } + for _, todo := range plan.Spec.Todos { + id := strings.TrimSpace(todo.ID) + if id != "" { + owned[id] = struct{}{} + } + } + if len(owned) == 0 { + return nil + } + selected := make([]agentsession.TodoItem, 0, len(todos)) + for _, todo := range todos { + if _, ok := owned[strings.TrimSpace(todo.ID)]; ok { + selected = append(selected, todo) + } + } + return selected +} + +// emitAcceptGateReport 将 Accept Gate 报告发布为统一 acceptance_decided 事件。 +func (s *Service) emitAcceptGateReport(state *runState, report acceptgate.Report) { + status := acceptance.AcceptanceFailed + if report.Outcome == acceptgate.OutcomeAccepted { + status = acceptance.AcceptanceAccepted + } + s.emitRunScopedOptional(EventAcceptanceDecided, state, AcceptanceDecidedPayload{ + Status: status, + StopReason: report.StopReason, + Summary: report.Summary, + Results: append([]acceptgate.CheckResult(nil), report.Results...), + }) +} + +func renderAssistantTextWithoutCompletion(message providertypes.Message) string { + text := strings.TrimSpace(renderPartsForVerification(message.Parts)) + if text == "" { + return "" + } + candidate, ok := extractPlanningJSONObjectIfPresent(text, "task_completion") + if !ok { + return text + } + return strings.TrimSpace(stripPlanningJSONObjectText(text, candidate)) +} + +// stripCompletionSignalFromAssistantMessage 移除仅供 runtime 控制使用的 task_completion JSON,保留用户可见回复。 +func stripCompletionSignalFromAssistantMessage(message providertypes.Message) providertypes.Message { + text := renderAssistantTextWithoutCompletion(message) + if strings.TrimSpace(text) == strings.TrimSpace(renderPartsForVerification(message.Parts)) { + return message + } + message.Parts = []providertypes.ContentPart{providertypes.NewTextPart(text)} + return message +} diff --git a/internal/runtime/controlplane/progress.go b/internal/runtime/controlplane/progress.go index 7a74438a..0e9c4cdb 100644 --- a/internal/runtime/controlplane/progress.go +++ b/internal/runtime/controlplane/progress.go @@ -73,6 +73,8 @@ type ProgressScore struct { SameSubgoal SubgoalRelation `json:"same_subgoal"` StalledProgressState StalledProgressState `json:"stalled_progress_state"` ReminderKind ReminderKind `json:"reminder_kind,omitempty"` + ShouldTerminate bool `json:"should_terminate"` + TerminateReason StopReason `json:"terminate_reason,omitempty"` } // ProgressState 保存跨轮 progress 判定所需的历史快照。 @@ -141,6 +143,14 @@ func EvaluateProgress(state ProgressState, input ProgressInput) ProgressState { next.StalledProgressState = StalledProgressHealthy next.ReminderKind = ReminderKindNone } + if input.NoProgressLimit > 0 && next.NoProgressStreak >= input.NoProgressLimit { + next.ShouldTerminate = true + next.TerminateReason = StopReasonNoProgress + } + if input.RepeatCycleLimit > 0 && next.RepeatCycleStreak >= input.RepeatCycleLimit { + next.ShouldTerminate = true + next.TerminateReason = StopReasonRepeatCycle + } return ProgressState{ LastScore: next, diff --git a/internal/runtime/controlplane/stop_reason.go b/internal/runtime/controlplane/stop_reason.go index 737ae17e..f3dc5842 100644 --- a/internal/runtime/controlplane/stop_reason.go +++ b/internal/runtime/controlplane/stop_reason.go @@ -16,12 +16,20 @@ const ( StopReasonVerificationFailed StopReason = "verification_failed" // StopReasonAccepted 表示 completion gate 与 verifier gate 均通过并完成收尾。 StopReasonAccepted StopReason = "accepted" + // StopReasonMissingCompletionSignal 表示模型停止调用工具但没有输出结构化完成信号。 + StopReasonMissingCompletionSignal StopReason = "missing_completion_signal" + // StopReasonAcceptCheckFailed 表示最终 Accept Gate 的验收项失败。 + StopReasonAcceptCheckFailed StopReason = "accept_check_failed" // StopReasonTodoNotConverged 表示 required todo 尚未收敛。 StopReasonTodoNotConverged StopReason = "todo_not_converged" // StopReasonTodoWaitingExternal 表示 required todo 仍在等待外部条件。 StopReasonTodoWaitingExternal StopReason = "todo_waiting_external" // StopReasonNoProgressAfterFinalIntercept 表示 final 连续被拦截且没有新进展。 StopReasonNoProgressAfterFinalIntercept StopReason = "no_progress_after_final_intercept" + // StopReasonNoProgress 表示运行连续缺少实质进展并触发硬终止。 + StopReasonNoProgress StopReason = "no_progress" + // StopReasonRepeatCycle 表示运行重复相同动作/结果并触发硬终止。 + StopReasonRepeatCycle StopReason = "repeat_cycle" // StopReasonMaxTurnExceededWithUnconvergedTodos 表示达到最大轮次时 todo 仍未收敛。 StopReasonMaxTurnExceededWithUnconvergedTodos StopReason = "max_turn_exceeded_with_unconverged_todos" // StopReasonMaxTurnExceededWithFailedVerification 表示达到最大轮次时 verifier 已失败。 diff --git a/internal/runtime/events.go b/internal/runtime/events.go index 55b62086..c78763b3 100644 --- a/internal/runtime/events.go +++ b/internal/runtime/events.go @@ -4,6 +4,7 @@ import ( "time" "neo-code/internal/runtime/acceptance" + "neo-code/internal/runtime/acceptgate" "neo-code/internal/runtime/controlplane" "neo-code/internal/runtime/verify" ) @@ -102,6 +103,8 @@ type AcceptanceDecidedPayload struct { UserVisibleSummary string `json:"user_visible_summary,omitempty"` InternalSummary string `json:"internal_summary,omitempty"` ContinueHint string `json:"continue_hint,omitempty"` + Summary string `json:"summary,omitempty"` + Results []acceptgate.CheckResult `json:"results,omitempty"` } // LedgerReconciledPayload 为账本对账预留负载。 diff --git a/internal/runtime/final_acceptance.go b/internal/runtime/final_acceptance.go index 28e89b0f..c55353e8 100644 --- a/internal/runtime/final_acceptance.go +++ b/internal/runtime/final_acceptance.go @@ -31,7 +31,7 @@ func (s *Service) beforeAcceptFinal( return acceptance.AcceptanceDecision{}, nil } - maxNoProgress := resolveAcceptanceMaxNoProgress(snapshot.Config.Runtime.Verification) + maxNoProgress := resolveNoProgressStreakLimit(snapshot.Config.Runtime) noProgressStreak := state.finalInterceptStreak if noProgressStreak < 0 { noProgressStreak = 0 @@ -349,10 +349,7 @@ func (s *Service) beforeAcceptFinalLegacy( } engine := acceptance.NewEngine(policy) - maxNoProgress := verificationCfg.MaxNoProgress - if maxNoProgress <= 0 { - maxNoProgress = 3 - } + maxNoProgress := resolveNoProgressStreakLimit(snapshot.Config.Runtime) noProgressStreak := state.finalInterceptStreak if noProgressStreak < 0 { noProgressStreak = 0 diff --git a/internal/runtime/final_acceptance_test.go b/internal/runtime/final_acceptance_test.go index e08b1096..aa3f229d 100644 --- a/internal/runtime/final_acceptance_test.go +++ b/internal/runtime/final_acceptance_test.go @@ -91,7 +91,7 @@ func TestBeforeAcceptFinalDecisionPaths(t *testing.T) { t.Parallel() state := newRunState("run-incomplete", agentsession.New("incomplete")) required := true - state.finalInterceptStreak = snapshot.Config.Runtime.Verification.MaxNoProgress + state.finalInterceptStreak = snapshot.Config.Runtime.MaxNoProgressStreak state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly state.session.Todos = []agentsession.TodoItem{ {ID: "todo-1", Content: "do work", Status: agentsession.TodoStatusPending, Required: &required}, diff --git a/internal/runtime/hooks_integration_test.go b/internal/runtime/hooks_integration_test.go index 3b98eba4..4fcab3a9 100644 --- a/internal/runtime/hooks_integration_test.go +++ b/internal/runtime/hooks_integration_test.go @@ -266,13 +266,9 @@ func TestRunBeforeCompletionDecisionHookBlockIsObservedOnly(t *testing.T) { if eventIndex(events, EventHookBlocked) >= 0 { t.Fatalf("before_completion_decision should not emit hook_blocked when point is observe-only") } - assertEventContains(t, events, EventHookFinished) assertEventContains(t, events, EventAgentDone) - if eventIndex(events, EventHookFinished) > eventIndex(events, EventVerificationStarted) { - t.Fatalf("before_completion_decision hook should finish before verification_started") - } - if capturedWorkdir == "" { - t.Fatalf("expected before_completion_decision hook metadata to include workdir") + if capturedWorkdir != "" { + t.Fatalf("before_completion_decision hook should not run as an authoritative terminal gate") } } diff --git a/internal/runtime/planning.go b/internal/runtime/planning.go index a906951a..14d463cd 100644 --- a/internal/runtime/planning.go +++ b/internal/runtime/planning.go @@ -18,11 +18,11 @@ const ( ) type summaryCandidate struct { - Goal string `json:"goal"` - KeySteps []string `json:"key_steps"` - Constraints []string `json:"constraints"` - Verify []string `json:"verify"` - ActiveTodoIDs []string `json:"active_todo_ids"` + Goal string `json:"goal"` + KeySteps []string `json:"key_steps"` + Constraints []string `json:"constraints"` + Verify agentsession.AcceptChecks `json:"verify"` + ActiveTodoIDs []string `json:"active_todo_ids"` } type planTurnOutput struct { @@ -119,7 +119,7 @@ func normalizeSummaryCandidate(candidate summaryCandidate) agentsession.SummaryV Goal: strings.TrimSpace(candidate.Goal), KeySteps: append([]string(nil), candidate.KeySteps...), Constraints: append([]string(nil), candidate.Constraints...), - Verify: append([]string(nil), candidate.Verify...), + Verify: candidate.Verify.Clone(), ActiveTodoIDs: append([]string(nil), candidate.ActiveTodoIDs...), } } diff --git a/internal/runtime/planning_test.go b/internal/runtime/planning_test.go index 5bcd873e..bd7e9b63 100644 --- a/internal/runtime/planning_test.go +++ b/internal/runtime/planning_test.go @@ -267,14 +267,14 @@ func TestBuildPlanArtifact(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "旧计划", Steps: []string{"旧步骤"}, - Verify: []string{"旧验证"}, + Verify: acceptText("旧验证"), }, } output := planTurnOutput{ PlanSpec: agentsession.PlanSpec{ Goal: "新计划", Steps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), Todos: []agentsession.TodoItem{ {ID: "todo-1", Content: "待办", Status: agentsession.TodoStatusPending}, }, @@ -282,7 +282,7 @@ func TestBuildPlanArtifact(t *testing.T) { SummaryCandidate: summaryCandidate{ Goal: "新计划", KeySteps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), ActiveTodoIDs: []string{"todo-1"}, }, } @@ -319,7 +319,7 @@ func TestMarkCurrentPlanCompleted(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "执行当前计划", Steps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), }, } if !markCurrentPlanCompleted(&session, true) { @@ -342,7 +342,7 @@ func TestMarkCurrentPlanCompleted(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "草案计划", Steps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), }, } if markCurrentPlanCompleted(&session, false) { @@ -361,7 +361,7 @@ func TestPlanningNeedsFullPlan(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "Use full plan when alignment is pending", Steps: []string{"align plan"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), Todos: []agentsession.TodoItem{ {ID: "todo-1", Content: "align plan", Status: agentsession.TodoStatusPending}, }, @@ -369,7 +369,7 @@ func TestPlanningNeedsFullPlan(t *testing.T) { Summary: agentsession.SummaryView{ Goal: "Use full plan when alignment is pending", KeySteps: []string{"align plan"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), ActiveTodoIDs: []string{"todo-1"}, }, } @@ -416,7 +416,7 @@ func TestApproveCurrentPlan(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "批准当前计划", Steps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), }, } if err := approveCurrentPlan(&session, "plan-approve", 3); err != nil { @@ -441,7 +441,7 @@ func TestRememberFullPlanRevisionClearsAlignmentFlags(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "完成全文对齐", Steps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), }, } session.PlanApprovalPendingFullAlign = true @@ -479,7 +479,7 @@ func TestMarkCurrentPlanRestorePendingAndContextDirty(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "restore full plan", Steps: []string{"step one"}, - Verify: []string{"verify one"}, + Verify: acceptText("verify one"), }, } if !markCurrentPlanRestorePending(&session) { @@ -535,7 +535,7 @@ func TestApproveCurrentPlanValidationErrors(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "审批校验", Steps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), }, } diff --git a/internal/runtime/run.go b/internal/runtime/run.go index c4bdb542..09e786f5 100644 --- a/internal/runtime/run.go +++ b/internal/runtime/run.go @@ -17,7 +17,7 @@ import ( "neo-code/internal/promptasset" "neo-code/internal/provider" providertypes "neo-code/internal/provider/types" - "neo-code/internal/runtime/acceptance" + "neo-code/internal/runtime/acceptgate" "neo-code/internal/runtime/controlplane" runtimehooks "neo-code/internal/runtime/hooks" "neo-code/internal/runtime/streaming" @@ -314,15 +314,6 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { return s.handleRunError(err) } hasToolCalls := len(turnOutput.assistant.ToolCalls) > 0 - state.mu.Lock() - if hasToolCalls { - state.mustUseToolAfterFinalContinue = false - state.noToolAfterFinalContinueStreak = 0 - } else if state.mustUseToolAfterFinalContinue { - state.pendingFinalProgress = false - state.noToolAfterFinalContinueStreak++ - } - state.mu.Unlock() if hasToolCalls { if err := s.appendAssistantMessageAndSave( ctx, @@ -360,7 +351,7 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { turnOutput.assistant, hasToolCalls, ) - completionState, completed := controlplane.EvaluateCompletion( + completionState, _ := controlplane.EvaluateCompletion( state.completion, hasToolCalls, ) @@ -396,101 +387,83 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { s.emitRunScoped(ctx, EventAgentDone, &state, planMessage) return nil } + if strings.TrimSpace(renderPartsForVerification(turnOutput.assistant.Parts)) != "" { + if err := s.appendAssistantMessageOnlyAndSave(ctx, &state, turnOutput.assistant); err != nil { + return s.handleRunError(err) + } + s.emitRunScoped(ctx, EventAgentDone, &state, turnOutput.assistant) + return nil + } } completionSignaled, err := maybeParseCompletionTurnOutput(turnOutput.assistant) if err != nil { return s.handleRunError(err) } + if !completionSignaled { + state.mu.Lock() + state.missingCompletionSignalStreak++ + alreadyHinted := state.completionProtocolHinted + if !alreadyHinted { + state.completionProtocolHinted = true + } + state.mu.Unlock() + if !alreadyHinted { + if err := s.appendSystemMessageAndSave(ctx, &state, completionProtocolReminder); err != nil { + return s.handleRunError(err) + } + break turnAttempt + } + state.markTerminalDecision( + controlplane.TerminalStatusIncomplete, + controlplane.StopReasonMissingCompletionSignal, + "assistant stopped without task_completion", + ) + s.emitRunScoped(ctx, EventAgentDone, &state, turnOutput.assistant) + return nil + } + if err := s.setBaseRunState(ctx, &state, controlplane.RunStateVerify); err != nil { return s.handleRunError(err) } s.updateResumeCheckpoint(ctx, &state, "verify", "completed") - acceptanceDecision, err := s.runBeforeCompletionDecisionAcceptance( - ctx, - &state, - snapshot, - turnOutput.assistant, - snapshot.Workdir, - completed, - hasToolCalls, - turnOutput.assistant.Role, - ) - if err != nil { - return s.handleRunError(err) - } - s.emitAcceptanceDecisionEvents(&state, acceptanceDecision) - applyAcceptanceResultProgress(&state, acceptanceDecision) - - switch acceptanceDecision.Status { - case acceptance.AcceptanceAccepted: - state.lastAcceptanceBlockedReason = "" - state.mustUseToolAfterFinalContinue = false - state.noToolAfterFinalContinueStreak = 0 + report := s.evaluateAcceptGate(ctx, &state, turnOutput.assistant) + s.emitAcceptGateReport(&state, report) + assistantForFinal := stripCompletionSignalFromAssistantMessage(turnOutput.assistant) + if report.Outcome == acceptgate.OutcomeAccepted { + state.mu.Lock() + state.missingCompletionSignalStreak = 0 + state.completionProtocolHinted = false + state.mu.Unlock() if markCurrentPlanCompleted(&state.session, completionSignaled) { state.touchSession() if err := s.sessionStore.UpdateSessionState(ctx, sessionStateInputFromSession(state.session)); err != nil { return s.handleRunError(err) } } - if err := s.appendAssistantMessageOnlyAndSave(ctx, &state, turnOutput.assistant); err != nil { + if err := s.appendAssistantMessageOnlyAndSave(ctx, &state, assistantForFinal); err != nil { return s.handleRunError(err) } s.emitRunScopedOptional(EventVerificationCompleted, &state, VerificationCompletedPayload{ - StopReason: acceptanceDecision.StopReason, + StopReason: report.StopReason, }) - recordAcceptanceTerminal(&state, acceptanceDecision) - s.emitRunScoped(ctx, EventAgentDone, &state, turnOutput.assistant) + state.markTerminalDecision(controlplane.TerminalStatusCompleted, report.StopReason, report.Summary) + s.emitRunScoped(ctx, EventAgentDone, &state, assistantForFinal) s.triggerMemoExtraction(state.session.ID, state.session.Messages, state.rememberedThisRun) return nil - case acceptance.AcceptanceContinue: - state.lastAcceptanceBlockedReason = strings.TrimSpace(acceptanceDecision.CompletionBlockedReason) - state.mustUseToolAfterFinalContinue = true - if state.noToolAfterFinalContinueStreak == 0 { - state.noToolAfterFinalContinueStreak = 1 - } - reminder := strings.TrimSpace(buildAcceptanceContinueHint(acceptanceDecision)) - if reminder == "" { - reminder = finalContinueReminder - } - if err := s.appendSystemMessageAndSave(ctx, &state, reminder); err != nil { - return s.handleRunError(err) - } - break turnAttempt - case acceptance.AcceptanceIncomplete: - state.lastAcceptanceBlockedReason = "" - state.mustUseToolAfterFinalContinue = false - state.noToolAfterFinalContinueStreak = 0 - if err := s.appendAssistantMessageOnlyAndSave(ctx, &state, turnOutput.assistant); err != nil { - return s.handleRunError(err) - } - recordAcceptanceTerminal(&state, acceptanceDecision) - s.emitRunScoped(ctx, EventAgentDone, &state, turnOutput.assistant) - return nil - case acceptance.AcceptanceFailed: - state.lastAcceptanceBlockedReason = "" - state.mustUseToolAfterFinalContinue = false - state.noToolAfterFinalContinueStreak = 0 - if err := s.appendAssistantMessageOnlyAndSave(ctx, &state, turnOutput.assistant); err != nil { - return s.handleRunError(err) - } - s.emitRunScopedOptional(EventVerificationFailed, &state, VerificationFailedPayload{ - StopReason: acceptanceDecision.StopReason, - ErrorClass: acceptanceDecision.ErrorClass, - }) - recordAcceptanceTerminal(&state, acceptanceDecision) - s.emitRunScoped(ctx, EventAgentDone, &state, turnOutput.assistant) - return nil - default: - state.lastAcceptanceBlockedReason = "" - state.mustUseToolAfterFinalContinue = false - state.noToolAfterFinalContinueStreak = 0 - if err := s.appendAssistantMessageOnlyAndSave(ctx, &state, turnOutput.assistant); err != nil { - return s.handleRunError(err) - } - recordAcceptanceTerminal(&state, acceptanceDecision) - s.emitRunScoped(ctx, EventAgentDone, &state, turnOutput.assistant) - return nil } + state.mu.Lock() + state.missingCompletionSignalStreak = 0 + state.completionProtocolHinted = false + state.mu.Unlock() + if err := s.appendAssistantMessageOnlyAndSave(ctx, &state, assistantForFinal); err != nil { + return s.handleRunError(err) + } + s.emitRunScopedOptional(EventVerificationFailed, &state, VerificationFailedPayload{ + StopReason: report.StopReason, + }) + state.markTerminalDecision(controlplane.TerminalStatusFailed, report.StopReason, report.Summary) + s.emitRunScoped(ctx, EventAgentDone, &state, assistantForFinal) + return nil } beforeTask := state.session.TaskState.Clone() @@ -531,17 +504,18 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { ) state.progress = controlplane.EvaluateProgress(state.progress, progressInput) currentScore := state.progress.LastScore - if shouldPromotePendingFinalProgress( - currentScore, - summary, - state.completion, - state.lastAcceptanceBlockedReason, - ) { - state.pendingFinalProgress = true - } state.mu.Unlock() s.emitRunScoped(ctx, EventProgressEvaluated, &state, ProgressEvaluatedPayload{Score: currentScore}) + if currentScore.ShouldTerminate { + reason := currentScore.TerminateReason + if reason == "" { + reason = controlplane.StopReasonNoProgress + } + state.markTerminalDecision(controlplane.TerminalStatusIncomplete, reason, "progress hard stop") + s.emitRunScoped(ctx, EventAgentDone, &state, providertypes.Message{Role: providertypes.RoleAssistant}) + return nil + } if err := s.setBaseRunState(ctx, &state, controlplane.RunStateVerify); err != nil { return s.handleRunError(err) } diff --git a/internal/runtime/runtime_progress_test.go b/internal/runtime/runtime_progress_test.go index df2d56f2..6fc53d22 100644 --- a/internal/runtime/runtime_progress_test.go +++ b/internal/runtime/runtime_progress_test.go @@ -51,7 +51,7 @@ func TestProgressStreakNoLongerStopsRun(t *testing.T) { promptInjected = true } if call >= 5 { - events <- providertypes.NewTextDeltaStreamEvent("done") + events <- providertypes.NewTextDeltaStreamEvent("{\"task_completion\":{\"completed\":true}}\ndone") events <- providertypes.NewMessageDoneStreamEvent("stop", nil) return nil } @@ -84,17 +84,17 @@ func TestProgressStreakNoLongerStopsRun(t *testing.T) { } if err := service.Run(context.Background(), input); err != nil { - t.Fatalf("expected run success without no-progress hard stop, got %v", err) + t.Fatalf("expected run to stop cleanly on no-progress, got %v", err) } events := collectRuntimeEvents(service.Events()) - assertStopReasonDecided(t, events, controlplane.StopReasonAccepted, "") + assertStopReasonDecided(t, events, controlplane.StopReasonNoProgress, "") - if !promptInjected { - t.Error("expected self-healing prompt to be injected before repetitive no-progress turns") + if promptInjected { + t.Error("did not expect self-healing prompt injection after hard no-progress termination") } - if providerCalls != 5 { - t.Fatalf("expected 5 provider turns (4 tool cycles + done), got %d", providerCalls) + if providerCalls != 3 { + t.Fatalf("expected 3 provider turns before no-progress termination, got %d", providerCalls) } } @@ -138,7 +138,7 @@ func TestProgressEvidenceResetsNoProgressStreak(t *testing.T) { events <- providertypes.NewMessageDoneStreamEvent("tool_calls", nil) return nil } - events <- providertypes.NewTextDeltaStreamEvent("done") + events <- providertypes.NewTextDeltaStreamEvent("{\"task_completion\":{\"completed\":true}}\ndone") events <- providertypes.NewMessageDoneStreamEvent("stop", nil) return nil }, @@ -207,7 +207,7 @@ func TestRepeatCycleStreakNoLongerStopsRunAndInjectsReminder(t *testing.T) { promptInjected = true } if call >= 5 { - events <- providertypes.NewTextDeltaStreamEvent("done") + events <- providertypes.NewTextDeltaStreamEvent("{\"task_completion\":{\"completed\":true}}\ndone") events <- providertypes.NewMessageDoneStreamEvent("stop", nil) return nil } @@ -233,20 +233,20 @@ func TestRepeatCycleStreakNoLongerStopsRunAndInjectsReminder(t *testing.T) { Parts: []providertypes.ContentPart{providertypes.NewTextPart("trigger repeat loop")}, }) if err != nil { - t.Fatalf("expected run success without repeat hard stop, got %v", err) + t.Fatalf("expected run to stop cleanly on repeat-cycle, got %v", err) } events := collectRuntimeEvents(service.Events()) - assertStopReasonDecided(t, events, controlplane.StopReasonAccepted, "") + assertStopReasonDecided(t, events, controlplane.StopReasonRepeatCycle, "") - if !promptInjected { - t.Fatal("expected repeat self-healing prompt to be injected before repeat limit is reached") + if promptInjected { + t.Fatal("did not expect repeat self-healing prompt injection after hard repeat-cycle termination") } if executeCalls != 4 { - t.Fatalf("expected repeated tool executions to continue until model stops, got %d", executeCalls) + t.Fatalf("expected repeated tool executions to stop at repeat limit, got %d", executeCalls) } - if providerCalls != 5 { - t.Fatalf("expected 5 provider turns (4 tool cycles + done), got %d", providerCalls) + if providerCalls != 4 { + t.Fatalf("expected 4 provider turns before repeat-cycle termination, got %d", providerCalls) } } @@ -280,7 +280,7 @@ func TestRepeatCycleFailedCallsNoLongerHardStop(t *testing.T) { chatFn: func(ctx context.Context, req providertypes.GenerateRequest, events chan<- providertypes.StreamEvent) error { call := atomic.AddInt32(&providerCalls, 1) if call >= 5 { - events <- providertypes.NewTextDeltaStreamEvent("done") + events <- providertypes.NewTextDeltaStreamEvent("{\"task_completion\":{\"completed\":true}}\ndone") events <- providertypes.NewMessageDoneStreamEvent("stop", nil) return nil } @@ -306,13 +306,13 @@ func TestRepeatCycleFailedCallsNoLongerHardStop(t *testing.T) { Parts: []providertypes.ContentPart{providertypes.NewTextPart("trigger repeat fail loop")}, }) if err != nil { - t.Fatalf("expected run success without repeat hard stop, got %v", err) + t.Fatalf("expected run to stop cleanly on repeat-cycle, got %v", err) } if executeCalls != 4 { - t.Fatalf("expected failed repeated calls to continue until model stops, got %d", executeCalls) + t.Fatalf("expected failed repeated calls to stop at repeat limit, got %d", executeCalls) } - if providerCalls != 5 { - t.Fatalf("expected 5 provider turns (4 tool cycles + done), got %d", providerCalls) + if providerCalls != 4 { + t.Fatalf("expected 4 provider turns before repeat-cycle termination, got %d", providerCalls) } } @@ -556,7 +556,7 @@ func TestNoToolIncompleteTurnStillEvaluatesProgressAndInjectsReminder(t *testing manager := newRuntimeConfigManager(t) if err := manager.Update(context.Background(), func(cfg *config.Config) error { - cfg.Runtime.MaxNoProgressStreak = 1 + cfg.Runtime.MaxNoProgressStreak = 5 return nil }); err != nil { t.Fatalf("update config: %v", err) @@ -583,7 +583,7 @@ func TestNoToolIncompleteTurnStillEvaluatesProgressAndInjectsReminder(t *testing { Message: providertypes.Message{ Role: providertypes.RoleAssistant, - Parts: []providertypes.ContentPart{providertypes.NewTextPart("done")}, + Parts: []providertypes.ContentPart{providertypes.NewTextPart("{\"task_completion\":{\"completed\":false}}\ndone")}, }, FinishReason: "stop", }, @@ -646,14 +646,14 @@ func TestNoToolIncompleteTurnStillEvaluatesProgressAndInjectsReminder(t *testing for _, message := range providerImpl.requests[1].Messages { content := renderPartsForTest(message.Parts) if message.Role == providertypes.RoleSystem && - strings.Contains(content, "") && - strings.Contains(content, "MUST call todo_write") { + strings.Contains(content, "[Runtime Control]") && + strings.Contains(content, "task_completion") { foundReminder = true break } } if !foundReminder { - t.Fatalf("expected continue reminder in second provider request messages, got %+v", providerImpl.requests[1].Messages) + t.Fatalf("expected runtime protocol note in second provider request messages, got %+v", providerImpl.requests[1].Messages) } events := collectRuntimeEvents(service.Events()) @@ -666,7 +666,7 @@ func TestAcceptanceContinueWithoutToolCallStopsAsIncomplete(t *testing.T) { manager := newRuntimeConfigManager(t) if err := manager.Update(context.Background(), func(cfg *config.Config) error { - cfg.Runtime.Verification.MaxNoProgress = 2 + cfg.Runtime.MaxNoProgressStreak = 2 return nil }); err != nil { t.Fatalf("update config: %v", err) @@ -691,14 +691,14 @@ func TestAcceptanceContinueWithoutToolCallStopsAsIncomplete(t *testing.T) { { Message: providertypes.Message{ Role: providertypes.RoleAssistant, - Parts: []providertypes.ContentPart{providertypes.NewTextPart("我已经完成了")}, + Parts: []providertypes.ContentPart{providertypes.NewTextPart("{\"task_completion\":{\"completed\":false}}\n我没有完成信号")}, }, FinishReason: "stop", }, { Message: providertypes.Message{ Role: providertypes.RoleAssistant, - Parts: []providertypes.ContentPart{providertypes.NewTextPart("任务已完成")}, + Parts: []providertypes.ContentPart{providertypes.NewTextPart("{\"task_completion\":{\"completed\":false}}\n仍然没有完成信号")}, }, FinishReason: "stop", }, @@ -738,39 +738,17 @@ func TestAcceptanceContinueWithoutToolCallStopsAsIncomplete(t *testing.T) { continue } content := renderPartsForTest(message.Parts) - if strings.Contains(content, "") && strings.Contains(content, "MUST call todo_write") { + if strings.Contains(content, "[Runtime Control]") && strings.Contains(content, "task_completion") { foundHint = true break } } if !foundHint { - t.Fatalf("expected actionable acceptance continue hint, got messages: %+v", secondRequestMessages) + t.Fatalf("expected runtime protocol note, got messages: %+v", secondRequestMessages) } events := collectRuntimeEvents(service.Events()) - assertStopReasonDecided(t, events, controlplane.StopReasonNoProgressAfterFinalIntercept, "") - foundVerificationReason := false - foundAcceptanceReason := false - for _, event := range events { - switch event.Type { - case EventVerificationStarted: - payload, ok := event.Payload.(VerificationStartedPayload) - if ok && strings.TrimSpace(payload.CompletionBlockedReason) == string(controlplane.CompletionBlockedReasonPendingTodo) { - foundVerificationReason = true - } - case EventAcceptanceDecided: - payload, ok := event.Payload.(AcceptanceDecidedPayload) - if ok && strings.TrimSpace(payload.CompletionBlockedReason) == string(controlplane.CompletionBlockedReasonPendingTodo) { - foundAcceptanceReason = true - } - } - } - if !foundVerificationReason { - t.Fatal("expected verification_started payload to include completion_blocked_reason=pending_todo") - } - if !foundAcceptanceReason { - t.Fatal("expected acceptance_decided payload to include completion_blocked_reason=pending_todo") - } + assertStopReasonDecided(t, events, controlplane.StopReasonMissingCompletionSignal, "") } func assertStopReasonDecided(t *testing.T, events []RuntimeEvent, wantReason controlplane.StopReason, wantDetail string) { diff --git a/internal/runtime/runtime_snapshot.go b/internal/runtime/runtime_snapshot.go index 9b888459..22d18a5b 100644 --- a/internal/runtime/runtime_snapshot.go +++ b/internal/runtime/runtime_snapshot.go @@ -5,7 +5,6 @@ import ( "strings" "time" - "neo-code/internal/runtime/decider" runtimefacts "neo-code/internal/runtime/facts" agentsession "neo-code/internal/session" ) @@ -15,7 +14,6 @@ type RuntimeSnapshot struct { RunID string `json:"run_id"` SessionID string `json:"session_id"` Phase string `json:"phase,omitempty"` - TaskKind string `json:"task_kind,omitempty"` UpdatedAt time.Time `json:"updated_at"` Todos TodoSnapshot `json:"todos"` Facts FactsSnapshot `json:"facts"` @@ -30,15 +28,10 @@ type FactsSnapshot struct { // DecisionSnapshot 是终态裁决快照。 type DecisionSnapshot struct { - Status string `json:"status,omitempty"` - StopReason string `json:"stop_reason,omitempty"` - MissingFacts []decider.MissingFact `json:"missing_facts,omitempty"` - RequiredNextActions []decider.RequiredAction `json:"required_next_actions,omitempty"` - RequiredInput *decider.RequiredInput `json:"required_input,omitempty"` - IntentHint string `json:"intent_hint,omitempty"` - EffectiveTaskKind string `json:"effective_task_kind,omitempty"` - UserVisibleSummary string `json:"user_visible_summary,omitempty"` - InternalSummary string `json:"internal_summary,omitempty"` + Status string `json:"status,omitempty"` + StopReason string `json:"stop_reason,omitempty"` + Summary string `json:"summary,omitempty"` + Details []string `json:"details,omitempty"` } // SubAgentSnapshot 汇总子代理事实状态,避免客户端自行遍历事实结构。 @@ -82,23 +75,19 @@ func buildRuntimeSnapshot(state *runState) RuntimeSnapshot { factsSnapshot = state.factsCollector.Snapshot() } - decisionSnapshot := DecisionSnapshot{ - Status: strings.TrimSpace(string(state.lastDeciderDecision.Status)), - StopReason: strings.TrimSpace(state.lastDeciderDecision.StopReason), - MissingFacts: append([]decider.MissingFact(nil), state.lastDeciderDecision.MissingFacts...), - RequiredNextActions: append([]decider.RequiredAction(nil), state.lastDeciderDecision.RequiredNextActions...), - RequiredInput: state.lastDeciderDecision.RequiredInput, - IntentHint: strings.TrimSpace(string(state.lastDeciderDecision.IntentHint)), - EffectiveTaskKind: strings.TrimSpace(string(state.lastDeciderDecision.EffectiveTaskKind)), - UserVisibleSummary: strings.TrimSpace(state.lastDeciderDecision.UserVisibleSummary), - InternalSummary: strings.TrimSpace(state.lastDeciderDecision.InternalSummary), + decisionSnapshot := DecisionSnapshot{} + if state.terminalSet || state.terminalStatus != "" || state.terminalStopReason != "" { + decisionSnapshot = DecisionSnapshot{ + Status: strings.TrimSpace(string(state.terminalStatus)), + StopReason: strings.TrimSpace(string(state.terminalStopReason)), + Summary: strings.TrimSpace(state.terminalStopDetail), + } } return RuntimeSnapshot{ RunID: strings.TrimSpace(state.runID), SessionID: strings.TrimSpace(state.session.ID), Phase: strings.TrimSpace(string(state.lifecycle)), - TaskKind: strings.TrimSpace(string(state.taskKind)), UpdatedAt: time.Now(), Todos: todoSnapshot, Facts: FactsSnapshot{ @@ -204,7 +193,6 @@ func (s *Service) GetRuntimeSnapshot(ctx context.Context, sessionID string) (Run snapshot := RuntimeSnapshot{ SessionID: normalizedSessionID, Phase: "", - TaskKind: string(decider.TaskKindMixed), UpdatedAt: session.UpdatedAt, Todos: buildTodoSnapshotFromItems(session.ListTodos()), Facts: FactsSnapshot{ diff --git a/internal/runtime/runtime_snapshot_test.go b/internal/runtime/runtime_snapshot_test.go index e297db56..390b8851 100644 --- a/internal/runtime/runtime_snapshot_test.go +++ b/internal/runtime/runtime_snapshot_test.go @@ -6,7 +6,6 @@ import ( "testing" "time" - "neo-code/internal/runtime/decider" runtimefacts "neo-code/internal/runtime/facts" agentsession "neo-code/internal/session" ) @@ -80,9 +79,6 @@ func TestGetRuntimeSnapshotBranches(t *testing.T) { if got.SessionID != session.ID { t.Fatalf("session id = %q, want %q", got.SessionID, session.ID) } - if got.TaskKind != string(decider.TaskKindMixed) { - t.Fatalf("task kind = %q, want %q", got.TaskKind, decider.TaskKindMixed) - } if got.Facts.RuntimeFacts.Progress.ObservedFactCount != 0 { t.Fatalf("unexpected facts snapshot: %+v", got.Facts.RuntimeFacts) } diff --git a/internal/runtime/runtime_test.go b/internal/runtime/runtime_test.go index a5608684..ccb009a1 100644 --- a/internal/runtime/runtime_test.go +++ b/internal/runtime/runtime_test.go @@ -15,9 +15,9 @@ import ( "neo-code/internal/config" agentcontext "neo-code/internal/context" contextcompact "neo-code/internal/context/compact" - "neo-code/internal/repository" "neo-code/internal/provider" providertypes "neo-code/internal/provider/types" + "neo-code/internal/repository" approvalflow "neo-code/internal/runtime/approval" "neo-code/internal/runtime/controlplane" "neo-code/internal/runtime/streaming" @@ -618,13 +618,14 @@ func (s *blockingLoadStore) CleanupExpiredSessions(ctx context.Context, maxAge t } type scriptedProvider struct { - name string - streams [][]providertypes.StreamEvent - responses []scriptedResponse - requests []providertypes.GenerateRequest - callCount int - estimateFn func(ctx context.Context, req providertypes.GenerateRequest) (providertypes.BudgetEstimate, error) - chatFn func(ctx context.Context, req providertypes.GenerateRequest, events chan<- providertypes.StreamEvent) error + name string + streams [][]providertypes.StreamEvent + responses []scriptedResponse + requests []providertypes.GenerateRequest + callCount int + requireExplicitCompletion bool + estimateFn func(ctx context.Context, req providertypes.GenerateRequest) (providertypes.BudgetEstimate, error) + chatFn func(ctx context.Context, req providertypes.GenerateRequest, events chan<- providertypes.StreamEvent) error } func (p *scriptedProvider) EstimateInputTokens( @@ -654,10 +655,32 @@ func (p *scriptedProvider) Generate(ctx context.Context, req providertypes.Gener p.callCount++ if p.chatFn != nil { - return p.chatFn(ctx, req, events) + stream, err := p.collectChatFnStream(ctx, req) + if p.shouldInjectDefaultCompletionForStream(stream) { + select { + case events <- providertypes.NewTextDeltaStreamEvent("{\"task_completion\":{\"completed\":true}}\n"): + case <-ctx.Done(): + return ctx.Err() + } + } + for _, event := range stream { + select { + case events <- event: + case <-ctx.Done(): + return ctx.Err() + } + } + return err } if callIndex < len(p.streams) { + if p.shouldInjectDefaultCompletionForStream(p.streams[callIndex]) { + select { + case events <- providertypes.NewTextDeltaStreamEvent("{\"task_completion\":{\"completed\":true}}\n"): + case <-ctx.Done(): + return ctx.Err() + } + } for _, event := range p.streams[callIndex] { select { case events <- event: @@ -675,6 +698,7 @@ func (p *scriptedProvider) Generate(ctx context.Context, req providertypes.Gener } if callIndex < len(p.responses) { response := p.responses[callIndex] + response.Message = p.withDefaultCompletionSignal(response.Message) for index, toolCall := range response.Message.ToolCalls { select { case events <- providertypes.NewToolCallStartStreamEvent(index, toolCall.ID, toolCall.Name): @@ -704,6 +728,68 @@ func (p *scriptedProvider) Generate(ctx context.Context, req providertypes.Gener return nil } +// withDefaultCompletionSignal 让旧测试脚本中的普通最终回复满足新的 task_completion 协议。 +func (p *scriptedProvider) withDefaultCompletionSignal(message providertypes.Message) providertypes.Message { + if p.requireExplicitCompletion || len(message.ToolCalls) > 0 { + return message + } + text := renderPartsForTest(message.Parts) + if strings.TrimSpace(text) == "" || + strings.Contains(text, `"task_completion"`) || + strings.Contains(text, `"plan_spec"`) { + return message + } + message.Parts = []providertypes.ContentPart{ + providertypes.NewTextPart("{\"task_completion\":{\"completed\":true}}\n" + text), + } + return message +} + +// collectChatFnStream 收集自定义测试 provider 的流事件,便于统一补齐完成信号。 +func (p *scriptedProvider) collectChatFnStream( + ctx context.Context, + req providertypes.GenerateRequest, +) ([]providertypes.StreamEvent, error) { + proxy := make(chan providertypes.StreamEvent, 1024) + errCh := make(chan error, 1) + go func() { + errCh <- p.chatFn(ctx, req, proxy) + close(proxy) + }() + stream := make([]providertypes.StreamEvent, 0) + for event := range proxy { + stream = append(stream, event) + } + return stream, <-errCh +} + +// shouldInjectDefaultCompletionForStream 为旧测试中的纯文本流补齐完成信号,工具调用流和显式完成流保持原样。 +func (p *scriptedProvider) shouldInjectDefaultCompletionForStream(stream []providertypes.StreamEvent) bool { + if p.requireExplicitCompletion { + return false + } + hasText := false + for _, event := range stream { + switch event.Type { + case providertypes.StreamEventToolCallStart, providertypes.StreamEventToolCallDelta: + return false + case providertypes.StreamEventTextDelta: + if event.TextDelta == nil { + continue + } + text := strings.TrimSpace(event.TextDelta.Text) + if text == "" { + continue + } + hasText = true + if strings.Contains(text, `"task_completion"`) || strings.Contains(text, `"plan_spec"`) { + return false + } + } + } + return hasText +} + // streamContainsMessageDone 判断测试流中是否已显式包含结束事件,避免辅助 provider 重复补发 message_done。 func streamContainsMessageDone(events []providertypes.StreamEvent) bool { for _, event := range events { @@ -2442,15 +2528,8 @@ func TestServiceRunErrorPaths(t *testing.T) { expectEvents: []EventType{EventUserMessage, EventToolStart, EventToolChunk, EventToolResult, EventAgentDone}, assert: func(t *testing.T, store *memoryStore, scripted *scriptedProvider, tool *stubTool) { t.Helper() - if scripted.callCount != 10 { - t.Fatalf("expected 10 provider calls without loop cap, got %d", scripted.callCount) - } - session := onlySession(t, store) - if got := len(session.Messages); got != 20 { - t.Fatalf("expected 20 persisted messages after 9 tool cycles and final answer, got %d", got) - } - if renderPartsForTest(session.Messages[len(session.Messages)-1].Parts) != "done after many cycles" { - t.Fatalf("expected final assistant reply to be persisted, got %+v", session.Messages[len(session.Messages)-1]) + if scripted.callCount != 7 { + t.Fatalf("expected 7 provider calls before no-progress hard stop, got %d", scripted.callCount) } }, }, @@ -3864,12 +3943,12 @@ func TestServiceRunPlanModeKeepsExistingPlanWhenPlanSpecIsInvalid(t *testing.T) Spec: agentsession.PlanSpec{ Goal: "Keep previous plan", Steps: []string{"existing step"}, - Verify: []string{"existing verify"}, + Verify: acceptText("existing verify"), }, Summary: agentsession.SummaryView{ Goal: "Keep previous plan", KeySteps: []string{"existing step"}, - Verify: []string{"existing verify"}, + Verify: acceptText("existing verify"), }, } seed.LastFullPlanRevision = 2 @@ -3882,7 +3961,8 @@ func TestServiceRunPlanModeKeepsExistingPlanWhenPlanSpecIsInvalid(t *testing.T) { Message: providertypes.Message{ Role: providertypes.RoleAssistant, - Parts: []providertypes.ContentPart{providertypes.NewTextPart(`{ + Parts: []providertypes.ContentPart{providertypes.NewTextPart(`{"task_completion":{"completed":true}} +{ "plan_spec": { "goal": "", "steps": ["bad update"], @@ -3973,8 +4053,8 @@ func TestServiceRunBuildModeDoesNotRequireCurrentPlan(t *testing.T) { if saved.CurrentPlan != nil { t.Fatalf("expected build mode to complete without CurrentPlan, got %+v", saved.CurrentPlan) } - if builder.callCount != 1 { - t.Fatalf("builder call count = %d, want 1", builder.callCount) + if builder.callCount != 2 { + t.Fatalf("builder call count = %d, want 2", builder.callCount) } if builder.builds[0].PlanStage != planStageBuildExecute { t.Fatalf("PlanStage = %q, want %q", builder.builds[0].PlanStage, planStageBuildExecute) @@ -3998,7 +4078,8 @@ func TestServiceRunPlanModeInjectsFullPlanOnNextTurnAfterDraftCreation(t *testin { Message: providertypes.Message{ Role: providertypes.RoleAssistant, - Parts: []providertypes.ContentPart{providertypes.NewTextPart(`{ + Parts: []providertypes.ContentPart{providertypes.NewTextPart(`{"task_completion":{"completed":true}} +{ "plan_spec": { "goal": "Introduce plan mode", "steps": ["persist plan state"], @@ -4089,7 +4170,7 @@ func TestServiceRunPlanModeUsesSummaryViewForAlignedPlanTurn(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "Keep planning aligned", Steps: []string{"summarize current plan"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), Todos: []agentsession.TodoItem{ {ID: "todo-aligned", Content: "summarize current plan", Status: agentsession.TodoStatusPending}, }, @@ -4097,7 +4178,7 @@ func TestServiceRunPlanModeUsesSummaryViewForAlignedPlanTurn(t *testing.T) { Summary: agentsession.SummaryView{ Goal: "Keep planning aligned", KeySteps: []string{"summarize current plan"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), ActiveTodoIDs: []string{"todo-aligned"}, }, } @@ -4165,7 +4246,7 @@ func TestServiceRunBuildModeInjectsFullPlanForUnalignedExistingPlan(t *testing.T Spec: agentsession.PlanSpec{ Goal: "Resume build execution", Steps: []string{"resume implementation"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), Todos: []agentsession.TodoItem{ {ID: "todo-restored", Content: "resume implementation", Status: agentsession.TodoStatusPending}, }, @@ -4173,7 +4254,7 @@ func TestServiceRunBuildModeInjectsFullPlanForUnalignedExistingPlan(t *testing.T Summary: agentsession.SummaryView{ Goal: "Resume build execution", KeySteps: []string{"resume implementation"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), ActiveTodoIDs: []string{"todo-restored"}, }, } @@ -4231,7 +4312,7 @@ func TestServiceRunBuildModeUsesSummaryViewForAlignedExecuteTurn(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "Execute aligned build", Steps: []string{"continue implementation"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), Todos: []agentsession.TodoItem{ {ID: "todo-build-aligned", Content: "continue implementation", Status: agentsession.TodoStatusPending}, }, @@ -4239,7 +4320,7 @@ func TestServiceRunBuildModeUsesSummaryViewForAlignedExecuteTurn(t *testing.T) { Summary: agentsession.SummaryView{ Goal: "Execute aligned build", KeySteps: []string{"continue implementation"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), ActiveTodoIDs: []string{"todo-build-aligned"}, }, } @@ -4293,7 +4374,7 @@ func TestServiceRunBuildModeInjectsFullPlanWhenSummaryIsUnusable(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "Follow full plan when summary is missing", Steps: []string{"review whole plan"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), Todos: []agentsession.TodoItem{ {ID: "todo-full-fallback", Content: "review whole plan", Status: agentsession.TodoStatusPending}, }, @@ -4350,12 +4431,12 @@ func TestServiceApproveCurrentPlanTriggersOneFullPlanAlignment(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "批准并执行当前计划", Steps: []string{"继续实现"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), }, Summary: agentsession.SummaryView{ Goal: "批准并执行当前计划", KeySteps: []string{"继续实现"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), }, } seed.LastFullPlanRevision = 4 @@ -4459,7 +4540,7 @@ func TestServiceApproveCurrentPlanTrimsSessionID(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "trim session id before load", Steps: []string{"step one"}, - Verify: []string{"verify one"}, + Verify: acceptText("verify one"), }, } if _, err := store.CreateSession(context.Background(), createSessionInputFromSession(seed)); err != nil { @@ -4499,12 +4580,12 @@ func TestServiceRunBuildModeIgnoresPlanningJSON(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "保持旧计划不被覆盖", Steps: []string{"旧步骤"}, - Verify: []string{"旧验证"}, + Verify: acceptText("旧验证"), }, Summary: agentsession.SummaryView{ Goal: "保持旧计划不被覆盖", KeySteps: []string{"旧步骤"}, - Verify: []string{"旧验证"}, + Verify: acceptText("旧验证"), }, } seed.LastFullPlanRevision = 1 @@ -4517,7 +4598,8 @@ func TestServiceRunBuildModeIgnoresPlanningJSON(t *testing.T) { { Message: providertypes.Message{ Role: providertypes.RoleAssistant, - Parts: []providertypes.ContentPart{providertypes.NewTextPart(`{ + Parts: []providertypes.ContentPart{providertypes.NewTextPart(`{"task_completion":{"completed":true}} +{ "plan_spec": { "goal": "不应在 build 中落库", "steps": ["错误改写计划"], @@ -4569,12 +4651,12 @@ func TestServiceRunCompletedPlanRequestsOneFinalFullReview(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "完成计划后仍需一次全文确认", Steps: []string{"收尾"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), }, Summary: agentsession.SummaryView{ Goal: "完成计划后仍需一次全文确认", KeySteps: []string{"收尾"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), }, } seed.LastFullPlanRevision = 2 @@ -4657,12 +4739,12 @@ func TestServiceCompactMarksPlanContextDirty(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "compact 后重对齐计划", Steps: []string{"压缩历史"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), }, Summary: agentsession.SummaryView{ Goal: "compact 后重对齐计划", KeySteps: []string{"压缩历史"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), }, } if _, err := store.CreateSession(context.Background(), createSessionInputFromSession(session)); err != nil { @@ -4708,12 +4790,12 @@ func TestServiceRunCompactedSessionRequestsRestoreAlignment(t *testing.T) { Spec: agentsession.PlanSpec{ Goal: "compact 恢复后重新对齐计划", Steps: []string{"继续执行"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), }, Summary: agentsession.SummaryView{ Goal: "compact 恢复后重新对齐计划", KeySteps: []string{"继续执行"}, - Verify: []string{"go test ./internal/runtime"}, + Verify: acceptText("go test ./internal/runtime"), }, } seed.LastFullPlanRevision = 1 diff --git a/internal/runtime/state.go b/internal/runtime/state.go index 2308a6c9..a3f231c4 100644 --- a/internal/runtime/state.go +++ b/internal/runtime/state.go @@ -41,6 +41,8 @@ type runState struct { lastAcceptanceBlockedReason string taskKind decider.TaskKind userGoal string + missingCompletionSignalStreak int + completionProtocolHinted bool factsCollector *runtimefacts.Collector lastDeciderDecision decider.Decision terminalStatus controlplane.TerminalStatus diff --git a/internal/runtime/thinking_callprovider_test.go b/internal/runtime/thinking_callprovider_test.go index 0a1f2ddf..8f451853 100644 --- a/internal/runtime/thinking_callprovider_test.go +++ b/internal/runtime/thinking_callprovider_test.go @@ -13,7 +13,7 @@ import ( func TestCallProviderRetriesWithThinkingDowngrade(t *testing.T) { t.Parallel() - scripted := &scriptedProvider{} + scripted := &scriptedProvider{requireExplicitCompletion: true} scripted.chatFn = func(ctx context.Context, req providertypes.GenerateRequest, events chan<- providertypes.StreamEvent) error { if len(scripted.requests) == 1 { return errors.Join(provider.ErrThinkingNotSupported, errors.New("upstream rejected thinking")) @@ -57,6 +57,7 @@ func TestCallProviderEmitsThinkingDeltaEvent(t *testing.T) { t.Parallel() scripted := &scriptedProvider{ + requireExplicitCompletion: true, chatFn: func(ctx context.Context, req providertypes.GenerateRequest, events chan<- providertypes.StreamEvent) error { events <- providertypes.NewThinkingDeltaStreamEvent("plan") events <- providertypes.NewTextDeltaStreamEvent("answer") diff --git a/internal/runtime/todo_run_boundary.go b/internal/runtime/todo_run_boundary.go index 38dc735a..1ba5a7dc 100644 --- a/internal/runtime/todo_run_boundary.go +++ b/internal/runtime/todo_run_boundary.go @@ -2,10 +2,10 @@ package runtime import ( "context" - "strings" "time" runtimefacts "neo-code/internal/runtime/facts" + agentsession "neo-code/internal/session" ) // resetTodosForUserRun 清空新用户 Run 的当前 Todo 状态,避免上一任务遗留的 open todo 阻塞本轮验收。 @@ -13,7 +13,7 @@ func (s *Service) resetTodosForUserRun(ctx context.Context, state *runState) err if s == nil || state == nil { return nil } - if !shouldResetTodosForUserRun(state.userGoal) { + if !shouldResetTodosForUserRun(state.session) { return nil } @@ -40,42 +40,17 @@ func (s *Service) resetTodosForUserRun(ctx context.Context, state *runState) err return nil } -// shouldResetTodosForUserRun 判断本轮用户输入是否应开启新的 Todo 边界。 -// 策略:默认保留旧 Todo,由 prompt 层 stale_todo_reminder 引导模型自行清理; -// 仅当用户输入极少且明确的"全新任务"表达时,才主动清空,避免硬编码过度覆盖。 -func shouldResetTodosForUserRun(userGoal string) bool { - goal := strings.ToLower(strings.TrimSpace(userGoal)) - if goal == "" { - return false +// shouldResetTodosForUserRun 根据 PlanArtifact 生命周期判断本轮是否开启新的 Todo 边界。 +func shouldResetTodosForUserRun(session agentsession.Session) bool { + if session.CurrentPlan == nil { + return true } - goal = strings.TrimRight(goal, " 。.!!??,,;;~~") - if goal == "" { + switch agentsession.NormalizePlanStatus(session.CurrentPlan.Status) { + case agentsession.PlanStatusDraft, agentsession.PlanStatusApproved: return false + case agentsession.PlanStatusCompleted: + return true + default: + return true } - return isExplicitNewTaskIntent(goal) -} - -// newTaskChineseKeywords 中文明确新任务关键词,仅含完全无歧义的表达。 -var newTaskChineseKeywords = []string{"新任务", "换个任务", "换任务", "新需求"} - -// newTaskEnglishKeywords 英文明确新任务关键词,仅含完全无歧义的表达。 -var newTaskEnglishKeywords = []string{"new task", "different task", "switch task"} - -// isExplicitNewTaskIntent 判断标准化后的 goal 是否含明确的新任务意图。 -// 默认返回 false,仅匹配极少且高度精确的关键词。 -func isExplicitNewTaskIntent(goal string) bool { - for _, kw := range newTaskChineseKeywords { - if strings.Contains(goal, kw) { - return true - } - } - for _, kw := range newTaskEnglishKeywords { - if goal == kw { - return true - } - if strings.HasPrefix(goal, kw+" ") || strings.HasPrefix(goal, kw+"\t") { - return true - } - } - return false } diff --git a/internal/runtime/todo_run_boundary_test.go b/internal/runtime/todo_run_boundary_test.go index aaf9e49e..79eb1edc 100644 --- a/internal/runtime/todo_run_boundary_test.go +++ b/internal/runtime/todo_run_boundary_test.go @@ -26,7 +26,6 @@ func TestResetTodosForUserRunClearsSessionAndEmitsEmptySnapshot(t *testing.T) { service := &Service{sessionStore: store, events: make(chan RuntimeEvent, 8)} state := newRunState("run-boundary", created) - state.userGoal = "新任务" if err := service.resetTodosForUserRun(context.Background(), &state); err != nil { t.Fatalf("resetTodosForUserRun() error = %v", err) } @@ -61,15 +60,19 @@ func TestResetTodosForUserRunClearsSessionAndEmitsEmptySnapshot(t *testing.T) { } } -func TestResetTodosForUserRunKeepsTodosForContinuePrompt(t *testing.T) { +func TestResetTodosForUserRunKeepsTodosForActivePlan(t *testing.T) { t.Parallel() store := newMemoryStore() required := true - session := agentsession.New("todo-boundary-continue") + session := agentsession.New("todo-boundary-plan") + session.CurrentPlan = &agentsession.PlanArtifact{ + ID: "plan-1", + Status: agentsession.PlanStatusApproved, + } session.Todos = []agentsession.TodoItem{{ - ID: "old-todo", - Content: "old task", + ID: "plan-todo", + Content: "plan task", Status: agentsession.TodoStatusPending, Required: &required, }} @@ -79,8 +82,7 @@ func TestResetTodosForUserRunKeepsTodosForContinuePrompt(t *testing.T) { } service := &Service{sessionStore: store, events: make(chan RuntimeEvent, 8)} - state := newRunState("run-boundary-continue", created) - state.userGoal = "继续" + state := newRunState("run-boundary-plan", created) if err := service.resetTodosForUserRun(context.Background(), &state); err != nil { t.Fatalf("resetTodosForUserRun() error = %v", err) } @@ -88,7 +90,7 @@ func TestResetTodosForUserRunKeepsTodosForContinuePrompt(t *testing.T) { t.Fatalf("state todos = %+v, want preserved", state.session.Todos) } if events := collectRuntimeEvents(service.Events()); len(events) != 0 { - t.Fatalf("continue prompt should not emit reset events, got %+v", events) + t.Fatalf("active plan should not emit reset events, got %+v", events) } } @@ -97,49 +99,32 @@ func TestShouldResetTodosForUserRunBoundaryVariants(t *testing.T) { cases := []struct { name string - goal string + session agentsession.Session wantReset bool }{ - // 空输入 → 保留 - {"empty", "", false}, - - // 明确新任务 → 清空 - {"chinese exact 新任务", "新任务", true}, - {"chinese 帮我做个新任务", "帮我做个新任务", true}, - {"chinese 换个任务", "换个任务", true}, - {"chinese 新需求", "新需求", true}, - {"english exact new task", "new task", true}, - {"english 新任务", "new task please", true}, - {"english switch task", "switch task", true}, - {"english different task", "different task", true}, - - // 默认保留:绝大多数输入不再被硬编码清空,交给 prompt 引导模型自行处理 - {"chinese 继续", "继续", false}, - {"chinese 继续修这个", "继续修这个", false}, - {"chinese 接着做", "接着做", false}, - {"chinese 刚才的代码还有问题", "刚才的代码还有问题", false}, - {"chinese 再优化一下", "再优化一下", false}, - {"chinese 补充测试用例", "补充测试用例", false}, - {"chinese 修复登录bug", "修复登录 bug", false}, - {"chinese 开始下一个任务", "开始下一个任务", false}, - {"chinese 重新实现", "重新实现", false}, - {"english continue", "continue", false}, - {"english continue with the failing test", "continue with the failing test", false}, - {"english implement search api", "implement search api", false}, - {"english keep going", "keep going", false}, - {"english keep it simple", "keep it simple please", false}, - {"english resume", "resume task", false}, - {"english go on", "go on please", false}, + {name: "no plan resets", session: agentsession.New("no plan"), wantReset: true}, + {name: "draft plan keeps", session: sessionWithPlanStatus(agentsession.PlanStatusDraft), wantReset: false}, + {name: "approved plan keeps", session: sessionWithPlanStatus(agentsession.PlanStatusApproved), wantReset: false}, + {name: "completed plan resets", session: sessionWithPlanStatus(agentsession.PlanStatusCompleted), wantReset: true}, } for _, tc := range cases { tc := tc t.Run(tc.name, func(t *testing.T) { t.Parallel() - got := shouldResetTodosForUserRun(tc.goal) + got := shouldResetTodosForUserRun(tc.session) if got != tc.wantReset { - t.Fatalf("shouldResetTodosForUserRun(%q) = %v, want %v", tc.goal, got, tc.wantReset) + t.Fatalf("shouldResetTodosForUserRun() = %v, want %v", got, tc.wantReset) } }) } } + +func sessionWithPlanStatus(status agentsession.PlanStatus) agentsession.Session { + session := agentsession.New("plan-boundary") + session.CurrentPlan = &agentsession.PlanArtifact{ + ID: "plan-1", + Status: status, + } + return session +} diff --git a/internal/runtime/verify/git_diff.go b/internal/runtime/verify/git_diff.go deleted file mode 100644 index ef12a61c..00000000 --- a/internal/runtime/verify/git_diff.go +++ /dev/null @@ -1,94 +0,0 @@ -package verify - -import ( - "context" - "strings" -) - -const gitDiffVerifierName = "git_diff" - -// GitDiffVerifier 校验工作区是否存在真实交付证据。 -type GitDiffVerifier struct { - Executor CommandExecutor -} - -// Name 返回 verifier 名称。 -func (v GitDiffVerifier) Name() string { - return gitDiffVerifierName -} - -// VerifyFinal 执行 git status 检查,确保 edit/fix/refactor 任务有真实改动。 -func (v GitDiffVerifier) VerifyFinal(ctx context.Context, input FinalVerifyInput) (VerificationResult, error) { - executor := v.Executor - if executor == nil { - executor = PolicyCommandExecutor{} - } - cfg := input.VerificationConfig.Verifiers[gitDiffVerifierName] - argv := compactStrings(cfg.Command) - if len(argv) == 0 { - argv = []string{"git", "status", "--porcelain", "--untracked-files=normal"} - } - - result, err := executor.Execute(ctx, CommandExecutionRequest{ - Argv: argv, - Workdir: input.Workdir, - TimeoutSec: cfg.TimeoutSec, - OutputCapByte: cfg.OutputCapBytes, - Policy: input.VerificationConfig.ExecutionPolicy, - }) - if err != nil { - return VerificationResult{ - Name: gitDiffVerifierName, - Status: VerificationFail, - Summary: err.Error(), - Reason: "git status command execution failed", - ErrorClass: classifyCommandExecutionError(err), - Evidence: commandEvidence(argv, result), - }, nil - } - if result.ExitCode != 0 { - return VerificationResult{ - Name: gitDiffVerifierName, - Status: VerificationFail, - Summary: "git status command returned non-zero", - Reason: "git status command failed", - ErrorClass: ErrorClassUnknown, - Evidence: commandEvidence(argv, result), - }, nil - } - - lines := nonEmptyLines(result.Stdout) - if len(lines) == 0 { - return VerificationResult{ - Name: gitDiffVerifierName, - Status: VerificationSoftBlock, - Summary: "git status is empty", - Reason: "no changed files detected", - Evidence: commandEvidence(argv, result), - }, nil - } - evidence := commandEvidence(argv, result) - evidence["changed_files"] = lines - evidence["changed_files_count"] = len(lines) - return VerificationResult{ - Name: gitDiffVerifierName, - Status: VerificationPass, - Summary: "git status contains changed files", - Reason: "workspace change detected", - Evidence: evidence, - }, nil -} - -// nonEmptyLines 返回文本中的非空行列表。 -func nonEmptyLines(text string) []string { - raw := strings.Split(text, "\n") - lines := make([]string, 0, len(raw)) - for _, item := range raw { - item = strings.TrimSpace(item) - if item == "" { - continue - } - lines = append(lines, item) - } - return lines -} diff --git a/internal/runtime/verify/git_diff_test.go b/internal/runtime/verify/git_diff_test.go deleted file mode 100644 index 165d9d78..00000000 --- a/internal/runtime/verify/git_diff_test.go +++ /dev/null @@ -1,120 +0,0 @@ -package verify - -import ( - "context" - "errors" - "testing" - - "neo-code/internal/config" -) - -func verifyConfigForGitDiffTests() config.VerificationConfig { - cfg := config.StaticDefaults().Runtime.Verification - cfg.ExecutionPolicy = config.VerificationExecutionPolicyConfig{ - Mode: "non_interactive", - DefaultTimeout: 1, - DefaultOutputCap: 1, - } - return cfg -} - -func TestGitDiffVerifier(t *testing.T) { - t.Parallel() - - t.Run("empty output soft blocks", func(t *testing.T) { - t.Parallel() - cfg := verifyConfigForGitDiffTests() - executor := &stubCommandExecutor{result: CommandExecutionResult{ExitCode: 0, Stdout: ""}} - result, err := (GitDiffVerifier{Executor: executor}).VerifyFinal(context.Background(), FinalVerifyInput{VerificationConfig: cfg}) - if err != nil { - t.Fatalf("VerifyFinal() error = %v", err) - } - if result.Status != VerificationSoftBlock { - t.Fatalf("status = %q, want soft_block", result.Status) - } - }) - - t.Run("changed files pass", func(t *testing.T) { - t.Parallel() - cfg := verifyConfigForGitDiffTests() - executor := &stubCommandExecutor{result: CommandExecutionResult{ExitCode: 0, Stdout: "M main.go\n?? new.txt\n"}} - result, err := (GitDiffVerifier{Executor: executor}).VerifyFinal(context.Background(), FinalVerifyInput{ - Workdir: "/workspace", - VerificationConfig: cfg, - }) - if err != nil { - t.Fatalf("VerifyFinal() error = %v", err) - } - if result.Status != VerificationPass { - t.Fatalf("status = %q, want pass", result.Status) - } - if len(executor.requests) != 1 || executor.requests[0].Argv[1] != "status" { - t.Fatalf("unexpected argv: %+v", executor.requests) - } - }) - - t.Run("staged only pass", func(t *testing.T) { - t.Parallel() - cfg := verifyConfigForGitDiffTests() - executor := &stubCommandExecutor{result: CommandExecutionResult{ExitCode: 0, Stdout: "A staged.txt\n"}} - result, err := (GitDiffVerifier{Executor: executor}).VerifyFinal(context.Background(), FinalVerifyInput{VerificationConfig: cfg}) - if err != nil { - t.Fatalf("VerifyFinal() error = %v", err) - } - if result.Status != VerificationPass { - t.Fatalf("status = %q, want pass", result.Status) - } - }) - - t.Run("unstaged only pass", func(t *testing.T) { - t.Parallel() - cfg := verifyConfigForGitDiffTests() - executor := &stubCommandExecutor{result: CommandExecutionResult{ExitCode: 0, Stdout: " M unstaged.go\n"}} - result, err := (GitDiffVerifier{Executor: executor}).VerifyFinal(context.Background(), FinalVerifyInput{VerificationConfig: cfg}) - if err != nil { - t.Fatalf("VerifyFinal() error = %v", err) - } - if result.Status != VerificationPass { - t.Fatalf("status = %q, want pass", result.Status) - } - }) - - t.Run("untracked only pass", func(t *testing.T) { - t.Parallel() - cfg := verifyConfigForGitDiffTests() - executor := &stubCommandExecutor{result: CommandExecutionResult{ExitCode: 0, Stdout: "?? untracked.txt\n"}} - result, err := (GitDiffVerifier{Executor: executor}).VerifyFinal(context.Background(), FinalVerifyInput{VerificationConfig: cfg}) - if err != nil { - t.Fatalf("VerifyFinal() error = %v", err) - } - if result.Status != VerificationPass { - t.Fatalf("status = %q, want pass", result.Status) - } - }) - - t.Run("ignored only pass", func(t *testing.T) { - t.Parallel() - cfg := verifyConfigForGitDiffTests() - executor := &stubCommandExecutor{result: CommandExecutionResult{ExitCode: 0, Stdout: "!! ignored.log\n"}} - result, err := (GitDiffVerifier{Executor: executor}).VerifyFinal(context.Background(), FinalVerifyInput{VerificationConfig: cfg}) - if err != nil { - t.Fatalf("VerifyFinal() error = %v", err) - } - if result.Status != VerificationPass { - t.Fatalf("status = %q, want pass", result.Status) - } - }) - - t.Run("execution error fails", func(t *testing.T) { - t.Parallel() - cfg := verifyConfigForGitDiffTests() - executor := &stubCommandExecutor{err: errors.New("timeout")} - result, err := (GitDiffVerifier{Executor: executor}).VerifyFinal(context.Background(), FinalVerifyInput{VerificationConfig: cfg}) - if err != nil { - t.Fatalf("VerifyFinal() error = %v", err) - } - if result.Status != VerificationFail || result.ErrorClass != ErrorClassTimeout { - t.Fatalf("unexpected result: %+v", result) - } - }) -} diff --git a/internal/session/plan.go b/internal/session/plan.go index 4bfead1b..816704a4 100644 --- a/internal/session/plan.go +++ b/internal/session/plan.go @@ -1,6 +1,7 @@ package session import ( + "encoding/json" "fmt" "strings" "time" @@ -30,6 +31,34 @@ const ( maxSummaryTodoIDs = 20 ) +const ( + // AcceptCheckOutputOnly 表示仅需要最终回复文本作为交付物。 + AcceptCheckOutputOnly = "output_only" + // AcceptCheckWorkspaceChange 表示需要运行期观测到 agent 产生工作区变更。 + AcceptCheckWorkspaceChange = "workspace_change" + // AcceptCheckCommandSuccess 表示需要运行期命令成功事实。 + AcceptCheckCommandSuccess = "command_success" + // AcceptCheckFileExists 表示需要运行期文件存在或写入事实。 + AcceptCheckFileExists = "file_exists" + // AcceptCheckContentContains 表示需要运行期内容匹配事实。 + AcceptCheckContentContains = "content_contains" + // AcceptCheckToolFact 表示需要运行期工具验证事实。 + AcceptCheckToolFact = "tool_fact" +) + +// AcceptCheck 声明 plan 阶段模型提出的机器可检查验收项。 +type AcceptCheck struct { + ID string `json:"id,omitempty"` + Kind string `json:"kind"` + Target string `json:"target,omitempty"` + Match string `json:"match,omitempty"` + Required bool `json:"required,omitempty"` + Params map[string]string `json:"params,omitempty"` +} + +// AcceptChecks 保存 plan 级验收项,并兼容读取旧的 []string 格式。 +type AcceptChecks []AcceptCheck + // PlanArtifact stores the current plan persisted in the session. type PlanArtifact struct { ID string `json:"id"` @@ -43,21 +72,21 @@ type PlanArtifact struct { // PlanSpec is the source of truth for the current plan. type PlanSpec struct { - Goal string `json:"goal"` - Steps []string `json:"steps,omitempty"` - Constraints []string `json:"constraints,omitempty"` - Verify []string `json:"verify,omitempty"` - Todos []TodoItem `json:"todos,omitempty"` - OpenQuestions []string `json:"open_questions,omitempty"` + Goal string `json:"goal"` + Steps []string `json:"steps,omitempty"` + Constraints []string `json:"constraints,omitempty"` + Verify AcceptChecks `json:"verify,omitempty"` + Todos []TodoItem `json:"todos,omitempty"` + OpenQuestions []string `json:"open_questions,omitempty"` } // SummaryView is the compact projection derived from PlanSpec. type SummaryView struct { - Goal string `json:"goal"` - KeySteps []string `json:"key_steps,omitempty"` - Constraints []string `json:"constraints,omitempty"` - Verify []string `json:"verify,omitempty"` - ActiveTodoIDs []string `json:"active_todo_ids,omitempty"` + Goal string `json:"goal"` + KeySteps []string `json:"key_steps,omitempty"` + Constraints []string `json:"constraints,omitempty"` + Verify AcceptChecks `json:"verify,omitempty"` + ActiveTodoIDs []string `json:"active_todo_ids,omitempty"` } // Clone returns a deep copy of the plan artifact. @@ -76,7 +105,7 @@ func (p PlanSpec) Clone() PlanSpec { p.Goal = strings.TrimSpace(p.Goal) p.Steps = append([]string(nil), p.Steps...) p.Constraints = append([]string(nil), p.Constraints...) - p.Verify = append([]string(nil), p.Verify...) + p.Verify = p.Verify.Clone() p.OpenQuestions = append([]string(nil), p.OpenQuestions...) p.Todos = cloneTodoItems(p.Todos) return p @@ -87,7 +116,7 @@ func (s SummaryView) Clone() SummaryView { s.Goal = strings.TrimSpace(s.Goal) s.KeySteps = append([]string(nil), s.KeySteps...) s.Constraints = append([]string(nil), s.Constraints...) - s.Verify = append([]string(nil), s.Verify...) + s.Verify = s.Verify.Clone() s.ActiveTodoIDs = append([]string(nil), s.ActiveTodoIDs...) return s } @@ -159,7 +188,7 @@ func NormalizePlanSpec(spec PlanSpec) (PlanSpec, error) { spec.Goal = strings.TrimSpace(spec.Goal) spec.Steps = normalizeTodoTextList(spec.Steps) spec.Constraints = normalizeTodoTextList(spec.Constraints) - spec.Verify = normalizeTodoTextList(spec.Verify) + spec.Verify = spec.Verify.Normalize() spec.OpenQuestions = normalizeTodoTextList(spec.OpenQuestions) todos, err := normalizeAndValidateTodos(spec.Todos) @@ -180,7 +209,7 @@ func NormalizeSummaryView(summary SummaryView, spec PlanSpec) SummaryView { normalized.Goal = strings.TrimSpace(normalized.Goal) normalized.KeySteps = normalizeTodoTextList(normalized.KeySteps) normalized.Constraints = normalizeTodoTextList(normalized.Constraints) - normalized.Verify = normalizeTodoTextList(normalized.Verify) + normalized.Verify = normalized.Verify.Normalize() normalized.ActiveTodoIDs = normalizeTodoTextList(normalized.ActiveTodoIDs) if !summaryViewStructurallyValid(normalized, spec) { return BuildSummaryView(spec) @@ -198,7 +227,7 @@ func BuildSummaryView(spec PlanSpec) SummaryView { Goal: spec.Goal, KeySteps: clampStringList(spec.Steps, maxSummaryKeySteps), Constraints: clampStringList(spec.Constraints, maxSummaryConstraints), - Verify: clampStringList(spec.Verify, maxSummaryVerify), + Verify: clampAcceptChecks(spec.Verify, maxSummaryVerify), ActiveTodoIDs: collectActiveTodoIDs(spec.Todos, maxSummaryTodoIDs), } } @@ -219,7 +248,7 @@ func RenderPlanContent(spec PlanSpec) string { sections = append(sections, "约束\n"+renderBulletList(spec.Constraints)) } if len(spec.Verify) > 0 { - sections = append(sections, "验证\n"+renderBulletList(spec.Verify)) + sections = append(sections, "验证\n"+renderBulletList(spec.Verify.RenderLines())) } activeTodos := collectActiveTodoLines(spec.Todos) if len(activeTodos) > 0 { @@ -261,6 +290,154 @@ func clampStringList(items []string, maxItems int) []string { return append([]string(nil), normalized[:maxItems]...) } +// UnmarshalJSON 兼容读取新 AcceptCheck 对象数组与旧字符串数组。 +func (checks *AcceptChecks) UnmarshalJSON(data []byte) error { + var structured []AcceptCheck + if err := json.Unmarshal(data, &structured); err == nil { + *checks = AcceptChecks(structured).Normalize() + return nil + } + var legacy []string + if err := json.Unmarshal(data, &legacy); err != nil { + return err + } + migrated := make(AcceptChecks, 0, len(legacy)) + for _, item := range normalizeTodoTextList(legacy) { + migrated = append(migrated, migrateLegacyAcceptCheck(item)) + } + *checks = migrated.Normalize() + return nil +} + +// Clone 返回验收项深拷贝,避免调用方共享 Params map。 +func (checks AcceptChecks) Clone() AcceptChecks { + if len(checks) == 0 { + return nil + } + out := make(AcceptChecks, 0, len(checks)) + for _, check := range checks { + cloned := check + cloned.ID = strings.TrimSpace(cloned.ID) + cloned.Kind = strings.TrimSpace(cloned.Kind) + cloned.Target = strings.TrimSpace(cloned.Target) + cloned.Match = strings.TrimSpace(cloned.Match) + if len(check.Params) > 0 { + cloned.Params = make(map[string]string, len(check.Params)) + for key, value := range check.Params { + key = strings.TrimSpace(key) + value = strings.TrimSpace(value) + if key == "" && value == "" { + continue + } + cloned.Params[key] = value + } + } + out = append(out, cloned) + } + return out +} + +// Normalize 规范化验收项文本字段并迁移旧 kind 名称。 +func (checks AcceptChecks) Normalize() AcceptChecks { + if len(checks) == 0 { + return nil + } + out := make(AcceptChecks, 0, len(checks)) + seen := make(map[string]struct{}, len(checks)) + for _, check := range checks.Clone() { + check.ID = strings.TrimSpace(check.ID) + check.Kind = normalizeAcceptCheckKind(check.Kind) + check.Target = strings.TrimSpace(check.Target) + check.Match = strings.TrimSpace(check.Match) + key := check.Kind + "\x00" + check.Target + "\x00" + check.Match + if key == "\x00\x00" { + continue + } + if _, exists := seen[key]; exists { + continue + } + seen[key] = struct{}{} + out = append(out, check) + } + if len(out) == 0 { + return nil + } + return out +} + +// RenderLines 返回面向计划正文的稳定验收项文本。 +func (checks AcceptChecks) RenderLines() []string { + normalized := checks.Normalize() + if len(normalized) == 0 { + return nil + } + lines := make([]string, 0, len(normalized)) + for _, check := range normalized { + label := check.Kind + if check.Target != "" { + label += ": " + check.Target + } + lines = append(lines, label) + } + return lines +} + +func clampAcceptChecks(items AcceptChecks, maxItems int) AcceptChecks { + normalized := items.Normalize() + if len(normalized) <= maxItems || maxItems <= 0 { + return normalized + } + return normalized[:maxItems].Clone() +} + +func migrateLegacyAcceptCheck(value string) AcceptCheck { + kind := AcceptCheckOutputOnly + switch { + case looksLikeCommand(value): + kind = AcceptCheckCommandSuccess + case looksLikePath(value): + kind = AcceptCheckFileExists + } + return AcceptCheck{Kind: kind, Target: strings.TrimSpace(value), Required: true} +} + +func normalizeAcceptCheckKind(kind string) string { + normalized := strings.ToLower(strings.TrimSpace(kind)) + switch normalized { + case "command": + return AcceptCheckCommandSuccess + default: + return normalized + } +} + +func looksLikeCommand(value string) bool { + trimmed := strings.ToLower(strings.TrimSpace(value)) + if trimmed == "" { + return false + } + prefixes := []string{ + "go ", "go\t", "npm ", "pnpm ", "yarn ", "make", "cargo ", "python ", "pytest", "ruff ", + "eslint", "tsc", "golangci-lint", "git ", "powershell ", "pwsh ", + } + for _, prefix := range prefixes { + if strings.HasPrefix(trimmed, prefix) { + return true + } + } + return strings.Contains(trimmed, " test ") || strings.Contains(trimmed, " build ") +} + +func looksLikePath(value string) bool { + trimmed := strings.TrimSpace(value) + if trimmed == "" || strings.Contains(trimmed, " ") { + return false + } + return strings.Contains(trimmed, "/") || + strings.Contains(trimmed, "\\") || + strings.Contains(strings.TrimPrefix(trimmed, "."), ".") +} + func collectActiveTodoIDs(items []TodoItem, limit int) []string { if len(items) == 0 || limit <= 0 { return nil diff --git a/internal/session/plan_test.go b/internal/session/plan_test.go index 06dff310..07b65031 100644 --- a/internal/session/plan_test.go +++ b/internal/session/plan_test.go @@ -7,6 +7,10 @@ import ( "time" ) +func acceptText(target string) AcceptChecks { + return AcceptChecks{{Kind: AcceptCheckOutputOnly, Target: target, Required: true}} +} + func TestNormalizeSummaryViewFallsBackToBuiltSummaryWhenStructurallyInvalid(t *testing.T) { t.Parallel() @@ -14,7 +18,7 @@ func TestNormalizeSummaryViewFallsBackToBuiltSummaryWhenStructurallyInvalid(t *t Goal: "为 runtime 引入 plan/build 模式", Steps: []string{"扩展 session", "过滤工具", "调整 runtime"}, Constraints: []string{"plan 模式禁止写工具"}, - Verify: []string{"build 结束后进入 verify"}, + Verify: acceptText("build 结束后进入 verify"), Todos: []TodoItem{ {ID: "todo-1", Content: "扩展 session", Status: TodoStatusPending}, {ID: "todo-2", Content: "过滤工具", Status: TodoStatusCompleted}, @@ -27,7 +31,7 @@ func TestNormalizeSummaryViewFallsBackToBuiltSummaryWhenStructurallyInvalid(t *t got := NormalizeSummaryView(SummaryView{ Goal: " ", KeySteps: []string{"仅一步"}, - Verify: []string{"验收"}, + Verify: acceptText("验收"), ActiveTodoIDs: []string{"missing"}, }, spec) want := BuildSummaryView(spec) @@ -49,7 +53,7 @@ func TestBuildSummaryViewUsesActiveNonTerminalTodosOnly(t *testing.T) { spec, err := NormalizePlanSpec(PlanSpec{ Goal: "整理当前执行摘要", Steps: []string{"步骤一", "步骤二"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), Todos: []TodoItem{ {ID: "todo-1", Content: "待执行", Status: TodoStatusPending}, {ID: "todo-2", Content: "执行中", Status: TodoStatusInProgress}, @@ -82,7 +86,7 @@ func TestNormalizePlanArtifactDefaultsAndStatusNormalization(t *testing.T) { Spec: PlanSpec{ Goal: "规范化计划对象", Steps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), }, }) if err != nil { @@ -116,7 +120,7 @@ func TestNormalizePlanArtifactPreservesCreatedAtAndNormalizesUpdatedAt(t *testin Spec: PlanSpec{ Goal: "保留时间字段", Steps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), }, }) if err != nil { @@ -136,7 +140,7 @@ func TestNormalizeSummaryViewAllowsEmptyTodoRefsWhenPlanHasNoTodos(t *testing.T) spec, err := NormalizePlanSpec(PlanSpec{ Goal: "无 todo 计划", Steps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), }) if err != nil { t.Fatalf("NormalizePlanSpec() error = %v", err) @@ -145,7 +149,7 @@ func TestNormalizeSummaryViewAllowsEmptyTodoRefsWhenPlanHasNoTodos(t *testing.T) summary := NormalizeSummaryView(SummaryView{ Goal: "无 todo 计划", KeySteps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), }, spec) if summary.Goal != "无 todo 计划" { t.Fatalf("Goal = %q", summary.Goal) @@ -162,7 +166,7 @@ func TestRenderPlanContentIncludesAllSections(t *testing.T) { Goal: "输出完整计划正文", Steps: []string{"步骤一", "步骤二"}, Constraints: []string{"约束一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), OpenQuestions: []string{"问题一"}, Todos: []TodoItem{ {ID: "todo-1", Content: "待执行", Status: TodoStatusPending}, @@ -254,7 +258,7 @@ func TestNormalizePlanArtifactEmptyID(t *testing.T) { Spec: PlanSpec{ Goal: "测试", Steps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), }, }) if err == nil { @@ -326,20 +330,20 @@ func TestClampStringListMaxItems(t *testing.T) { func TestSummaryViewStructurallyValidDetectsInvalid(t *testing.T) { t.Parallel() - spec := PlanSpec{Goal: "目标", Steps: []string{"步骤一"}, Verify: []string{"验证一"}} + spec := PlanSpec{Goal: "目标", Steps: []string{"步骤一"}, Verify: acceptText("验证一")} // Empty goal if summaryViewStructurallyValid(SummaryView{}, spec) { t.Fatal("expected false for empty summary") } // Missing key steps - if summaryViewStructurallyValid(SummaryView{Goal: "目标", Verify: []string{"v"}}, spec) { + if summaryViewStructurallyValid(SummaryView{Goal: "目标", Verify: acceptText("v")}, spec) { t.Fatal("expected false for missing key steps") } // Unknown active todo IDs if summaryViewStructurallyValid(SummaryView{ Goal: "目标", KeySteps: []string{"步骤一"}, - Verify: []string{"验证一"}, + Verify: acceptText("验证一"), ActiveTodoIDs: []string{"unknown"}, }, spec) { t.Fatal("expected false for unknown todo IDs") diff --git a/internal/session/store_test.go b/internal/session/store_test.go index 00228be2..d7bb8fa8 100644 --- a/internal/session/store_test.go +++ b/internal/session/store_test.go @@ -562,7 +562,7 @@ func TestSQLiteStorePersistsPlanStateRoundTrip(t *testing.T) { Goal: "落地 plan/build 模式", Steps: []string{"扩展 session", "扩展 runtime"}, Constraints: []string{"保持 tools 边界"}, - Verify: []string{"go test ./internal/..."}, + Verify: AcceptChecks{{Kind: AcceptCheckCommandSuccess, Target: "go test ./internal/...", Required: true}}, Todos: []TodoItem{ {ID: "todo-plan-1", Content: "补 plan 模型"}, }, @@ -571,7 +571,7 @@ func TestSQLiteStorePersistsPlanStateRoundTrip(t *testing.T) { Goal: "落地 plan/build 模式", KeySteps: []string{"扩展 session", "扩展 runtime"}, Constraints: []string{"保持 tools 边界"}, - Verify: []string{"go test ./internal/..."}, + Verify: AcceptChecks{{Kind: AcceptCheckCommandSuccess, Target: "go test ./internal/...", Required: true}}, ActiveTodoIDs: []string{"todo-plan-1"}, }, }, From 47afbc611b4da573357c091ee828915dbd935eb0 Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Sat, 9 May 2026 17:58:36 +0800 Subject: [PATCH 02/15] =?UTF-8?q?pref(runtime)=EF=BC=9A=E9=AA=8C=E6=94=B6?= =?UTF-8?q?=E9=87=8D=E5=BB=BA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/config/provider.go | 2 +- internal/context/source_todos_test.go | 7 +- internal/promptasset/assets.go | 14 + internal/promptasset/assets_test.go | 6 + .../context/plan_mode_build_execute.md | 6 +- .../templates/context/plan_mode_plan.md | 6 + .../templates/core/agent_identity.md | 4 +- .../completion_protocol_final_reminder.md | 5 + .../runtime/completion_protocol_reminder.md | 5 + internal/runtime/acceptance/decider.go | 17 - internal/runtime/acceptance/decider_test.go | 32 - internal/runtime/acceptance/engine.go | 140 --- internal/runtime/acceptance/engine_test.go | 146 ---- internal/runtime/acceptance/error_class.go | 6 - internal/runtime/acceptance/policy.go | 82 -- internal/runtime/acceptance/policy_test.go | 72 -- internal/runtime/acceptance/stop_reason.go | 6 - internal/runtime/acceptance/types.go | 54 -- internal/runtime/acceptance_events.go | 34 - internal/runtime/acceptance_service.go | 357 -------- internal/runtime/acceptance_service_test.go | 513 ----------- internal/runtime/acceptgate/checks.go | 2 +- internal/runtime/acceptgate/gate.go | 19 +- internal/runtime/acceptgate/gate_test.go | 27 + internal/runtime/acceptgate_runtime.go | 16 +- .../runtime/before_completion_orchestrator.go | 160 ---- internal/runtime/controlplane/progress.go | 4 +- .../runtime/controlplane/progress_test.go | 2 +- internal/runtime/decider/decide.go | 723 ---------------- .../runtime/decider/decide_additional_test.go | 155 ---- internal/runtime/decider/decide_test.go | 624 -------------- internal/runtime/decider/infer.go | 125 --- internal/runtime/decider/infer_test.go | 117 --- internal/runtime/decider/types.go | 134 --- internal/runtime/events.go | 45 +- internal/runtime/facts/collector.go | 2 +- internal/runtime/final_acceptance.go | 812 ------------------ .../final_acceptance_additional_test.go | 107 --- internal/runtime/final_acceptance_test.go | 273 ------ internal/runtime/hooks_integration_test.go | 91 -- internal/runtime/message_render.go | 13 + .../repository_context_additional_test.go | 2 +- internal/runtime/repository_context_test.go | 2 +- internal/runtime/run.go | 23 +- internal/runtime/runtime_progress_test.go | 52 +- internal/runtime/runtime_test.go | 6 +- internal/runtime/state.go | 92 +- internal/runtime/task_kind.go | 27 - internal/runtime/todo_bootstrap.go | 56 ++ internal/runtime/todo_bootstrap_test.go | 164 ++++ internal/runtime/todo_run_boundary.go | 42 +- internal/runtime/todo_run_boundary_test.go | 35 + internal/runtime/tool_diff_helpers_test.go | 2 +- internal/runtime/verify/command_success.go | 2 +- .../runtime/verify/command_success_test.go | 2 +- internal/runtime/verify/content_match.go | 2 +- internal/runtime/verify/content_match_test.go | 2 +- internal/runtime/verify/file_exists.go | 4 +- internal/runtime/verify/file_exists_test.go | 2 +- internal/runtime/verify/orchestrator.go | 20 +- internal/runtime/verify/orchestrator_test.go | 16 +- internal/runtime/verify/todo_convergence.go | 4 +- .../runtime/verify/todo_convergence_test.go | 12 +- internal/runtime/verify/types.go | 15 +- internal/session/plan.go | 16 +- internal/session/plan_test.go | 21 +- internal/session/store_test.go | 4 +- internal/tools/todo/write.go | 46 +- internal/tools/todo/write_test.go | 51 ++ internal/tui/core/app/todo_test.go | 31 +- internal/tui/core/app/update.go | 72 +- .../core/app/update_runtime_events_test.go | 21 +- internal/tui/services/runtime_contract.go | 33 +- 73 files changed, 814 insertions(+), 5030 deletions(-) create mode 100644 internal/promptasset/templates/runtime/completion_protocol_final_reminder.md create mode 100644 internal/promptasset/templates/runtime/completion_protocol_reminder.md delete mode 100644 internal/runtime/acceptance/decider.go delete mode 100644 internal/runtime/acceptance/decider_test.go delete mode 100644 internal/runtime/acceptance/engine.go delete mode 100644 internal/runtime/acceptance/engine_test.go delete mode 100644 internal/runtime/acceptance/error_class.go delete mode 100644 internal/runtime/acceptance/policy.go delete mode 100644 internal/runtime/acceptance/policy_test.go delete mode 100644 internal/runtime/acceptance/stop_reason.go delete mode 100644 internal/runtime/acceptance/types.go delete mode 100644 internal/runtime/acceptance_events.go delete mode 100644 internal/runtime/acceptance_service.go delete mode 100644 internal/runtime/acceptance_service_test.go delete mode 100644 internal/runtime/before_completion_orchestrator.go delete mode 100644 internal/runtime/decider/decide.go delete mode 100644 internal/runtime/decider/decide_additional_test.go delete mode 100644 internal/runtime/decider/decide_test.go delete mode 100644 internal/runtime/decider/infer.go delete mode 100644 internal/runtime/decider/infer_test.go delete mode 100644 internal/runtime/decider/types.go delete mode 100644 internal/runtime/final_acceptance.go delete mode 100644 internal/runtime/final_acceptance_additional_test.go delete mode 100644 internal/runtime/final_acceptance_test.go create mode 100644 internal/runtime/message_render.go delete mode 100644 internal/runtime/task_kind.go create mode 100644 internal/runtime/todo_bootstrap.go create mode 100644 internal/runtime/todo_bootstrap_test.go diff --git a/internal/config/provider.go b/internal/config/provider.go index 494c8994..fc968a1d 100644 --- a/internal/config/provider.go +++ b/internal/config/provider.go @@ -734,7 +734,7 @@ const ( GLMDefaultAPIKeyEnv = "GLM_API_KEY" MiMoName = "mimo" - MiMoDefaultBaseURL = "https://token-plan-cn.xiaomimimo.com/v1" + MiMoDefaultBaseURL = "https://api.xiaomimimo.com/v1" MiMoDefaultModel = "mimo-v2.5-pro" MiMoDefaultAPIKeyEnv = "MIMO_API_KEY" diff --git a/internal/context/source_todos_test.go b/internal/context/source_todos_test.go index 5276b9fe..a0bf7b66 100644 --- a/internal/context/source_todos_test.go +++ b/internal/context/source_todos_test.go @@ -61,11 +61,8 @@ func TestTodosSourceSections(t *testing.T) { if len(lines) < 2 || !strings.Contains(lines[0], "in-progress") { t.Fatalf("expected in_progress todo first, got %q", sections[0].Content) } - if !strings.Contains(sections[0].Content, "user clearly switches to a different task") { - t.Fatalf("expected stale todo reminder to mention task switching, got %q", sections[0].Content) - } - if !strings.Contains(sections[0].Content, "only if the work is actually done") { - t.Fatalf("expected stale todo reminder to distinguish completed from canceled, got %q", sections[0].Content) + if strings.Contains(sections[0].Content, "stale_todo_reminder") { + t.Fatalf("expected stale todo reminder to be removed, got %q", sections[0].Content) } } diff --git a/internal/promptasset/assets.go b/internal/promptasset/assets.go index 3a640f7b..f50aed11 100644 --- a/internal/promptasset/assets.go +++ b/internal/promptasset/assets.go @@ -26,6 +26,10 @@ var noProgressReminder = mustReadTemplate("templates/runtime/self_healing_no_pro var repeatCycleReminder = mustReadTemplate("templates/runtime/self_healing_repeat_cycle.txt") +var completionProtocolReminder = mustReadTemplate("templates/runtime/completion_protocol_reminder.md") + +var completionProtocolFinalReminder = mustReadTemplate("templates/runtime/completion_protocol_final_reminder.md") + var compactSystemPromptTemplate = mustReadTemplate("templates/context/compact_system_prompt.md") var planModePlanPrompt = mustReadTemplate("templates/context/plan_mode_plan.md") @@ -57,6 +61,16 @@ func RepeatCycleReminder() string { return repeatCycleReminder } +// CompletionProtocolReminder 返回缺少 task_completion 时的普通协议提示。 +func CompletionProtocolReminder() string { + return completionProtocolReminder +} + +// CompletionProtocolFinalReminder 返回缺少 task_completion 时的最终协议提示。 +func CompletionProtocolFinalReminder() string { + return completionProtocolFinalReminder +} + // CompactSystemPrompt 返回 compact 场景使用的静态 system prompt。 func CompactSystemPrompt(taskStateContract string, summaryFormat string) string { replacer := strings.NewReplacer( diff --git a/internal/promptasset/assets_test.go b/internal/promptasset/assets_test.go index 43c21456..f0a4a21c 100644 --- a/internal/promptasset/assets_test.go +++ b/internal/promptasset/assets_test.go @@ -92,6 +92,12 @@ func TestPlanModePromptTemplates(t *testing.T) { }) } + if !strings.Contains(PlanModePrompt("plan"), "summary_candidate.active_todo_ids") { + t.Fatalf("expected plan prompt to require active todo ownership") + } + if !strings.Contains(PlanModePrompt("build_execute"), "create current-run required todos") { + t.Fatalf("expected build prompt to require direct-build todo bootstrap") + } if got := PlanModePrompt("unknown"); got != "" { t.Fatalf("PlanModePrompt(unknown) = %q, want empty", got) } diff --git a/internal/promptasset/templates/context/plan_mode_build_execute.md b/internal/promptasset/templates/context/plan_mode_build_execute.md index 25a024bc..7a964b5f 100644 --- a/internal/promptasset/templates/context/plan_mode_build_execute.md +++ b/internal/promptasset/templates/context/plan_mode_build_execute.md @@ -4,8 +4,12 @@ You are currently in build execution. - If a current plan summary is attached, use it as guidance by default. - If the summary is insufficient for the current task, consult the attached full plan view when available. - If no current plan is attached, continue using task state, todos, and the conversation context. +- If no current plan and no Todo State are attached, create current-run required todos with `todo_write` before the first substantive tool call for project analysis, documentation writing, code changes, multi-step debugging, or verification work. +- Do not update or complete todo IDs that are not present in the current Todo State; create new current-run todos instead. - Small necessary deviations are allowed, but explain why they are needed. - Do not create or rewrite the current full plan in this stage. - If the current plan appears outdated, explain the mismatch and continue, or recommend switching back to planning. - Do not output `plan_spec` or `summary_candidate` in build execution. -- When you believe the task tied to the current plan is complete, start your reply with a JSON object of the form `{"task_completion":{"completed":true}}`, then continue with the normal user-facing completion message. +- When the task is complete, your final reply MUST start with `{"task_completion":{"completed":true}}` followed by your user-facing message. Without this signal, the runtime will issue up to two protocol reminders and then terminate the run. +- Do NOT output `task_completion` while you still have tool calls to make. Tools always take priority over completion signals. +- Acceptance is terminal: once you signal completion, the runtime performs a final yes/no check against the plan's verify criteria. If it fails, the run ends — there is no retry. diff --git a/internal/promptasset/templates/context/plan_mode_plan.md b/internal/promptasset/templates/context/plan_mode_plan.md index 7531cbce..ff1706c7 100644 --- a/internal/promptasset/templates/context/plan_mode_plan.md +++ b/internal/promptasset/templates/context/plan_mode_plan.md @@ -7,3 +7,9 @@ You are currently in the planning stage. - Only output a JSON object containing `plan_spec` and `summary_candidate` when you are explicitly creating or rewriting the current full plan. - `plan_spec` must include `goal`, `steps`, `constraints`, `verify`, `todos`, and `open_questions`. - `summary_candidate` must include `goal`, `key_steps`, `constraints`, `verify`, and `active_todo_ids`. +- If a Todo State section is attached, decide which non-terminal todos still belong to the current plan. +- Todos that still belong to the current plan must appear in `plan_spec.todos` and their IDs must appear in `summary_candidate.active_todo_ids`. +- Todos that do not belong to the current plan must not be copied into the new plan; create replacement plan-owned todos when ongoing work is still needed. +- `verify` must be an array of structured check objects: `[{"kind":"...", "target":"...", "required":true}]`. +- Supported `kind` values: `output_only` (chat/read-only), `workspace_change` (writes/edits), `command_success` (build/test/lint), `file_exists` (file artifacts), `content_contains` (content checks), `tool_fact` (named tool facts). +- Examples: chat → `[{"kind":"output_only"}]`, fix → `[{"kind":"workspace_change"},{"kind":"command_success","target":"go test ./..."}]`, new file → `[{"kind":"workspace_change"},{"kind":"file_exists","target":"output.go"}]`. diff --git a/internal/promptasset/templates/core/agent_identity.md b/internal/promptasset/templates/core/agent_identity.md index c5c43e22..1547076c 100644 --- a/internal/promptasset/templates/core/agent_identity.md +++ b/internal/promptasset/templates/core/agent_identity.md @@ -43,7 +43,7 @@ Your final answer is only a completion candidate. It does not by itself prove th Distinguish: - `completion_gate`: whether it is reasonable to attempt finalization. - `verification_gate`: whether the actual task requirements are satisfied. -- `acceptance_decision`: the runtime's final accepted/continue/incomplete/failed decision. +- `acceptance_decision`: the runtime's final accepted/failed decision. Acceptance is terminal — there is no "continue" or retry. Do not finalize when any of these are true: - Required todos are pending, in progress, blocked, or failed. @@ -52,4 +52,4 @@ Do not finalize when any of these are true: - Tool results indicate errors, truncation that affects confidence, or unresolved uncertainty. - A subagent finished but the main task has not integrated and verified its result. -If the runtime injects a reminder that completion was not accepted, continue execution and address the unmet condition. Do not argue with the reminder. +If acceptance fails, the task is terminated. Do not try to continue — the run has ended. diff --git a/internal/promptasset/templates/runtime/completion_protocol_final_reminder.md b/internal/promptasset/templates/runtime/completion_protocol_final_reminder.md new file mode 100644 index 00000000..8b1005a3 --- /dev/null +++ b/internal/promptasset/templates/runtime/completion_protocol_final_reminder.md @@ -0,0 +1,5 @@ +[Runtime Control] + +You again stopped calling tools without outputting `task_completion`. + +This is the final protocol reminder: if the task is done, output the structured completion signal. Otherwise, continue calling tools. Missing it again will terminate this run. diff --git a/internal/promptasset/templates/runtime/completion_protocol_reminder.md b/internal/promptasset/templates/runtime/completion_protocol_reminder.md new file mode 100644 index 00000000..44b40cd9 --- /dev/null +++ b/internal/promptasset/templates/runtime/completion_protocol_reminder.md @@ -0,0 +1,5 @@ +[Runtime Control] + +You stopped calling tools without outputting `task_completion`. + +If the task is done, end with the structured completion signal. Otherwise, continue calling tools to make progress. diff --git a/internal/runtime/acceptance/decider.go b/internal/runtime/acceptance/decider.go deleted file mode 100644 index 40dc295a..00000000 --- a/internal/runtime/acceptance/decider.go +++ /dev/null @@ -1,17 +0,0 @@ -package acceptance - -import "neo-code/internal/runtime/controlplane" - -// TerminalStatusFromAcceptance 将 acceptance 决策映射到 runtime 终态枚举。 -func TerminalStatusFromAcceptance(status AcceptanceStatus) controlplane.TerminalStatus { - switch status { - case AcceptanceAccepted: - return controlplane.TerminalStatusCompleted - case AcceptanceFailed: - return controlplane.TerminalStatusFailed - case AcceptanceIncomplete: - return controlplane.TerminalStatusIncomplete - default: - return controlplane.TerminalStatusContinue - } -} diff --git a/internal/runtime/acceptance/decider_test.go b/internal/runtime/acceptance/decider_test.go deleted file mode 100644 index ebaa9b93..00000000 --- a/internal/runtime/acceptance/decider_test.go +++ /dev/null @@ -1,32 +0,0 @@ -package acceptance - -import ( - "testing" - - "neo-code/internal/runtime/controlplane" -) - -func TestTerminalStatusFromAcceptance(t *testing.T) { - t.Parallel() - - cases := []struct { - status AcceptanceStatus - want controlplane.TerminalStatus - }{ - {status: AcceptanceAccepted, want: controlplane.TerminalStatusCompleted}, - {status: AcceptanceFailed, want: controlplane.TerminalStatusFailed}, - {status: AcceptanceIncomplete, want: controlplane.TerminalStatusIncomplete}, - {status: AcceptanceContinue, want: controlplane.TerminalStatusContinue}, - {status: AcceptanceStatus("other"), want: controlplane.TerminalStatusContinue}, - } - - for _, tc := range cases { - tc := tc - t.Run(string(tc.status), func(t *testing.T) { - t.Parallel() - if got := TerminalStatusFromAcceptance(tc.status); got != tc.want { - t.Fatalf("TerminalStatusFromAcceptance(%q) = %q, want %q", tc.status, got, tc.want) - } - }) - } -} diff --git a/internal/runtime/acceptance/engine.go b/internal/runtime/acceptance/engine.go deleted file mode 100644 index 809e03ba..00000000 --- a/internal/runtime/acceptance/engine.go +++ /dev/null @@ -1,140 +0,0 @@ -package acceptance - -import ( - "context" - "fmt" - - "neo-code/internal/runtime/controlplane" - "neo-code/internal/runtime/verify" -) - -// Engine 负责聚合 completion gate 与 verifier gate,并输出唯一的收尾决策。 -type Engine struct { - policy AcceptancePolicy -} - -// NewEngine 创建 acceptance engine。 -func NewEngine(policy AcceptancePolicy) *Engine { - if policy == nil { - policy = DefaultPolicy{} - } - return &Engine{policy: policy} -} - -// EvaluateFinal 执行 final acceptance 主链,输出结构化终态决策。 -func (e *Engine) EvaluateFinal(ctx context.Context, input FinalAcceptanceInput) (AcceptanceDecision, error) { - decision := AcceptanceDecision{ - Status: AcceptanceContinue, - StopReason: controlplane.StopReasonTodoNotConverged, - CompletionBlockedReason: input.CompletionGate.Reason, - UserVisibleSummary: "当前回合尚未达到可收尾条件,继续执行。", - InternalSummary: "completion gate did not pass", - ContinueHint: "There are unfinished required todos or unmet acceptance checks. Continue execution. Do not finalize yet.", - } - if input.CompletionGate.Passed { - verifiers, err := e.policy.ResolveVerifiers(input.VerificationInput) - if err != nil { - return AcceptanceDecision{ - Status: AcceptanceFailed, - StopReason: controlplane.StopReasonVerificationConfigMissing, - ErrorClass: verify.ErrorClassEnvMissing, - CompletionBlockedReason: input.CompletionGate.Reason, - UserVisibleSummary: "验收配置无效,任务失败。", - InternalSummary: fmt.Sprintf("verification profile resolution failed: %v", err), - }, nil - } - orch := verify.Orchestrator{Verifiers: verifiers} - gateDecision, err := orch.RunFinalVerification(ctx, input.VerificationInput) - if err != nil { - return AcceptanceDecision{}, err - } - decision = aggregateVerificationDecision(gateDecision) - } - decision.CompletionBlockedReason = input.CompletionGate.Reason - - if input.NoProgressExceeded && decision.Status == AcceptanceContinue { - decision.Status = AcceptanceIncomplete - decision.StopReason = controlplane.StopReasonNoProgressAfterFinalIntercept - decision.UserVisibleSummary = "多次拦截 final 且无进展,已停止并标记为未完成。" - decision.InternalSummary = "no-progress breaker triggered after repeated final interception" - } - - if input.MaxTurnsReached && decision.Status == AcceptanceContinue { - decision.Status = AcceptanceIncomplete - if decision.StopReason == controlplane.StopReasonVerificationFailed { - decision.StopReason = controlplane.StopReasonMaxTurnExceededWithFailedVerification - } else { - decision.StopReason = controlplane.StopReasonMaxTurnExceededWithUnconvergedTodos - } - decision.UserVisibleSummary = fmt.Sprintf("达到最大轮次限制(%d),任务未完成。", input.MaxTurnsLimit) - decision.InternalSummary = "max turn reached while final was still intercepted" - } - - return decision, nil -} - -// aggregateVerificationDecision 将 verifier gate 的首个非 pass 结果映射为 acceptance 决策。 -func aggregateVerificationDecision(gate verify.VerificationGateDecision) AcceptanceDecision { - first := firstNonPassResult(gate.Results) - if first == nil { - return AcceptanceDecision{ - Status: AcceptanceAccepted, - StopReason: controlplane.StopReasonAccepted, - UserVisibleSummary: "任务通过验收,已完成。", - InternalSummary: "completion gate and verification gate both passed", - VerifierResults: append([]verify.VerificationResult(nil), gate.Results...), - HasProgress: true, - } - } - - switch first.Status { - case verify.VerificationSoftBlock: - return AcceptanceDecision{ - Status: AcceptanceContinue, - StopReason: controlplane.StopReasonTodoNotConverged, - UserVisibleSummary: "仍有未满足的验收条件,继续执行。", - InternalSummary: "first verifier returned soft_block", - ContinueHint: "There are unfinished required todos or unmet acceptance checks. Continue execution. Do not finalize yet.", - VerifierResults: append([]verify.VerificationResult(nil), gate.Results...), - } - case verify.VerificationHardBlock: - reason := controlplane.StopReasonTodoNotConverged - if first.WaitingExternal { - reason = controlplane.StopReasonTodoWaitingExternal - } - return AcceptanceDecision{ - Status: AcceptanceIncomplete, - StopReason: reason, - UserVisibleSummary: "任务仍依赖外部条件,当前以未完成状态结束。", - InternalSummary: "first verifier returned hard_block", - VerifierResults: append([]verify.VerificationResult(nil), gate.Results...), - WaitingExternal: first.WaitingExternal, - } - default: - stopReason := gate.Reason - if stopReason == "" || stopReason == controlplane.StopReasonAccepted { - stopReason = controlplane.StopReasonVerificationFailed - } - return AcceptanceDecision{ - Status: AcceptanceFailed, - StopReason: stopReason, - ErrorClass: first.ErrorClass, - UserVisibleSummary: "验证未通过,任务失败。", - InternalSummary: "first verifier returned fail", - VerifierResults: append([]verify.VerificationResult(nil), gate.Results...), - Retryable: first.Retryable, - } - } -} - -// firstNonPassResult 返回首个非 pass 的 verifier 结果。 -func firstNonPassResult(results []verify.VerificationResult) *verify.VerificationResult { - for _, result := range results { - if result.Status == verify.VerificationPass { - continue - } - cloned := result - return &cloned - } - return nil -} diff --git a/internal/runtime/acceptance/engine_test.go b/internal/runtime/acceptance/engine_test.go deleted file mode 100644 index 78a98e5d..00000000 --- a/internal/runtime/acceptance/engine_test.go +++ /dev/null @@ -1,146 +0,0 @@ -package acceptance - -import ( - "context" - "testing" - - "neo-code/internal/runtime/controlplane" - "neo-code/internal/runtime/verify" - agentsession "neo-code/internal/session" -) - -type staticPolicy struct { - verifiers []verify.FinalVerifier - err error -} - -func (p staticPolicy) ResolveVerifiers(input verify.FinalVerifyInput) ([]verify.FinalVerifier, error) { - _ = input - return p.verifiers, p.err -} - -type staticVerifier struct { - name string - result verify.VerificationResult -} - -func (v staticVerifier) Name() string { return v.name } -func (v staticVerifier) VerifyFinal(ctx context.Context, input verify.FinalVerifyInput) (verify.VerificationResult, error) { - _ = ctx - _ = input - return v.result, nil -} - -func TestEngineEvaluateFinal(t *testing.T) { - t.Parallel() - - makeInput := func() FinalAcceptanceInput { - return FinalAcceptanceInput{ - CompletionGate: CompletionGateDecision{Passed: true}, - VerificationInput: verify.FinalVerifyInput{ - TaskState: verify.TaskStateSnapshot{VerificationProfile: string(agentsession.VerificationProfileTaskOnly)}, - }, - } - } - - t.Run("completion gate false returns continue", func(t *testing.T) { - t.Parallel() - decision, err := NewEngine(staticPolicy{}).EvaluateFinal(context.Background(), FinalAcceptanceInput{ - CompletionGate: CompletionGateDecision{Passed: false}, - }) - if err != nil { - t.Fatalf("EvaluateFinal() error = %v", err) - } - if decision.Status != AcceptanceContinue { - t.Fatalf("status = %q, want continue", decision.Status) - } - }) - - t.Run("invalid profile becomes structured failed decision", func(t *testing.T) { - t.Parallel() - decision, err := NewEngine(staticPolicy{err: context.DeadlineExceeded}).EvaluateFinal(context.Background(), makeInput()) - if err != nil { - t.Fatalf("EvaluateFinal() error = %v", err) - } - if decision.Status != AcceptanceFailed || decision.StopReason != controlplane.StopReasonVerificationConfigMissing { - t.Fatalf("unexpected decision: %+v", decision) - } - }) - - t.Run("soft block returns continue", func(t *testing.T) { - t.Parallel() - decision, err := NewEngine(staticPolicy{ - verifiers: []verify.FinalVerifier{ - staticVerifier{name: "todo", result: verify.VerificationResult{Name: "todo", Status: verify.VerificationSoftBlock}}, - }, - }).EvaluateFinal(context.Background(), makeInput()) - if err != nil { - t.Fatalf("EvaluateFinal() error = %v", err) - } - if decision.Status != AcceptanceContinue { - t.Fatalf("status = %q, want continue", decision.Status) - } - }) - - t.Run("hard block returns incomplete", func(t *testing.T) { - t.Parallel() - decision, err := NewEngine(staticPolicy{ - verifiers: []verify.FinalVerifier{ - staticVerifier{name: "todo", result: verify.VerificationResult{Name: "todo", Status: verify.VerificationHardBlock, WaitingExternal: true}}, - }, - }).EvaluateFinal(context.Background(), makeInput()) - if err != nil { - t.Fatalf("EvaluateFinal() error = %v", err) - } - if decision.Status != AcceptanceIncomplete || decision.StopReason != controlplane.StopReasonTodoWaitingExternal { - t.Fatalf("unexpected decision: %+v", decision) - } - }) - - t.Run("fail returns failed", func(t *testing.T) { - t.Parallel() - decision, err := NewEngine(staticPolicy{ - verifiers: []verify.FinalVerifier{ - staticVerifier{name: "build", result: verify.VerificationResult{Name: "build", Status: verify.VerificationFail, ErrorClass: verify.ErrorClassEnvMissing}}, - }, - }).EvaluateFinal(context.Background(), makeInput()) - if err != nil { - t.Fatalf("EvaluateFinal() error = %v", err) - } - if decision.Status != AcceptanceFailed || decision.StopReason != controlplane.StopReasonVerificationConfigMissing { - t.Fatalf("unexpected decision: %+v", decision) - } - }) - - t.Run("all pass returns accepted", func(t *testing.T) { - t.Parallel() - decision, err := NewEngine(staticPolicy{ - verifiers: []verify.FinalVerifier{ - staticVerifier{name: "todo", result: verify.VerificationResult{Name: "todo", Status: verify.VerificationPass}}, - }, - }).EvaluateFinal(context.Background(), makeInput()) - if err != nil { - t.Fatalf("EvaluateFinal() error = %v", err) - } - if decision.Status != AcceptanceAccepted { - t.Fatalf("status = %q, want accepted", decision.Status) - } - }) - - t.Run("retry exhausted no longer overrides final decision", func(t *testing.T) { - t.Parallel() - input := makeInput() - input.VerificationInput.Todos = []verify.TodoSnapshot{{ID: "todo-1", Required: true, RetryCount: 1, RetryLimit: 1}} - decision, err := NewEngine(staticPolicy{ - verifiers: []verify.FinalVerifier{ - staticVerifier{name: "todo", result: verify.VerificationResult{Name: "todo", Status: verify.VerificationPass}}, - }, - }).EvaluateFinal(context.Background(), input) - if err != nil { - t.Fatalf("EvaluateFinal() error = %v", err) - } - if decision.Status != AcceptanceAccepted || decision.StopReason != controlplane.StopReasonAccepted { - t.Fatalf("unexpected decision: %+v", decision) - } - }) -} diff --git a/internal/runtime/acceptance/error_class.go b/internal/runtime/acceptance/error_class.go deleted file mode 100644 index edda8928..00000000 --- a/internal/runtime/acceptance/error_class.go +++ /dev/null @@ -1,6 +0,0 @@ -package acceptance - -import "neo-code/internal/runtime/verify" - -// ErrorClass 复用 verifier 层统一错误分类枚举。 -type ErrorClass = verify.ErrorClass diff --git a/internal/runtime/acceptance/policy.go b/internal/runtime/acceptance/policy.go deleted file mode 100644 index a5f82062..00000000 --- a/internal/runtime/acceptance/policy.go +++ /dev/null @@ -1,82 +0,0 @@ -package acceptance - -import ( - "fmt" - "strings" - - "neo-code/internal/runtime/verify" - agentsession "neo-code/internal/session" -) - -// AcceptancePolicy 定义 final 验收时 verifier 选择策略。 -type AcceptancePolicy interface { - ResolveVerifiers(input verify.FinalVerifyInput) ([]verify.FinalVerifier, error) -} - -// DefaultPolicy 按 session-owned verification profile 解析 verifier 列表。 -type DefaultPolicy struct { - Executor verify.CommandExecutor -} - -// ResolveVerifiers 依据 verification profile 生成固定 verifier 执行列表。 -func (p DefaultPolicy) ResolveVerifiers(input verify.FinalVerifyInput) ([]verify.FinalVerifier, error) { - profile := agentsession.VerificationProfile(strings.TrimSpace(input.TaskState.VerificationProfile)) - if !profile.Valid() { - return nil, fmt.Errorf("invalid verification profile %q", input.TaskState.VerificationProfile) - } - names := mappedVerifierNames(profile) - if len(names) == 0 { - return nil, fmt.Errorf("verification profile %q has no verifier mapping", profile) - } - verifiers := make([]verify.FinalVerifier, 0, len(names)) - for _, name := range names { - if verifier := p.buildVerifier(name); verifier != nil { - verifiers = append(verifiers, verifier) - } - } - return verifiers, nil -} - -// buildVerifier 基于名称构建 verifier 实例。 -func (p DefaultPolicy) buildVerifier(name string) verify.FinalVerifier { - switch strings.TrimSpace(name) { - case "todo_convergence": - return verify.TodoConvergenceVerifier{} - case "file_exists": - return verify.FileExistsVerifier{} - case "content_match": - return verify.ContentMatchVerifier{} - case "command_success": - return verify.CommandSuccessVerifier{VerifierName: "command_success", Executor: p.Executor} - case "build": - return verify.NewBuildVerifier(p.Executor) - case "test": - return verify.NewTestVerifier(p.Executor) - case "lint": - return verify.NewLintVerifier(p.Executor) - case "typecheck": - return verify.NewTypecheckVerifier(p.Executor) - default: - return nil - } -} - -// mappedVerifierNames 返回 verification profile 对应的 verifier 名称集合。 -func mappedVerifierNames(profile agentsession.VerificationProfile) []string { - switch profile { - case agentsession.VerificationProfileTaskOnly: - return []string{"todo_convergence"} - case agentsession.VerificationProfileCreateFile, agentsession.VerificationProfileDocs: - return []string{"todo_convergence", "file_exists", "content_match"} - case agentsession.VerificationProfileConfig: - return []string{"todo_convergence", "file_exists", "content_match", "command_success"} - case agentsession.VerificationProfileEditCode: - return []string{"todo_convergence", "build", "test", "typecheck"} - case agentsession.VerificationProfileFixBug: - return []string{"todo_convergence", "test", "build", "typecheck"} - case agentsession.VerificationProfileRefactor: - return []string{"todo_convergence", "build", "test", "lint", "typecheck"} - default: - return nil - } -} diff --git a/internal/runtime/acceptance/policy_test.go b/internal/runtime/acceptance/policy_test.go deleted file mode 100644 index df81c5b4..00000000 --- a/internal/runtime/acceptance/policy_test.go +++ /dev/null @@ -1,72 +0,0 @@ -package acceptance - -import ( - "testing" - - "neo-code/internal/runtime/verify" - agentsession "neo-code/internal/session" -) - -func TestMappedVerifierNames(t *testing.T) { - t.Parallel() - - cases := []struct { - profile agentsession.VerificationProfile - want []string - }{ - {profile: agentsession.VerificationProfileTaskOnly, want: []string{"todo_convergence"}}, - {profile: agentsession.VerificationProfileCreateFile, want: []string{"todo_convergence", "file_exists", "content_match"}}, - {profile: agentsession.VerificationProfileConfig, want: []string{"todo_convergence", "file_exists", "content_match", "command_success"}}, - {profile: agentsession.VerificationProfileEditCode, want: []string{"todo_convergence", "build", "test", "typecheck"}}, - {profile: agentsession.VerificationProfileRefactor, want: []string{"todo_convergence", "build", "test", "lint", "typecheck"}}, - } - - for _, tc := range cases { - got := mappedVerifierNames(tc.profile) - if len(got) != len(tc.want) { - t.Fatalf("%s len = %d, want %d", tc.profile, len(got), len(tc.want)) - } - for i := range tc.want { - if got[i] != tc.want[i] { - t.Fatalf("%s[%d] = %q, want %q", tc.profile, i, got[i], tc.want[i]) - } - } - } -} - -func TestDefaultPolicyResolveVerifiers(t *testing.T) { - t.Parallel() - - verifiers, err := (DefaultPolicy{}).ResolveVerifiers(verify.FinalVerifyInput{ - TaskState: verify.TaskStateSnapshot{VerificationProfile: string(agentsession.VerificationProfileEditCode)}, - }) - if err != nil { - t.Fatalf("ResolveVerifiers() error = %v", err) - } - if len(verifiers) != 4 { - t.Fatalf("ResolveVerifiers() len = %d, want 4", len(verifiers)) - } - if verifiers[0].Name() != "todo_convergence" || verifiers[1].Name() != "build" { - t.Fatalf("unexpected verifier order: %s, %s", verifiers[0].Name(), verifiers[1].Name()) - } -} - -func TestDefaultPolicyResolveVerifiersRejectsInvalidProfile(t *testing.T) { - t.Parallel() - - _, err := (DefaultPolicy{}).ResolveVerifiers(verify.FinalVerifyInput{ - TaskState: verify.TaskStateSnapshot{VerificationProfile: "unknown"}, - }) - if err == nil { - t.Fatal("expected invalid profile error") - } -} - -func TestDefaultPolicyResolveVerifiersRejectsMissingProfile(t *testing.T) { - t.Parallel() - - _, err := (DefaultPolicy{}).ResolveVerifiers(verify.FinalVerifyInput{}) - if err == nil { - t.Fatal("expected missing profile error") - } -} diff --git a/internal/runtime/acceptance/stop_reason.go b/internal/runtime/acceptance/stop_reason.go deleted file mode 100644 index 47892eb4..00000000 --- a/internal/runtime/acceptance/stop_reason.go +++ /dev/null @@ -1,6 +0,0 @@ -package acceptance - -import "neo-code/internal/runtime/controlplane" - -// StopReason 复用控制面统一停止原因枚举,避免 acceptance 层引入平行真源。 -type StopReason = controlplane.StopReason diff --git a/internal/runtime/acceptance/types.go b/internal/runtime/acceptance/types.go deleted file mode 100644 index b57db9a8..00000000 --- a/internal/runtime/acceptance/types.go +++ /dev/null @@ -1,54 +0,0 @@ -package acceptance - -import ( - "neo-code/internal/runtime/controlplane" - "neo-code/internal/runtime/decider" - "neo-code/internal/runtime/verify" -) - -// CompletionGateDecision 表示 completion gate 评估结果。 -type CompletionGateDecision struct { - Passed bool `json:"passed"` - Reason string `json:"reason,omitempty"` -} - -// AcceptanceStatus 表示 final 验收的统一决策状态。 -type AcceptanceStatus string - -const ( - AcceptanceAccepted AcceptanceStatus = "accepted" - AcceptanceContinue AcceptanceStatus = "continue" - AcceptanceIncomplete AcceptanceStatus = "incomplete" - AcceptanceFailed AcceptanceStatus = "failed" -) - -// AcceptanceDecision 表示 runtime beforeAcceptFinal 的结构化输出。 -type AcceptanceDecision struct { - Status AcceptanceStatus `json:"status"` - StopReason controlplane.StopReason `json:"stop_reason,omitempty"` - ErrorClass verify.ErrorClass `json:"error_class,omitempty"` - CompletionPassed bool `json:"completion_passed,omitempty"` - VerificationPassed bool `json:"verification_passed,omitempty"` - CompletionBlockedReason string `json:"completion_blocked_reason,omitempty"` - MissingFacts []decider.MissingFact `json:"missing_facts,omitempty"` - RequiredNextActions []decider.RequiredAction `json:"required_next_actions,omitempty"` - RequiredInput *decider.RequiredInput `json:"required_input,omitempty"` - IntentHint decider.TaskKind `json:"intent_hint,omitempty"` - EffectiveTaskKind decider.TaskKind `json:"effective_task_kind,omitempty"` - UserVisibleSummary string `json:"user_visible_summary,omitempty"` - InternalSummary string `json:"internal_summary,omitempty"` - ContinueHint string `json:"continue_hint,omitempty"` - VerifierResults []verify.VerificationResult `json:"verifier_results,omitempty"` - HasProgress bool `json:"has_progress,omitempty"` - Retryable bool `json:"retryable,omitempty"` - WaitingExternal bool `json:"waiting_external,omitempty"` -} - -// FinalAcceptanceInput 表示 beforeAcceptFinal 需要的输入快照。 -type FinalAcceptanceInput struct { - CompletionGate CompletionGateDecision `json:"completion_gate"` - VerificationInput verify.FinalVerifyInput `json:"verification_input"` - NoProgressExceeded bool `json:"no_progress_exceeded,omitempty"` - MaxTurnsReached bool `json:"max_turns_reached,omitempty"` - MaxTurnsLimit int `json:"max_turns_limit,omitempty"` -} diff --git a/internal/runtime/acceptance_events.go b/internal/runtime/acceptance_events.go deleted file mode 100644 index 4b5e8297..00000000 --- a/internal/runtime/acceptance_events.go +++ /dev/null @@ -1,34 +0,0 @@ -package runtime - -import ( - "strings" - - "neo-code/internal/runtime/acceptance" -) - -// emitAcceptanceDecisionEvents 将验收决策及其 verifier 轨迹统一转换为运行时事件,保证观测链路一致。 -func (s *Service) emitAcceptanceDecisionEvents(state *runState, decision acceptance.AcceptanceDecision) { - for _, result := range decision.VerifierResults { - s.emitRunScopedOptional(EventVerificationStageFinished, state, VerificationStageFinishedPayload{ - Name: result.Name, - Status: result.Status, - Summary: result.Summary, - Reason: result.Reason, - ErrorClass: result.ErrorClass, - }) - } - s.emitRunScopedOptional(EventVerificationFinished, state, VerificationFinishedPayload{ - AcceptanceStatus: decision.Status, - StopReason: decision.StopReason, - ErrorClass: decision.ErrorClass, - }) - s.emitRunScopedOptional(EventAcceptanceDecided, state, AcceptanceDecidedPayload{ - Status: decision.Status, - StopReason: decision.StopReason, - ErrorClass: decision.ErrorClass, - CompletionBlockedReason: strings.TrimSpace(decision.CompletionBlockedReason), - UserVisibleSummary: decision.UserVisibleSummary, - InternalSummary: decision.InternalSummary, - ContinueHint: decision.ContinueHint, - }) -} diff --git a/internal/runtime/acceptance_service.go b/internal/runtime/acceptance_service.go deleted file mode 100644 index 98c4ee9e..00000000 --- a/internal/runtime/acceptance_service.go +++ /dev/null @@ -1,357 +0,0 @@ -package runtime - -import ( - "context" - "fmt" - "strings" - - "neo-code/internal/runtime/acceptance" - "neo-code/internal/runtime/controlplane" - "neo-code/internal/runtime/decider" - runtimefacts "neo-code/internal/runtime/facts" - "neo-code/internal/runtime/verify" - agentsession "neo-code/internal/session" -) - -// acceptanceServiceInput 收敛一次最终验收裁决所需的最小输入。 -type acceptanceServiceInput struct { - RunID string - SessionID string - TaskKind decider.TaskKind - UserGoal string - CompletionPassed bool - CompletionBlockedReason string - Facts runtimefacts.RuntimeFacts - Todos decider.TodoSnapshot - Progress decider.ProgressSnapshot - LastAssistantText string - HookAnnotations []string - HookGuards []decider.HookGuardSignal - NoProgressStreak int - MaxNoProgress int - VerificationProfile agentsession.VerificationProfile - VerificationInput verify.FinalVerifyInput -} - -// acceptanceService 负责生成 runtime 唯一终态裁决输出。 -type acceptanceService struct{} - -// Decide 统一执行 completion/verification/decider 聚合,并输出 AcceptanceDecision。 -func (s *acceptanceService) Decide(ctx context.Context, input acceptanceServiceInput) (acceptance.AcceptanceDecision, error) { - output := acceptance.AcceptanceDecision{ - Status: acceptance.AcceptanceContinue, - StopReason: controlplane.StopReasonTodoNotConverged, - CompletionPassed: input.CompletionPassed, - VerificationPassed: false, - CompletionBlockedReason: strings.TrimSpace(input.CompletionBlockedReason), - } - verificationGate, err := runVerificationGate(ctx, input) - if err != nil { - return acceptance.AcceptanceDecision{}, err - } - output.VerificationPassed = verificationGate.Passed - output.VerifierResults = append([]verify.VerificationResult(nil), verificationGate.Results...) - - noProgressExceeded := input.MaxNoProgress > 0 && input.NoProgressStreak >= input.MaxNoProgress - decision := decider.Decide(decider.DecisionInput{ - RunID: strings.TrimSpace(input.RunID), - SessionID: strings.TrimSpace(input.SessionID), - TaskKind: input.TaskKind, - UserGoal: strings.TrimSpace(input.UserGoal), - Facts: input.Facts, - Todos: input.Todos, - Progress: input.Progress, - LastAssistantText: strings.TrimSpace(input.LastAssistantText), - CompletionPassed: input.CompletionPassed, - CompletionReason: strings.TrimSpace(input.CompletionBlockedReason), - NoProgressExceeded: noProgressExceeded, - HookAnnotations: append([]string(nil), input.HookAnnotations...), - HookGuards: append([]decider.HookGuardSignal(nil), input.HookGuards...), - }) - - output.MissingFacts = append([]decider.MissingFact(nil), decision.MissingFacts...) - output.RequiredNextActions = append([]decider.RequiredAction(nil), decision.RequiredNextActions...) - if decision.RequiredInput != nil { - cloned := *decision.RequiredInput - if len(cloned.Details) > 0 { - details := make(map[string]any, len(cloned.Details)) - for k, v := range cloned.Details { - details[k] = v - } - cloned.Details = details - } - output.RequiredInput = &cloned - } - output.IntentHint = decision.IntentHint - output.EffectiveTaskKind = decision.EffectiveTaskKind - output.UserVisibleSummary = strings.TrimSpace(decision.UserVisibleSummary) - output.InternalSummary = strings.TrimSpace(decision.InternalSummary) - output.ContinueHint = strings.TrimSpace(buildDeciderContinueHint(decision)) - output.StopReason = toControlplaneStopReason(decision.StopReason) - output.ErrorClass = "" - - if output.StopReason == "" { - output.StopReason = controlplane.StopReasonTodoNotConverged - } - if noProgressExceeded && decision.Status == decider.DecisionIncomplete { - output.StopReason = controlplane.StopReasonNoProgressAfterFinalIntercept - } - - // accepted 必须同时通过 completion 与 verification gate。 - if input.CompletionPassed && verificationGate.Passed && decision.Status == decider.DecisionAccepted { - output.Status = acceptance.AcceptanceAccepted - output.StopReason = controlplane.StopReasonAccepted - output.ContinueHint = "" - output.CompletionPassed = true - output.VerificationPassed = true - return output, nil - } - - // verification gate 全部通过时信任其结果:即使 decider 基于启发式返回 continue, - // verification gate 已实际运行所有 profile 指定的 verifier 且全部 pass,应直接 accepted。 - // 避免 decider 与 verification gate 数据源不一致导致死循环。 - if input.CompletionPassed && verificationGate.Passed { - output.Status = acceptance.AcceptanceAccepted - output.StopReason = controlplane.StopReasonAccepted - output.ContinueHint = "" - output.CompletionPassed = true - output.VerificationPassed = true - return output, nil - } - - if input.CompletionPassed && !verificationGate.Passed { - return mergeVerificationFailure(output, verificationGate), nil - } - - switch decision.Status { - case decider.DecisionAccepted: - // completion 不通过时即便 decider accepted,也必须继续。 - output.Status = acceptance.AcceptanceContinue - case decider.DecisionFailed, decider.DecisionBlocked: - output.Status = acceptance.AcceptanceFailed - if output.StopReason == "" { - output.StopReason = controlplane.StopReasonVerificationFailed - } - case decider.DecisionIncomplete: - output.Status = acceptance.AcceptanceIncomplete - if output.StopReason == "" { - output.StopReason = controlplane.StopReasonNoProgressAfterFinalIntercept - } - default: - output.Status = acceptance.AcceptanceContinue - } - if output.Status == acceptance.AcceptanceContinue && output.ContinueHint == "" { - output.ContinueHint = finalContinueReminder - } - // 死循环兜底:多轮 final 被拦截且无进展 + 存在 open required todo → 追加强制清理指令 - if output.Status == acceptance.AcceptanceContinue && input.NoProgressStreak >= 2 && input.Todos.Summary.RequiredOpen > 0 { - staleHint := buildStaleTodoResetHint(input.Todos.Summary.RequiredOpen, input.NoProgressStreak) - if output.ContinueHint == "" { - output.ContinueHint = staleHint - } else { - output.ContinueHint = output.ContinueHint + "\n\n" + staleHint - } - } - if input.VerificationInput.RuntimeState.MaxTurnsReached && output.Status == acceptance.AcceptanceContinue { - output.Status = acceptance.AcceptanceIncomplete - if output.StopReason == controlplane.StopReasonVerificationFailed { - output.StopReason = controlplane.StopReasonMaxTurnExceededWithFailedVerification - } else { - output.StopReason = controlplane.StopReasonMaxTurnExceededWithUnconvergedTodos - } - } - output.ErrorClass = normalizeAcceptanceErrorClass(output.ErrorClass, input, output) - return output, nil -} - -// runVerificationGate 执行 verifier gate;completion 未通过时仅回填必要证据,不执行重 verifier。 -func runVerificationGate(ctx context.Context, input acceptanceServiceInput) (verify.VerificationGateDecision, error) { - if !input.CompletionPassed { - results := make([]verify.VerificationResult, 0, 1) - if strings.EqualFold(strings.TrimSpace(input.CompletionBlockedReason), string(controlplane.CompletionBlockedReasonPendingTodo)) { - if synthetic := synthesizeTodoConvergenceEvidence(toSessionTodos(input.Todos)); synthetic != nil { - results = append(results, *synthetic) - } - } - return verify.VerificationGateDecision{ - Passed: false, - Reason: controlplane.StopReasonTodoNotConverged, - Results: results, - }, nil - } - if !input.VerificationProfile.Valid() { - return verify.VerificationGateDecision{ - Passed: false, - Reason: controlplane.StopReasonVerificationConfigMissing, - Results: []verify.VerificationResult{{ - Name: "verification_profile", - Status: verify.VerificationFail, - Summary: "verification profile invalid", - Reason: fmt.Sprintf("invalid verification profile %q", input.VerificationProfile), - ErrorClass: verify.ErrorClassEnvMissing, - }}, - }, nil - } - - policy := acceptance.DefaultPolicy{Executor: verify.PolicyCommandExecutor{}} - verifiers, err := policy.ResolveVerifiers(input.VerificationInput) - if err != nil { - return verify.VerificationGateDecision{ - Passed: false, - Reason: controlplane.StopReasonVerificationConfigMissing, - Results: []verify.VerificationResult{{ - Name: "verification_profile", - Status: verify.VerificationFail, - Summary: "verification profile resolution failed", - Reason: err.Error(), - ErrorClass: verify.ErrorClassEnvMissing, - }}, - }, nil - } - orch := verify.Orchestrator{Verifiers: verifiers} - return orch.RunFinalVerification(ctx, input.VerificationInput) -} - -// mergeVerificationFailure 统一把 verification gate 非通过映射到终态决策。 -func mergeVerificationFailure( - base acceptance.AcceptanceDecision, - gate verify.VerificationGateDecision, -) acceptance.AcceptanceDecision { - out := base - out.VerificationPassed = gate.Passed - out.VerifierResults = append([]verify.VerificationResult(nil), gate.Results...) - out.StopReason = gate.Reason - - first := firstNonPassVerifierResult(gate.Results) - if gate.Passed || first == nil { - return out - } - out.ErrorClass = first.ErrorClass - switch first.Status { - case verify.VerificationSoftBlock: - out.Status = acceptance.AcceptanceContinue - if out.StopReason == "" { - out.StopReason = controlplane.StopReasonTodoNotConverged - } - if out.ContinueHint == "" { - out.ContinueHint = finalContinueReminder - } - case verify.VerificationHardBlock: - out.Status = acceptance.AcceptanceIncomplete - if first.WaitingExternal { - out.StopReason = controlplane.StopReasonTodoWaitingExternal - } - default: - out.Status = acceptance.AcceptanceFailed - if out.StopReason == "" || out.StopReason == controlplane.StopReasonAccepted { - out.StopReason = controlplane.StopReasonVerificationFailed - } - if out.ErrorClass == "" { - out.ErrorClass = verify.ErrorClassUnknown - } - } - out.ErrorClass = normalizeAcceptanceErrorClass(out.ErrorClass, acceptanceServiceInput{}, out) - return out -} - -// normalizeAcceptanceErrorClass 统一补齐终态 error_class,避免 TUI/Gateway 出现 unknown/empty 推断歧义。 -func normalizeAcceptanceErrorClass( - current verify.ErrorClass, - input acceptanceServiceInput, - decision acceptance.AcceptanceDecision, -) verify.ErrorClass { - if current != "" { - return current - } - switch decision.StopReason { - case controlplane.StopReasonVerificationConfigMissing: - return verify.ErrorClassEnvMissing - case controlplane.StopReasonVerificationExecutionDenied: - return verify.ErrorClassPermissionDenied - case controlplane.StopReasonVerificationExecutionError: - return verify.ErrorClassUnknown - case controlplane.StopReasonRequiredTodoFailed: - return verify.ErrorClassUnknown - case controlplane.StopReasonNoProgressAfterFinalIntercept: - return verify.ErrorClassUnknown - case controlplane.StopReasonVerificationFailed: - if input.TaskKind == decider.TaskKindSubAgent && len(input.Facts.SubAgents.Failed) > 0 { - return verify.ErrorClass("subagent_failed") - } - if input.TaskKind == decider.TaskKindWorkspaceWrite { - if errClass := latestToolErrorClass(input.Facts.Errors.ToolErrors, "filesystem_write_file"); errClass != "" { - return verify.ErrorClass(errClass) - } - } - if errClass := latestToolErrorClass(input.Facts.Errors.ToolErrors, "spawn_subagent"); errClass != "" { - return verify.ErrorClass(errClass) - } - if errClass := latestToolErrorClass(input.Facts.Errors.ToolErrors, "filesystem_write_file"); errClass != "" { - return verify.ErrorClass(errClass) - } - } - if decision.Status == acceptance.AcceptanceFailed || decision.Status == acceptance.AcceptanceIncomplete { - return verify.ErrorClassUnknown - } - return "" -} - -// latestToolErrorClass 返回目标工具最近一次非空错误分类。 -func latestToolErrorClass(errors []runtimefacts.ToolErrorFact, tool string) string { - target := strings.TrimSpace(tool) - for i := len(errors) - 1; i >= 0; i-- { - entry := errors[i] - if target != "" && !strings.EqualFold(strings.TrimSpace(entry.Tool), target) { - continue - } - errClass := strings.TrimSpace(entry.ErrorClass) - if errClass != "" { - return errClass - } - } - return "" -} - -// buildStaleTodoResetHint 构造死循环兜底指令:当多轮 final 被拦截且无进展时,强制要求模型清理 stale todo。 -func buildStaleTodoResetHint(requiredOpen, noProgressStreak int) string { - var b strings.Builder - b.WriteString("\n") - b.WriteString(fmt.Sprintf("CRITICAL: You have been blocked for %d consecutive final attempts with %d unfinished required todo(s).\n", noProgressStreak, requiredOpen)) - b.WriteString("If these todos are NO LONGER RELEVANT to the user's CURRENT request,\n") - b.WriteString("you MUST mark them canceled using todo_write set_status=canceled RIGHT NOW.\n") - b.WriteString("Do NOT attempt to complete stale todos that belong to a PREVIOUS task.\n") - b.WriteString("After canceling irrelevant todos, proceed with the user's current request.\n") - b.WriteString("") - return b.String() -} - -func firstNonPassVerifierResult(results []verify.VerificationResult) *verify.VerificationResult { - for _, result := range results { - if result.Status == verify.VerificationPass { - continue - } - cloned := result - return &cloned - } - return nil -} - -func toSessionTodos(snapshot decider.TodoSnapshot) []agentsession.TodoItem { - if len(snapshot.Items) == 0 { - return nil - } - out := make([]agentsession.TodoItem, 0, len(snapshot.Items)) - for _, item := range snapshot.Items { - required := item.Required - status := agentsession.TodoStatus(strings.TrimSpace(item.Status)) - out = append(out, agentsession.TodoItem{ - ID: strings.TrimSpace(item.ID), - Content: strings.TrimSpace(item.Content), - Status: status, - Required: &required, - Artifacts: append([]string(nil), item.Artifacts...), - FailureReason: strings.TrimSpace(item.FailureReason), - }) - } - return out -} diff --git a/internal/runtime/acceptance_service_test.go b/internal/runtime/acceptance_service_test.go deleted file mode 100644 index c78cb6e4..00000000 --- a/internal/runtime/acceptance_service_test.go +++ /dev/null @@ -1,513 +0,0 @@ -package runtime - -import ( - "context" - "strings" - "testing" - "time" - - "neo-code/internal/config" - providertypes "neo-code/internal/provider/types" - "neo-code/internal/runtime/acceptance" - "neo-code/internal/runtime/controlplane" - "neo-code/internal/runtime/decider" - runtimefacts "neo-code/internal/runtime/facts" - runtimehooks "neo-code/internal/runtime/hooks" - "neo-code/internal/runtime/verify" - agentsession "neo-code/internal/session" -) - -func TestBeforeCompletionDecisionAcceptanceHooksOnOffParity(t *testing.T) { - t.Parallel() - - snapshot := TurnBudgetSnapshot{Config: config.StaticDefaults().Clone(), Workdir: t.TempDir()} - assistant := providertypes.Message{ - Role: providertypes.RoleAssistant, - Parts: []providertypes.ContentPart{providertypes.NewTextPart("done")}, - } - - offService := &Service{events: make(chan RuntimeEvent, 16)} - offState := newRunState("run-hooks-off", agentsession.New("hooks-off")) - offState.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - offDecision, err := offService.runBeforeCompletionDecisionAcceptance( - context.Background(), - &offState, - snapshot, - assistant, - snapshot.Workdir, - true, - false, - providertypes.RoleAssistant, - ) - if err != nil { - t.Fatalf("hooks-off decision error = %v", err) - } - - onService := &Service{events: make(chan RuntimeEvent, 16)} - baseRegistry := runtimehooks.NewRegistry() - userRegistry := runtimehooks.NewRegistry() - repoRegistry := runtimehooks.NewRegistry() - if err := userRegistry.Register(runtimehooks.HookSpec{ - ID: "user-note", - Point: runtimehooks.HookPointBeforeCompletionDecision, - Scope: runtimehooks.HookScopeUser, - Source: runtimehooks.HookSourceUser, - Handler: func(_ context.Context, _ runtimehooks.HookContext) runtimehooks.HookResult { - return runtimehooks.HookResult{Status: runtimehooks.HookResultPass, Message: "note"} - }, - }); err != nil { - t.Fatalf("register user hook: %v", err) - } - onService.SetHookExecutor(composeRuntimeHookExecutors( - runtimehooks.NewExecutor(baseRegistry, nil, time.Second), - runtimehooks.NewExecutor(userRegistry, nil, time.Second), - runtimehooks.NewExecutor(repoRegistry, nil, time.Second), - )) - onState := newRunState("run-hooks-on", agentsession.New("hooks-on")) - onState.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - onDecision, err := onService.runBeforeCompletionDecisionAcceptance( - context.Background(), - &onState, - snapshot, - assistant, - snapshot.Workdir, - true, - false, - providertypes.RoleAssistant, - ) - if err != nil { - t.Fatalf("hooks-on decision error = %v", err) - } - - if offDecision.Status != onDecision.Status || offDecision.StopReason != onDecision.StopReason { - t.Fatalf("hooks parity mismatch: off=%+v on=%+v", offDecision, onDecision) - } - - offContinue, err := offService.runBeforeCompletionDecisionAcceptance( - context.Background(), - &offState, - snapshot, - assistant, - snapshot.Workdir, - false, - false, - providertypes.RoleAssistant, - ) - if err != nil { - t.Fatalf("hooks-off continue decision error = %v", err) - } - onContinue, err := onService.runBeforeCompletionDecisionAcceptance( - context.Background(), - &onState, - snapshot, - assistant, - snapshot.Workdir, - false, - false, - providertypes.RoleAssistant, - ) - if err != nil { - t.Fatalf("hooks-on continue decision error = %v", err) - } - if offContinue.Status != onContinue.Status || offContinue.StopReason != onContinue.StopReason { - t.Fatalf("hooks continue parity mismatch: off=%+v on=%+v", offContinue, onContinue) - } -} - -func TestAcceptanceDecisionRequiresCompletionAndVerification(t *testing.T) { - t.Parallel() - - service := &Service{events: make(chan RuntimeEvent, 16)} - snapshot := TurnBudgetSnapshot{Config: config.StaticDefaults().Clone(), Workdir: t.TempDir()} - assistant := providertypes.Message{Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("done")}} - - t.Run("completion_pass_but_verification_fail_not_accepted", func(t *testing.T) { - state := newRunState("run-verify-fail", agentsession.New("verify-fail")) - state.session.TaskState.VerificationProfile = agentsession.VerificationProfileCreateFile - state.session.TaskState.KeyArtifacts = []string{"missing.txt"} - decision, err := service.beforeAcceptFinal(context.Background(), &state, snapshot, assistant, true, beforeCompletionHookSignals{}) - if err != nil { - t.Fatalf("beforeAcceptFinal error = %v", err) - } - if decision.Status == acceptance.AcceptanceAccepted { - t.Fatalf("unexpected accepted decision: %+v", decision) - } - if !decision.CompletionPassed || decision.VerificationPassed { - t.Fatalf("expected completion=true verification=false, got %+v", decision) - } - if len(decision.VerifierResults) == 0 { - t.Fatalf("expected verification trace in decision") - } - }) - - t.Run("completion_fail_not_accepted_even_if_task_only", func(t *testing.T) { - state := newRunState("run-completion-fail", agentsession.New("completion-fail")) - state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - decision, err := service.beforeAcceptFinal(context.Background(), &state, snapshot, assistant, false, beforeCompletionHookSignals{}) - if err != nil { - t.Fatalf("beforeAcceptFinal error = %v", err) - } - if decision.Status == acceptance.AcceptanceAccepted { - t.Fatalf("unexpected accepted decision: %+v", decision) - } - if decision.CompletionPassed { - t.Fatalf("expected completion=false, got %+v", decision) - } - }) - - t.Run("accepted_requires_both_true", func(t *testing.T) { - state := newRunState("run-accepted", agentsession.New("accepted")) - state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - decision, err := service.beforeAcceptFinal(context.Background(), &state, snapshot, assistant, true, beforeCompletionHookSignals{}) - if err != nil { - t.Fatalf("beforeAcceptFinal error = %v", err) - } - if decision.Status != acceptance.AcceptanceAccepted { - t.Fatalf("status=%q want accepted", decision.Status) - } - if !decision.CompletionPassed || !decision.VerificationPassed { - t.Fatalf("accepted must satisfy completion+verification, got %+v", decision) - } - }) - - t.Run("completed_required_todo_list_does_not_block_acceptance", func(t *testing.T) { - state := newRunState("run-completed-todo", agentsession.New("completed-todo")) - state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - required := true - state.session.Todos = []agentsession.TodoItem{{ - ID: "todo-done", - Content: "done", - Status: agentsession.TodoStatusCompleted, - Required: &required, - }} - decision, err := service.beforeAcceptFinal(context.Background(), &state, snapshot, assistant, true, beforeCompletionHookSignals{}) - if err != nil { - t.Fatalf("beforeAcceptFinal error = %v", err) - } - if decision.Status != acceptance.AcceptanceAccepted { - t.Fatalf("status=%q want accepted, decision=%+v", decision.Status, decision) - } - if !decision.CompletionPassed || !decision.VerificationPassed { - t.Fatalf("completed todo list should pass completion+verification, got %+v", decision) - } - }) -} - -func TestBeforeCompletionDecisionUserRepoCannotDirectlyTerminal(t *testing.T) { - t.Parallel() - - service := &Service{events: make(chan RuntimeEvent, 16)} - baseRegistry := runtimehooks.NewRegistry() - userRegistry := runtimehooks.NewRegistry() - if err := userRegistry.Register(runtimehooks.HookSpec{ - ID: "user-guard", - Point: runtimehooks.HookPointBeforeCompletionDecision, - Scope: runtimehooks.HookScopeUser, - Source: runtimehooks.HookSourceUser, - Handler: func(_ context.Context, _ runtimehooks.HookContext) runtimehooks.HookResult { - return runtimehooks.HookResult{Status: runtimehooks.HookResultFailed, Message: "guard"} - }, - }); err != nil { - t.Fatalf("register user guard hook: %v", err) - } - service.SetHookExecutor(composeRuntimeHookExecutors( - runtimehooks.NewExecutor(baseRegistry, nil, time.Second), - runtimehooks.NewExecutor(userRegistry, nil, time.Second), - nil, - )) - - state := newRunState("run-user-guard", agentsession.New("user-guard")) - state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - snapshot := TurnBudgetSnapshot{Config: config.StaticDefaults().Clone(), Workdir: t.TempDir()} - decision, err := service.runBeforeCompletionDecisionAcceptance( - context.Background(), - &state, - snapshot, - providertypes.Message{Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("done")}}, - snapshot.Workdir, - true, - false, - providertypes.RoleAssistant, - ) - if err != nil { - t.Fatalf("runBeforeCompletionDecisionAcceptance error = %v", err) - } - if decision.Status != acceptance.AcceptanceAccepted { - t.Fatalf("user guard should not directly terminal-block acceptance path, got %+v", decision) - } - if !strings.Contains(decision.InternalSummary, "hook signals consumed") { - t.Fatalf("expected hook signal to be consumed by acceptance input, got %q", decision.InternalSummary) - } -} - -func TestVerificationTraceEmitsStageEvents(t *testing.T) { - t.Parallel() - - service := &Service{events: make(chan RuntimeEvent, 32)} - state := newRunState("run-verify-stage-events", agentsession.New("verify-stage-events")) - decision := acceptance.AcceptanceDecision{ - Status: acceptance.AcceptanceContinue, - StopReason: controlplane.StopReasonVerificationFailed, - ErrorClass: "content_mismatch", - VerifierResults: []verify.VerificationResult{ - { - Name: "content_match", - Status: verify.VerificationSoftBlock, - Summary: "missing expected token", - Reason: "content mismatch", - ErrorClass: "content_mismatch", - }, - }, - } - service.emitAcceptanceDecisionEvents(&state, decision) - events := collectRuntimeEvents(service.Events()) - stageCount := 0 - for _, evt := range events { - if evt.Type == EventVerificationStageFinished { - stageCount++ - } - } - if stageCount == 0 { - t.Fatal("expected verification_stage_finished events from acceptance decision trace") - } -} - -func TestVerificationFailureProducesStopReasonAndErrorClass(t *testing.T) { - t.Parallel() - - service := &Service{events: make(chan RuntimeEvent, 16)} - state := newRunState("run-invalid-profile", agentsession.New("invalid-profile")) - state.session.TaskState.VerificationProfile = "bad_profile" - snapshot := TurnBudgetSnapshot{Config: config.StaticDefaults().Clone(), Workdir: t.TempDir()} - decision, err := service.beforeAcceptFinal( - context.Background(), - &state, - snapshot, - providertypes.Message{Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("done")}}, - true, - beforeCompletionHookSignals{}, - ) - if err != nil { - t.Fatalf("beforeAcceptFinal error = %v", err) - } - if decision.Status != acceptance.AcceptanceFailed { - t.Fatalf("status=%q want failed", decision.Status) - } - if decision.StopReason != controlplane.StopReasonVerificationConfigMissing { - t.Fatalf("stop reason=%q want verification_config_missing", decision.StopReason) - } - if decision.ErrorClass == "" { - t.Fatalf("verification failure must keep non-empty error_class: %+v", decision) - } -} - -func TestChatAnswerAcceptancePassesWithoutHeavyVerification(t *testing.T) { - t.Parallel() - - service := &Service{events: make(chan RuntimeEvent, 16)} - state := newRunState("run-chat-answer", agentsession.New("chat-answer")) - state.taskKind = decider.TaskKindChatAnswer - state.userGoal = "你好" - state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - - snapshot := TurnBudgetSnapshot{Config: config.StaticDefaults().Clone(), Workdir: t.TempDir()} - decision, err := service.beforeAcceptFinal( - context.Background(), - &state, - snapshot, - providertypes.Message{Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("你好")}}, - true, - beforeCompletionHookSignals{}, - ) - if err != nil { - t.Fatalf("beforeAcceptFinal error = %v", err) - } - if !decision.CompletionPassed || !decision.VerificationPassed { - t.Fatalf("chat answer should pass completion+verification gate, got %+v", decision) - } - if decision.Status != acceptance.AcceptanceAccepted { - t.Fatalf("status=%q want accepted", decision.Status) - } - if decision.StopReason != controlplane.StopReasonAccepted { - t.Fatalf("stop reason=%q want accepted", decision.StopReason) - } -} - -func TestNormalizeAcceptanceErrorClassCoverage(t *testing.T) { - t.Parallel() - - testInput := acceptanceServiceInput{ - TaskKind: decider.TaskKindWorkspaceWrite, - Facts: runtimefacts.RuntimeFacts{ - Errors: runtimefacts.ErrorFacts{ - ToolErrors: []runtimefacts.ToolErrorFact{{ - Tool: "filesystem_write_file", - ErrorClass: "permission_denied", - }}, - }, - }, - } - cases := []struct { - name string - input acceptanceServiceInput - decision acceptance.AcceptanceDecision - want verify.ErrorClass - }{ - { - name: "verification_config_missing", - input: acceptanceServiceInput{}, - decision: acceptance.AcceptanceDecision{StopReason: controlplane.StopReasonVerificationConfigMissing, Status: acceptance.AcceptanceFailed}, - want: verify.ErrorClassEnvMissing, - }, - { - name: "verification_execution_denied", - input: acceptanceServiceInput{}, - decision: acceptance.AcceptanceDecision{StopReason: controlplane.StopReasonVerificationExecutionDenied, Status: acceptance.AcceptanceFailed}, - want: verify.ErrorClassPermissionDenied, - }, - { - name: "verification_execution_error", - input: acceptanceServiceInput{}, - decision: acceptance.AcceptanceDecision{StopReason: controlplane.StopReasonVerificationExecutionError, Status: acceptance.AcceptanceFailed}, - want: verify.ErrorClassUnknown, - }, - { - name: "required_todo_failed", - input: acceptanceServiceInput{}, - decision: acceptance.AcceptanceDecision{StopReason: controlplane.StopReasonRequiredTodoFailed, Status: acceptance.AcceptanceFailed}, - want: verify.ErrorClassUnknown, - }, - { - name: "no_progress_after_final_intercept", - input: acceptanceServiceInput{}, - decision: acceptance.AcceptanceDecision{StopReason: controlplane.StopReasonNoProgressAfterFinalIntercept, Status: acceptance.AcceptanceIncomplete}, - want: verify.ErrorClassUnknown, - }, - { - name: "subagent failed", - input: acceptanceServiceInput{ - TaskKind: decider.TaskKindSubAgent, - Facts: runtimefacts.RuntimeFacts{ - SubAgents: runtimefacts.SubAgentFacts{ - Failed: []runtimefacts.SubAgentFact{{TaskID: "sa-1"}}, - }, - }, - }, - decision: acceptance.AcceptanceDecision{StopReason: controlplane.StopReasonVerificationFailed, Status: acceptance.AcceptanceFailed}, - want: verify.ErrorClass("subagent_failed"), - }, - { - name: "workspace_write_hard_failure", - input: testInput, - decision: acceptance.AcceptanceDecision{StopReason: controlplane.StopReasonVerificationFailed, Status: acceptance.AcceptanceFailed}, - want: verify.ErrorClass("permission_denied"), - }, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - got := normalizeAcceptanceErrorClass("", tc.input, tc.decision) - if got != tc.want { - t.Fatalf("normalizeAcceptanceErrorClass() = %q, want %q", got, tc.want) - } - }) - } -} - -func TestNoProgressThresholdProducesIncomplete(t *testing.T) { - t.Parallel() - - service := &Service{events: make(chan RuntimeEvent, 16)} - state := newRunState("run-no-progress", agentsession.New("no-progress")) - state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - state.finalInterceptStreak = config.DefaultMaxNoProgressStreak - state.mustUseToolAfterFinalContinue = true - state.noToolAfterFinalContinueStreak = config.DefaultMaxNoProgressStreak - - snapshot := TurnBudgetSnapshot{Config: config.StaticDefaults().Clone(), Workdir: t.TempDir()} - decision, err := service.beforeAcceptFinal( - context.Background(), - &state, - snapshot, - providertypes.Message{Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("已完成")}}, - false, - beforeCompletionHookSignals{}, - ) - if err != nil { - t.Fatalf("beforeAcceptFinal error = %v", err) - } - if decision.Status != acceptance.AcceptanceIncomplete { - t.Fatalf("status=%q want incomplete", decision.Status) - } - if decision.StopReason != controlplane.StopReasonNoProgressAfterFinalIntercept { - t.Fatalf("stop_reason=%q want no_progress_after_final_intercept", decision.StopReason) - } -} - -func TestRunVerificationGateSkipsProfileValidationWhenCompletionBlocked(t *testing.T) { - t.Parallel() - - gate, err := runVerificationGate(context.Background(), acceptanceServiceInput{ - CompletionPassed: false, - CompletionBlockedReason: string(controlplane.CompletionBlockedReasonPendingTodo), - VerificationProfile: "invalid_profile", - Todos: decider.TodoSnapshot{ - Items: []decider.TodoViewItem{{ - ID: "todo-1", - Content: "x", - Status: "pending", - Required: true, - }}, - Summary: decider.TodoSummary{ - RequiredTotal: 1, - RequiredOpen: 1, - }, - }, - }) - if err != nil { - t.Fatalf("runVerificationGate error = %v", err) - } - if gate.Reason != controlplane.StopReasonTodoNotConverged { - t.Fatalf("reason=%q want todo_not_converged", gate.Reason) - } - if len(gate.Results) == 0 { - t.Fatal("expected synthetic todo convergence evidence when completion is blocked") - } -} - -func TestBeforeAcceptFinalMarksIncompleteWhenFinalInterceptHitsMaxTurns(t *testing.T) { - t.Parallel() - - service := &Service{events: make(chan RuntimeEvent, 16)} - cfg := config.StaticDefaults().Clone() - cfg.Runtime.MaxTurns = 1 - snapshot := TurnBudgetSnapshot{Config: cfg, Workdir: t.TempDir()} - - state := newRunState("run-max-turn-final-intercept", agentsession.New("max-turn-final-intercept")) - state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - required := true - state.session.Todos = []agentsession.TodoItem{{ - ID: "todo-1", - Content: "pending", - Status: agentsession.TodoStatusPending, - Required: &required, - }} - - decision, err := service.beforeAcceptFinal( - context.Background(), - &state, - snapshot, - providertypes.Message{Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("done")}}, - true, - beforeCompletionHookSignals{}, - ) - if err != nil { - t.Fatalf("beforeAcceptFinal error = %v", err) - } - if decision.Status != acceptance.AcceptanceIncomplete { - t.Fatalf("status=%q want incomplete", decision.Status) - } - if decision.StopReason != controlplane.StopReasonMaxTurnExceededWithUnconvergedTodos { - t.Fatalf("stop_reason=%q want max_turn_exceeded_with_unconverged_todos", decision.StopReason) - } -} diff --git a/internal/runtime/acceptgate/checks.go b/internal/runtime/acceptgate/checks.go index 60031537..cdec8f51 100644 --- a/internal/runtime/acceptgate/checks.go +++ b/internal/runtime/acceptgate/checks.go @@ -79,7 +79,7 @@ func checkWorkspaceChange(input Input, check agentsession.AcceptCheck) CheckResu } for _, item := range input.Facts.Files.Exists { switch strings.TrimSpace(item.Source) { - case "filesystem_write_file", "filesystem_write_file_noop", "filesystem_edit", "bash", "workspace_write": + case "filesystem_write_file", "filesystem_edit", "bash", "workspace_write": return pass(check) } } diff --git a/internal/runtime/acceptgate/gate.go b/internal/runtime/acceptgate/gate.go index 1cb3870f..b35d41ab 100644 --- a/internal/runtime/acceptgate/gate.go +++ b/internal/runtime/acceptgate/gate.go @@ -65,15 +65,21 @@ func Evaluate(ctx context.Context, input Input) Report { checks := input.PlanVerify.Normalize() if len(checks) == 0 { - checks = agentsession.AcceptChecks{{Kind: agentsession.AcceptCheckOutputOnly, Required: true}} + checks = agentsession.AcceptChecks{{Kind: agentsession.AcceptCheckOutputOnly}} } for _, check := range checks { - report.add(evaluateAcceptCheck(input, check)) + result := evaluateAcceptCheck(input, check) + if !check.RequiredValue() { + report.addOptional(result) + continue + } + report.add(result) } report.finalize() return report } +// add 记录必需验收项结果,并在失败时更新终态原因。 func (r *Report) add(result CheckResult) { if strings.TrimSpace(result.Name) == "" { return @@ -97,6 +103,15 @@ func (r *Report) add(result CheckResult) { } } +// addOptional 保留可选验收项结果,但不让可选失败改变终态。 +func (r *Report) addOptional(result CheckResult) { + if strings.TrimSpace(result.Name) == "" { + return + } + r.Results = append(r.Results, result) +} + +// finalize 汇总逐项失败原因,形成对上层展示稳定的终态摘要。 func (r *Report) finalize() { if r.Outcome == OutcomeAccepted { r.StopReason = controlplane.StopReasonAccepted diff --git a/internal/runtime/acceptgate/gate_test.go b/internal/runtime/acceptgate/gate_test.go index d22b617b..15aefae0 100644 --- a/internal/runtime/acceptgate/gate_test.go +++ b/internal/runtime/acceptgate/gate_test.go @@ -2,6 +2,7 @@ package acceptgate import ( "context" + "encoding/json" "testing" "neo-code/internal/runtime/controlplane" @@ -69,6 +70,11 @@ func TestEvaluateWorkspaceChangeUsesRuntimeFactsOnly(t *testing.T) { if report := Evaluate(context.Background(), input); report.Outcome != OutcomeFailed { t.Fatalf("read-only fact report = %+v, want failed", report) } + + input.Facts.Files.Exists = []runtimefacts.FileExistFact{{Path: "internal/foo.go", Source: "filesystem_write_file_noop"}} + if report := Evaluate(context.Background(), input); report.Outcome != OutcomeFailed { + t.Fatalf("noop write fact report = %+v, want failed", report) + } } func TestEvaluateFileAndContentFacts(t *testing.T) { @@ -126,6 +132,27 @@ func TestEvaluateToolFactAndUnknownKind(t *testing.T) { } } +func TestEvaluateOptionalUnknownKindDoesNotFail(t *testing.T) { + t.Parallel() + + optional := false + var checks agentsession.AcceptChecks + if err := json.Unmarshal([]byte(`[{"kind":"output_only"},{"kind":"future_check","required":false}]`), &checks); err != nil { + t.Fatalf("Unmarshal() error = %v", err) + } + checks = append(checks, agentsession.AcceptCheck{Kind: "go_literal_optional", Required: &optional}) + report := Evaluate(context.Background(), Input{ + PlanVerify: checks, + LastAssistantText: "done", + }) + if report.Outcome != OutcomeAccepted { + t.Fatalf("report = %+v, want accepted", report) + } + if len(report.Results) != 5 { + t.Fatalf("results len = %d, want 5", len(report.Results)) + } +} + func TestEvaluateTodoPriority(t *testing.T) { t.Parallel() diff --git a/internal/runtime/acceptgate_runtime.go b/internal/runtime/acceptgate_runtime.go index 4ba00269..70b2a8f9 100644 --- a/internal/runtime/acceptgate_runtime.go +++ b/internal/runtime/acceptgate_runtime.go @@ -4,14 +4,22 @@ import ( "context" "strings" + "neo-code/internal/promptasset" providertypes "neo-code/internal/provider/types" - "neo-code/internal/runtime/acceptance" "neo-code/internal/runtime/acceptgate" runtimefacts "neo-code/internal/runtime/facts" agentsession "neo-code/internal/session" ) -const completionProtocolReminder = "[Runtime Control]\n你当前没有调用工具,也没有输出 task_completion。若任务已完成,请按结构化完成信号结束;否则继续调用工具推进。" +const missingCompletionSignalLimit = 6 + +// completionProtocolReminderForStreak 根据连续缺失完成信号的次数返回对应协议提示。 +func completionProtocolReminderForStreak(streak int) string { + if streak >= missingCompletionSignalLimit-1 { + return promptasset.CompletionProtocolFinalReminder() + } + return promptasset.CompletionProtocolReminder() +} // evaluateAcceptGate 从运行态提取事实快照,并执行最终 Accept Gate。 func (s *Service) evaluateAcceptGate(ctx context.Context, state *runState, assistantMessage providertypes.Message) acceptgate.Report { @@ -75,9 +83,9 @@ func selectPlanOwnedTodos(plan *agentsession.PlanArtifact, todos []agentsession. // emitAcceptGateReport 将 Accept Gate 报告发布为统一 acceptance_decided 事件。 func (s *Service) emitAcceptGateReport(state *runState, report acceptgate.Report) { - status := acceptance.AcceptanceFailed + status := string(acceptgate.OutcomeFailed) if report.Outcome == acceptgate.OutcomeAccepted { - status = acceptance.AcceptanceAccepted + status = string(acceptgate.OutcomeAccepted) } s.emitRunScopedOptional(EventAcceptanceDecided, state, AcceptanceDecidedPayload{ Status: status, diff --git a/internal/runtime/before_completion_orchestrator.go b/internal/runtime/before_completion_orchestrator.go deleted file mode 100644 index 77974edf..00000000 --- a/internal/runtime/before_completion_orchestrator.go +++ /dev/null @@ -1,160 +0,0 @@ -package runtime - -import ( - "context" - "strings" - - providertypes "neo-code/internal/provider/types" - "neo-code/internal/runtime/acceptance" - "neo-code/internal/runtime/decider" - runtimehooks "neo-code/internal/runtime/hooks" -) - -// beforeCompletionHookSignals 收敛 before_completion_decision 阶段 user/repo hook 的可消费信号。 -type beforeCompletionHookSignals struct { - Annotations []string - Guards []decider.HookGuardSignal -} - -// runBeforeCompletionDecisionAcceptance 执行 before_completion_decision 专用编排: -// 1) 先执行 user/repo hooks 收集 annotation/guard signal; -// 2) 再执行普通 internal hooks 用于观测; -// 3) 最后由 runtime 内部 AcceptanceService 作为 before_completion_decision 的收口裁决阶段,生成唯一 AcceptanceDecision。 -// AcceptanceDecision 走强类型 runtime 内部路径,不通过通用 HookResult metadata 承载。 -func (s *Service) runBeforeCompletionDecisionAcceptance( - ctx context.Context, - state *runState, - snapshot TurnBudgetSnapshot, - assistant providertypes.Message, - workdir string, - completionPassed bool, - hasToolCalls bool, - assistantRole string, -) (acceptance.AcceptanceDecision, error) { - if s == nil { - return acceptance.AcceptanceDecision{}, nil - } - - point := runtimehooks.HookPointBeforeCompletionDecision - hookInput := s.buildRunHookContext( - state, - runtimehooks.HookContext{ - Metadata: map[string]any{ - "completion_passed": completionPassed, - "has_tool_calls": hasToolCalls, - "assistant_role": strings.TrimSpace(assistantRole), - "workdir": strings.TrimSpace(workdir), - }, - }, - ) - scopedCtx := withRuntimeHookEnvelope(ctx, hookRuntimeEnvelope{ - RunID: firstNonBlank(hookRunIDFromState(state), hookInput.RunID), - SessionID: firstNonBlank(hookSessionIDFromState(state), hookInput.SessionID), - Turn: hookTurnFromState(state), - Phase: hookPhaseFromState(state), - }) - - signals := beforeCompletionHookSignals{} - if s.hookExecutor != nil { - baseExecutor, userExecutor, repoExecutor := splitHookExecutors(s.hookExecutor) - - for _, item := range []struct { - executor HookExecutor - source runtimehooks.HookSource - }{ - {executor: userExecutor, source: runtimehooks.HookSourceUser}, - {executor: repoExecutor, source: runtimehooks.HookSourceRepo}, - } { - if item.executor == nil { - continue - } - output := item.executor.Run(scopedCtx, point, hookInput.Clone()) - annotations, guards := collectBeforeCompletionSignals(output, item.source) - signals.Annotations = append(signals.Annotations, annotations...) - signals.Guards = append(signals.Guards, guards...) - s.recordUserHookAnnotations(state, output) - } - - // internal hooks 在该点位最后执行;其结果仅用于观测,不参与 user/repo signal 收集。 - if baseExecutor != nil { - output := baseExecutor.Run(scopedCtx, point, hookInput.Clone()) - s.recordUserHookAnnotations(state, output) - } - } - s.emitRunScopedOptional(EventVerificationStarted, state, VerificationStartedPayload{ - CompletionPassed: completionPassed, - CompletionBlockedReason: strings.TrimSpace(string(state.completion.CompletionBlockedReason)), - }) - // 收口裁决阶段:消费 completion/facts/todo/verification/user-repo signals,生成唯一终态裁决。 - return s.beforeAcceptFinal(ctx, state, snapshot, assistant, completionPassed, signals) -} - -// buildRunHookContext 构造带 run/session 元数据的 hook 输入。 -func (s *Service) buildRunHookContext(state *runState, input runtimehooks.HookContext) runtimehooks.HookContext { - runID := firstNonBlank(hookRunIDFromState(state), input.RunID) - sessionID := firstNonBlank(hookSessionIDFromState(state), input.SessionID) - input.RunID = firstNonBlank(input.RunID, runID) - input.SessionID = firstNonBlank(input.SessionID, sessionID) - if input.Metadata == nil { - input.Metadata = make(map[string]any, 8) - } - input.Metadata["run_id"] = input.RunID - input.Metadata["session_id"] = input.SessionID - if state != nil { - input.Metadata["runtime_run_token"] = state.runToken - if _, exists := input.Metadata["phase"]; !exists { - input.Metadata["phase"] = hookPhaseFromState(state) - } - input.Metadata["turn"] = hookTurnFromState(state) - } - return input -} - -// splitHookExecutors 拆解 composeRuntimeHookExecutors 形成的链,恢复 internal/user/repo 三段执行器。 -func splitHookExecutors(executor HookExecutor) (base HookExecutor, user HookExecutor, repo HookExecutor) { - switch typed := executor.(type) { - case *repoComposedHookExecutor: - subBase, subUser, _ := splitHookExecutors(typed.base) - return subBase, subUser, typed.repo - case *userComposedHookExecutor: - subBase, _, subRepo := splitHookExecutors(typed.base) - return subBase, typed.user, subRepo - default: - return executor, nil, nil - } -} - -// collectBeforeCompletionSignals 从 user/repo hook 结果提取 annotation 与 guard 信号。 -func collectBeforeCompletionSignals( - output runtimehooks.RunOutput, - defaultSource runtimehooks.HookSource, -) ([]string, []decider.HookGuardSignal) { - if len(output.Results) == 0 { - return nil, nil - } - annotations := make([]string, 0, len(output.Results)) - guards := make([]decider.HookGuardSignal, 0, len(output.Results)) - for _, result := range output.Results { - source := strings.TrimSpace(string(result.Source)) - if source == "" { - source = strings.TrimSpace(string(defaultSource)) - } - message := strings.TrimSpace(result.Message) - errText := strings.TrimSpace(result.Error) - if message != "" { - annotations = append(annotations, message) - } - - isGuard := result.Status == runtimehooks.HookResultFailed || result.Metadata.GuardSignal - if !isGuard { - continue - } - guard := decider.HookGuardSignal{ - HookID: strings.TrimSpace(result.HookID), - Source: source, - Message: firstNonBlank(message, errText), - } - guards = append(guards, guard) - } - return annotations, guards -} diff --git a/internal/runtime/controlplane/progress.go b/internal/runtime/controlplane/progress.go index 0e9c4cdb..a2827fa4 100644 --- a/internal/runtime/controlplane/progress.go +++ b/internal/runtime/controlplane/progress.go @@ -203,9 +203,9 @@ func isExplorationProgress(runState RunState, flags evidenceFlags) bool { func explorationWindowForPhase(runState RunState) int { switch runState { case RunStatePlan: - return 4 + return 10 case RunStateExecute: - return 2 + return 6 default: return 0 } diff --git a/internal/runtime/controlplane/progress_test.go b/internal/runtime/controlplane/progress_test.go index fe450eda..22dbdda9 100644 --- a/internal/runtime/controlplane/progress_test.go +++ b/internal/runtime/controlplane/progress_test.go @@ -68,7 +68,7 @@ func TestEvaluateProgressExplorationExhaustionStartsNoProgress(t *testing.T) { state := ProgressState{ LastScore: ProgressScore{ - ExplorationStreak: 4, + ExplorationStreak: 11, NoProgressStreak: 1, }, } diff --git a/internal/runtime/decider/decide.go b/internal/runtime/decider/decide.go deleted file mode 100644 index 38577bdb..00000000 --- a/internal/runtime/decider/decide.go +++ /dev/null @@ -1,723 +0,0 @@ -package decider - -import ( - "fmt" - "path/filepath" - "regexp" - "strings" - - "neo-code/internal/runtime/facts" -) - -// TODO(runtime-control-plane): FinalDecider 当前同时承担了意图推断、有效任务类型推导、事实解释、 -// 终态裁决与下一步动作合成。待验收循环稳定后拆分为 IntentInferer、EffectiveTaskKindDeriver、 -// AcceptanceProfile 与 NextActionPlanner,降低单模块职责耦合。 -// -// Decide 执行最终终态裁决,作为 runtime 的唯一决策入口。 -func Decide(input DecisionInput) Decision { - intent := InferTaskIntent(input.UserGoal) - hint := input.TaskKind - if strings.TrimSpace(string(hint)) == "" { - hint = intent.Hint - } - effectiveTaskKind := DeriveEffectiveTaskKind(hint, input.Facts, input.Todos) - - var decision Decision - if input.Todos.Summary.RequiredFailed > 0 { - decision = Decision{ - Status: DecisionFailed, - StopReason: "required_todo_failed", - UserVisibleSummary: "存在 required todo 失败,任务已终止。", - InternalSummary: "required todo entered failed terminal state", - } - return finalizeDecision(decision, hint, effectiveTaskKind, input) - } - if input.NoProgressExceeded { - decision = Decision{ - Status: DecisionIncomplete, - StopReason: "no_progress_after_final_intercept", - UserVisibleSummary: "连续多轮缺少新事实,任务以未完成结束。", - InternalSummary: "no progress exceeded while final intercepted", - } - return finalizeDecision(decision, hint, effectiveTaskKind, input) - } - if !input.CompletionPassed { - decision = continueWithCompletionReason(input) - return finalizeDecision(decision, hint, effectiveTaskKind, input) - } - - switch effectiveTaskKind { - case TaskKindTodoState: - decision = decideTodoState(input) - case TaskKindWorkspaceWrite: - decision = decideWorkspaceWrite(input) - case TaskKindSubAgent: - decision = decideSubAgent(input) - case TaskKindReadOnly: - decision = decideReadOnly(input) - case TaskKindMixed: - decision = decideMixed(input) - case TaskKindChatAnswer: - fallthrough - default: - decision = Decision{ - Status: DecisionAccepted, - StopReason: "accepted", - UserVisibleSummary: "任务完成。", - InternalSummary: "chat answer accepted by completion gate", - } - } - return finalizeDecision(decision, hint, effectiveTaskKind, input) -} - -// continueWithCompletionReason 把 completion gate 阻塞转成可执行缺失事实提示。 -func continueWithCompletionReason(input DecisionInput) Decision { - reason := strings.TrimSpace(input.CompletionReason) - switch reason { - case "pending_todo": - openTodos := collectOpenRequiredTodos(input.Todos.Items) - if len(openTodos) == 0 { - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - MissingFacts: []MissingFact{{Kind: "required_todo_terminal"}}, - RequiredInput: &RequiredInput{ - Kind: "missing_required_todo_id", - Message: "缺少 required todo 标识,无法推进状态收敛。", - }, - UserVisibleSummary: "仍有 required todo 未收敛,但当前无法确定待推进项。", - InternalSummary: "completion blocked by pending_todo without resolvable open todo id", - } - } - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - MissingFacts: []MissingFact{{ - Kind: "required_todo_terminal", - Target: strings.Join(openTodos, ","), - Details: map[string]any{"open_required_ids": openTodos}, - }}, - RequiredNextActions: []RequiredAction{{ - Tool: "todo_write", - ArgsHint: map[string]any{ - "action": "set_status", - "id": firstOrEmpty(openTodos), - "status": "completed", - }, - }}, - UserVisibleSummary: "仍有 required todo 未收敛,需要继续推进 todo 状态。", - InternalSummary: "completion blocked by pending_todo", - } - case "unverified_write": - target, expectedContent, ok := selectVerificationTarget(input) - if !ok { - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - MissingFacts: []MissingFact{{ - Kind: "file_written", - Details: map[string]any{ - "reason": "cannot infer target path/content from user goal", - }, - }}, - RequiredInput: &RequiredInput{ - Kind: "missing_file_target_or_content", - Message: "无法从当前任务中确定要验证的文件路径或内容,需要用户补充。", - }, - UserVisibleSummary: "写入事实尚未完成验证,但当前缺少可执行验证目标。", - InternalSummary: "completion blocked by unverified_write without resolvable verification target", - } - } - return buildWriteVerificationDecision( - target, - expectedContent, - "写入事实尚未完成验证,需要补充 verification facts。", - "completion blocked by unverified_write", - ) - case "post_execute_closure_required": - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - MissingFacts: []MissingFact{{ - Kind: "post_execute_closure", - Target: "latest_tool_results", - }}, - RequiredInput: &RequiredInput{ - Kind: "missing_post_execute_closure", - Message: "需要基于最新工具结果补充闭环信息后再尝试完成。", - }, - UserVisibleSummary: "请先基于最新工具结果完成闭环,再尝试最终收尾。", - InternalSummary: "completion blocked by post_execute_closure_required", - } - default: - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - UserVisibleSummary: "仍缺少可验证事实,请继续调用工具推进任务。", - InternalSummary: "completion gate blocked without classified reason", - } - } -} - -// decideTodoState 依据 todo 快照判定状态类任务。 -func decideTodoState(input DecisionInput) Decision { - if input.Todos.Summary.Total == 0 && len(input.Facts.Todos.CreatedIDs) == 0 { - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - MissingFacts: []MissingFact{{ - Kind: "todo_created", - }}, - RequiredInput: &RequiredInput{ - Kind: "missing_todo_content", - Message: "尚未提供 Todo 内容,需要用户补充待办事项。", - }, - UserVisibleSummary: "尚未创建目标 Todo,请先调用 todo_write。", - InternalSummary: "todo_state task missing created todo facts", - } - } - if input.Todos.Summary.RequiredOpen > 0 { - openIDs := collectOpenRequiredTodos(input.Todos.Items) - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - MissingFacts: []MissingFact{{ - Kind: "required_todo_terminal", - Target: strings.Join(openIDs, ","), - Details: map[string]any{"open_required_ids": openIDs}, - }}, - RequiredNextActions: []RequiredAction{{ - Tool: "todo_write", - ArgsHint: map[string]any{ - "action": "set_status", - "id": firstOrEmpty(openIDs), - "status": "completed", - }, - }}, - UserVisibleSummary: "Todo 已创建但 required 项仍未完成。", - InternalSummary: "todo_state task still has open required todos", - } - } - return Decision{ - Status: DecisionAccepted, - StopReason: "accepted", - UserVisibleSummary: "Todo 状态已满足任务目标。", - InternalSummary: "todo_state facts satisfied", - } -} - -// decideWorkspaceWrite 依据写入与验证事实判定文件任务。 -func decideWorkspaceWrite(input DecisionInput) Decision { - if len(input.Facts.Files.Written) == 0 { - if hasSatisfiedWorkspaceWriteWithoutNewWrite(input) { - return Decision{ - Status: DecisionAccepted, - StopReason: "accepted", - UserVisibleSummary: "目标文件状态已满足,无需重复写入。", - InternalSummary: "workspace_write satisfied by noop_write verification facts", - } - } - if !hasExplicitFileTarget(input.UserGoal) { - return Decision{ - Status: DecisionAccepted, - StopReason: "accepted", - UserVisibleSummary: "任务未声明明确文件目标,已按通用编辑任务收尾。", - InternalSummary: "workspace_write downgraded to generic edit due missing explicit file target", - } - } - errorDetail := latestToolErrorDetail(input.Facts.Errors.ToolErrors, "filesystem_write_file") - details := map[string]any{} - if errorDetail != "" { - details["last_write_error"] = errorDetail - } - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - MissingFacts: []MissingFact{{ - Kind: "file_written", - Details: details, - }}, - RequiredInput: &RequiredInput{ - Kind: "missing_file_target_or_content", - Message: "无法从当前任务中确定要写入的文件路径或内容,需要用户补充。", - }, - UserVisibleSummary: "还没有写入事实,请先执行文件写入。", - InternalSummary: "workspace_write task missing file_written fact", - } - } - target, expectedContent, ok := selectVerificationTarget(input) - if !ok { - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - MissingFacts: []MissingFact{{ - Kind: "verification_passed", - Details: map[string]any{ - "reason": "missing resolvable verification target", - }, - }}, - RequiredInput: &RequiredInput{ - Kind: "missing_file_target_or_content", - Message: "无法确定当前需要验收的写入目标,请补充文件路径或重试写入。", - }, - UserVisibleSummary: "已检测到写入事实,但无法确定验收目标。", - InternalSummary: "workspace_write has writes but cannot resolve verification target", - } - } - if hasWorkspaceWriteHardFailure(input.Facts.Errors.ToolErrors, target) { - return Decision{ - Status: DecisionFailed, - StopReason: "verification_failed", - UserVisibleSummary: "文件写入出现持续失败,任务终止。请检查路径权限或写入策略。", - InternalSummary: "workspace_write hard failure detected from tool error facts", - } - } - if !hasVerificationForTarget(input.Facts, target) { - return buildWriteVerificationDecision( - target, - expectedContent, - "已写入文件但尚未形成通过的验证事实。", - "workspace_write task missing", - ) - } - return Decision{ - Status: DecisionAccepted, - StopReason: "accepted", - UserVisibleSummary: "文件写入与验证事实已满足。", - InternalSummary: "workspace_write facts satisfied", - } -} - -// decideSubAgent 依据子代理启动/完成事实判定子代理任务。 -func decideSubAgent(input DecisionInput) Decision { - if len(input.Facts.SubAgents.Started) == 0 { - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - MissingFacts: []MissingFact{{ - Kind: "subagent_started", - }}, - RequiredInput: &RequiredInput{ - Kind: "missing_subagent_instruction", - Message: "需要明确的子代理任务指令后才能执行 spawn_subagent。", - }, - UserVisibleSummary: "尚未产生子代理启动事实,请显式调用 spawn_subagent。", - InternalSummary: "subagent task missing start fact", - } - } - if len(input.Facts.SubAgents.Failed) > 0 && len(input.Facts.SubAgents.Completed) == 0 { - return Decision{ - Status: DecisionFailed, - StopReason: "verification_failed", - UserVisibleSummary: "子代理执行失败,任务终止。", - InternalSummary: "subagent task failed without completion fact", - } - } - if len(input.Facts.SubAgents.Completed) == 0 { - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - UserVisibleSummary: "子代理已启动但尚未完成。", - InternalSummary: "subagent task started but no completed fact", - } - } - if isWriteIntentGoal(input.UserGoal) && !hasSubAgentArtifactEvidence(input.Facts) { - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - MissingFacts: []MissingFact{{ - Kind: "subagent_artifact_or_file_fact", - Target: "workspace_artifact", - }}, - RequiredInput: &RequiredInput{ - Kind: "missing_subagent_artifact_path", - Message: "需要提供子代理产物路径或可验证的文件目标。", - }, - UserVisibleSummary: "子代理已完成,但缺少可验证的产物事实。", - InternalSummary: "subagent completed without artifact/file evidence for write-intent goal", - } - } - return Decision{ - Status: DecisionAccepted, - StopReason: "accepted", - UserVisibleSummary: "子代理完成事实已满足。", - InternalSummary: "subagent task completed facts satisfied", - } -} - -// decideReadOnly 判定只读任务是否可结束。 -func decideReadOnly(input DecisionInput) Decision { - if len(input.Facts.Files.Exists) == 0 && len(input.Facts.Commands.Executed) == 0 && len(input.LastAssistantText) == 0 { - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - UserVisibleSummary: "尚无可验证读取事实,请先执行只读工具。", - InternalSummary: "read_only task has no read/search facts", - } - } - return Decision{ - Status: DecisionAccepted, - StopReason: "accepted", - UserVisibleSummary: "只读分析任务已完成。", - InternalSummary: "read_only facts satisfied", - } -} - -// decideMixed 对混合任务采用保守策略:必须同时具备状态推进与至少一个验证事实。 -func decideMixed(input DecisionInput) Decision { - if len(input.Facts.Verification.Passed) == 0 { - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - UserVisibleSummary: "混合任务尚未形成验证通过事实。", - InternalSummary: "mixed task missing verification passed facts", - } - } - if input.Todos.Summary.RequiredOpen > 0 { - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - UserVisibleSummary: "混合任务 required todo 尚未收敛。", - InternalSummary: "mixed task has open required todos", - } - } - return Decision{ - Status: DecisionAccepted, - StopReason: "accepted", - UserVisibleSummary: "混合任务事实已满足。", - InternalSummary: "mixed task satisfied by verification + todo closure", - } -} - -// buildWriteVerificationDecision 统一构造写入后缺少验证事实时的继续决策。 -func buildWriteVerificationDecision(target string, expectedContent string, userSummary string, internalPrefix string) Decision { - scope := fmt.Sprintf("artifact:%s", target) - if expectedContent != "" { - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - MissingFacts: []MissingFact{{ - Kind: "verification_passed", - Target: target, - Expected: expectedContent, - }}, - RequiredNextActions: []RequiredAction{{ - Tool: "filesystem_read_file", - ArgsHint: map[string]any{ - "path": target, - "expect_contains": []string{expectedContent}, - "verification_scope": scope, - }, - }}, - UserVisibleSummary: userSummary, - InternalSummary: internalPrefix + " content verification facts bound to target artifact", - } - } - return Decision{ - Status: DecisionContinue, - StopReason: "todo_not_converged", - MissingFacts: []MissingFact{{ - Kind: "file_exists", - Target: target, - }}, - RequiredNextActions: []RequiredAction{{ - Tool: "filesystem_glob", - ArgsHint: map[string]any{ - "pattern": target, - "expect_min_matches": 1, - "verification_scope": scope, - }, - }}, - UserVisibleSummary: userSummary, - InternalSummary: internalPrefix + " existence verification facts bound to target artifact", - } -} - -// collectOpenRequiredTodos 收集 required 且未终态的 todo id。 -func collectOpenRequiredTodos(items []TodoViewItem) []string { - ids := make([]string, 0) - for _, item := range items { - if !item.Required { - continue - } - switch strings.ToLower(strings.TrimSpace(item.Status)) { - case "completed", "failed", "canceled": - continue - default: - if id := strings.TrimSpace(item.ID); id != "" { - ids = append(ids, id) - } - } - } - return ids -} - -// firstOrEmpty 返回首个元素,不存在时返回空串。 -func firstOrEmpty(values []string) string { - if len(values) == 0 { - return "" - } - return values[0] -} - -// hasVerificationForTarget 判断目标文件是否已经有通过的验证事实,避免跨文件误判 accepted。 -func hasVerificationForTarget(allFacts facts.RuntimeFacts, targetPath string) bool { - target := strings.TrimSpace(targetPath) - if target == "" { - return false - } - targetArtifactScope := "artifact:" + target - normalizedTarget := strings.ToLower(filepath.Clean(target)) - - for _, fact := range allFacts.Verification.Passed { - scope := strings.TrimSpace(fact.Scope) - if scope == "" { - continue - } - normalizedScope := strings.ToLower(filepath.Clean(scope)) - if strings.EqualFold(scope, target) || strings.EqualFold(scope, targetArtifactScope) || normalizedScope == normalizedTarget { - return true - } - if strings.HasPrefix(strings.ToLower(scope), "artifact:") { - normalized := strings.TrimPrefix(scope, "artifact:") - cleaned := strings.TrimSpace(normalized) - if strings.EqualFold(cleaned, target) || strings.ToLower(filepath.Clean(cleaned)) == normalizedTarget { - return true - } - } - } - for _, fact := range allFacts.Files.ContentMatch { - if !fact.VerificationPassed { - continue - } - if strings.EqualFold(strings.TrimSpace(fact.Path), target) || - strings.ToLower(filepath.Clean(strings.TrimSpace(fact.Path))) == normalizedTarget { - return true - } - } - return false -} - -// latestToolErrorDetail 返回指定工具的最新错误摘要,便于构造可执行 continue 提示。 -func latestToolErrorDetail(errors []facts.ToolErrorFact, toolName string) string { - targetTool := strings.TrimSpace(toolName) - for i := len(errors) - 1; i >= 0; i-- { - fact := errors[i] - if !strings.EqualFold(strings.TrimSpace(fact.Tool), targetTool) { - continue - } - content := strings.TrimSpace(fact.Content) - if content == "" { - content = strings.TrimSpace(fact.ErrorClass) - } - if content != "" { - return content - } - } - return "" -} - -// hasWorkspaceWriteHardFailure 判断写入目标是否出现高置信不可恢复错误,防止无意义循环重试。 -func hasWorkspaceWriteHardFailure(errors []facts.ToolErrorFact, targetPath string) bool { - target := strings.TrimSpace(targetPath) - if target == "" { - return false - } - targetLower := strings.ToLower(target) - targetBaseLower := strings.ToLower(strings.TrimSpace(filepath.Base(target))) - errorCount := 0 - for _, fact := range errors { - if !strings.EqualFold(strings.TrimSpace(fact.Tool), "filesystem_write_file") { - continue - } - content := strings.ToLower(strings.TrimSpace(fact.Content)) - if content == "" { - content = strings.ToLower(strings.TrimSpace(fact.ErrorClass)) - } - mentionsTarget := strings.Contains(content, targetLower) || - (targetBaseLower != "" && strings.Contains(content, targetBaseLower)) - if !mentionsTarget { - continue - } - if strings.Contains(content, "permission denied") || - strings.Contains(content, "path not allowed") || strings.Contains(content, "no such file") { - errorCount++ - } - } - return errorCount >= 2 -} - -// isWriteIntentGoal 判断用户目标是否显式要求产物写入。 -func isWriteIntentGoal(goal string) bool { - return containsAny(strings.ToLower(strings.TrimSpace(goal)), - "创建文件", "写入", "修改文件", "新增文件", "create file", "write file", "edit file", "update file", ".txt", ".go", ".md", ".json") -} - -// hasExplicitFileTarget 判断用户目标是否包含可定位文件目标,避免对泛化“编辑一下”任务过度拦截。 -func hasExplicitFileTarget(goal string) bool { - normalized := strings.ToLower(strings.TrimSpace(goal)) - return containsAny( - normalized, - ".txt", ".go", ".md", ".json", ".yaml", ".yml", ".ts", ".tsx", ".py", "/", - "readme", "package.json", - ) -} - -// hasSatisfiedWorkspaceWriteWithoutNewWrite 判断未产生新写入时是否已有可验收写入结果。 -func hasSatisfiedWorkspaceWriteWithoutNewWrite(input DecisionInput) bool { - goalPaths := extractGoalPaths(input.UserGoal) - for _, goalPath := range goalPaths { - if hasVerificationForTarget(input.Facts, goalPath) { - return true - } - } - if len(goalPaths) > 0 { - return false - } - target, _, ok := selectVerificationTarget(input) - return ok && hasVerificationForTarget(input.Facts, target) -} - -// hasSubAgentArtifactEvidence 判断子代理任务是否已有可验证产物事实。 -func hasSubAgentArtifactEvidence(allFacts facts.RuntimeFacts) bool { - for _, fact := range allFacts.SubAgents.Completed { - if len(fact.Artifacts) > 0 { - return true - } - } - if len(allFacts.Files.Written) > 0 || len(allFacts.Files.Exists) > 0 || len(allFacts.Files.ContentMatch) > 0 { - return true - } - return false -} - -// latestWriteVerificationHint 返回最适合下一步验证动作的写入目标与期望内容(若可用)。 -func latestWriteVerificationHint(allFacts facts.RuntimeFacts, preferredPath string) (string, string) { - normalizedPreferred := strings.TrimSpace(preferredPath) - writes := allFacts.Files.Written - for i := len(writes) - 1; i >= 0; i-- { - fact := writes[i] - path := strings.TrimSpace(fact.Path) - if path == "" { - continue - } - if normalizedPreferred != "" && !strings.EqualFold(path, normalizedPreferred) { - continue - } - return path, strings.TrimSpace(fact.ExpectedContent) - } - if normalizedPreferred != "" { - return normalizedPreferred, "" - } - return "", "" -} - -// finalizeDecision 统一补全决策元信息,确保快照可观测 hint 与 effective kind。 -func finalizeDecision(decision Decision, intentHint TaskKind, effective TaskKind, input DecisionInput) Decision { - if len(input.HookAnnotations) > 0 || len(input.HookGuards) > 0 { - detail := fmt.Sprintf( - "hook signals consumed (annotations=%d guards=%d)", - len(input.HookAnnotations), - len(input.HookGuards), - ) - if strings.TrimSpace(decision.InternalSummary) == "" { - decision.InternalSummary = detail - } else { - decision.InternalSummary = strings.TrimSpace(decision.InternalSummary) + "; " + detail - } - } - decision.IntentHint = intentHint - decision.EffectiveTaskKind = effective - return decision -} - -var filePathPattern = regexp.MustCompile(`(?i)(?:^|[\s"'` + "`" + `])([a-z0-9_\-./]+\.[a-z0-9]{1,8})(?:$|[\s"'` + "`" + `,;:])`) - -// selectVerificationTarget 选择当前回合应验证的写入目标,避免回退到历史首条写入。 -func selectVerificationTarget(input DecisionInput) (path string, expectedContent string, ok bool) { - goalPaths := extractGoalPaths(input.UserGoal) - for i := len(goalPaths) - 1; i >= 0; i-- { - goalPath := goalPaths[i] - for j := len(input.Facts.Files.Written) - 1; j >= 0; j-- { - writeFact := input.Facts.Files.Written[j] - if strings.EqualFold(strings.TrimSpace(writeFact.Path), goalPath) { - return strings.TrimSpace(writeFact.Path), strings.TrimSpace(writeFact.ExpectedContent), true - } - } - } - for i := len(input.Facts.Files.Written) - 1; i >= 0; i-- { - writeFact := input.Facts.Files.Written[i] - target := strings.TrimSpace(writeFact.Path) - if target == "" { - continue - } - if !hasVerificationForTarget(input.Facts, target) { - return target, strings.TrimSpace(writeFact.ExpectedContent), true - } - } - for i := len(input.Facts.Files.Written) - 1; i >= 0; i-- { - writeFact := input.Facts.Files.Written[i] - target := strings.TrimSpace(writeFact.Path) - if target != "" { - return target, strings.TrimSpace(writeFact.ExpectedContent), true - } - } - for i := len(goalPaths) - 1; i >= 0; i-- { - goalPath := strings.TrimSpace(goalPaths[i]) - if goalPath != "" && hasVerificationForTarget(input.Facts, goalPath) { - return goalPath, "", true - } - } - for i := len(input.Facts.Files.ContentMatch) - 1; i >= 0; i-- { - matchFact := input.Facts.Files.ContentMatch[i] - if !matchFact.VerificationPassed { - continue - } - target := strings.TrimSpace(matchFact.Path) - if target == "" { - continue - } - return target, firstOrEmpty(matchFact.ExpectedContains), true - } - for i := len(input.Facts.Verification.Passed) - 1; i >= 0; i-- { - verifyFact := input.Facts.Verification.Passed[i] - scope := strings.TrimSpace(verifyFact.Scope) - if scope == "" { - continue - } - target := scope - if strings.HasPrefix(strings.ToLower(target), "artifact:") { - target = strings.TrimSpace(target[len("artifact:"):]) - } - if target == "" { - continue - } - return target, "", true - } - return "", "", false -} - -// extractGoalPaths 从用户目标文本提取可能的文件路径。 -func extractGoalPaths(goal string) []string { - matches := filePathPattern.FindAllStringSubmatch(strings.TrimSpace(goal), -1) - if len(matches) == 0 { - return nil - } - seen := make(map[string]struct{}, len(matches)) - out := make([]string, 0, len(matches)) - for _, match := range matches { - if len(match) < 2 { - continue - } - value := strings.TrimSpace(match[1]) - if value == "" { - continue - } - if _, exists := seen[strings.ToLower(value)]; exists { - continue - } - seen[strings.ToLower(value)] = struct{}{} - out = append(out, value) - } - return out -} diff --git a/internal/runtime/decider/decide_additional_test.go b/internal/runtime/decider/decide_additional_test.go deleted file mode 100644 index 017421e6..00000000 --- a/internal/runtime/decider/decide_additional_test.go +++ /dev/null @@ -1,155 +0,0 @@ -package decider - -import ( - "testing" - - runtimefacts "neo-code/internal/runtime/facts" -) - -func TestContinueWithCompletionReasonBranches(t *testing.T) { - t.Parallel() - - t.Run("pending_todo without open ids requires input", func(t *testing.T) { - t.Parallel() - decision := continueWithCompletionReason(DecisionInput{ - CompletionReason: "pending_todo", - Todos: TodoSnapshot{ - Items: []TodoViewItem{{ID: "x", Required: true, Status: "completed"}}, - }, - }) - if decision.RequiredInput == nil || decision.RequiredInput.Kind != "missing_required_todo_id" { - t.Fatalf("required input = %+v", decision.RequiredInput) - } - }) - - t.Run("unverified_write without target requires input", func(t *testing.T) { - t.Parallel() - decision := continueWithCompletionReason(DecisionInput{ - CompletionReason: "unverified_write", - }) - if decision.RequiredInput == nil || decision.RequiredInput.Kind != "missing_file_target_or_content" { - t.Fatalf("required input = %+v", decision.RequiredInput) - } - }) - - t.Run("post_execute_closure_required maps to closure missing fact", func(t *testing.T) { - t.Parallel() - decision := continueWithCompletionReason(DecisionInput{ - CompletionReason: "post_execute_closure_required", - }) - if len(decision.MissingFacts) == 0 || decision.MissingFacts[0].Kind != "post_execute_closure" { - t.Fatalf("missing facts = %+v", decision.MissingFacts) - } - }) -} - -func TestDecideTaskSpecificBranches(t *testing.T) { - t.Parallel() - - t.Run("todo_state without creation facts requests todo_write", func(t *testing.T) { - t.Parallel() - decision := decideTodoState(DecisionInput{}) - if decision.RequiredInput == nil || decision.RequiredInput.Kind != "missing_todo_content" { - t.Fatalf("required input = %+v", decision.RequiredInput) - } - }) - - t.Run("mixed accepts when verification passed and required todos closed", func(t *testing.T) { - t.Parallel() - decision := decideMixed(DecisionInput{ - Facts: runtimefacts.RuntimeFacts{ - Verification: runtimefacts.VerificationFacts{ - Passed: []runtimefacts.VerificationFact{{Tool: "filesystem_read_file", Scope: "artifact:a.txt"}}, - }, - }, - Todos: TodoSnapshot{Summary: TodoSummary{RequiredOpen: 0}}, - }) - if decision.Status != DecisionAccepted { - t.Fatalf("status = %q, want accepted", decision.Status) - } - }) - - t.Run("subagent failed without completion returns failed", func(t *testing.T) { - t.Parallel() - decision := decideSubAgent(DecisionInput{ - Facts: runtimefacts.RuntimeFacts{ - SubAgents: runtimefacts.SubAgentFacts{ - Started: []runtimefacts.SubAgentFact{{TaskID: "sa-1"}}, - Failed: []runtimefacts.SubAgentFact{{TaskID: "sa-1"}}, - }, - }, - }) - if decision.Status != DecisionFailed { - t.Fatalf("status = %q, want failed", decision.Status) - } - }) -} - -func TestHelperFunctionsBranches(t *testing.T) { - t.Parallel() - - if got := latestToolErrorDetail([]runtimefacts.ToolErrorFact{{Tool: "filesystem_write_file", ErrorClass: "timeout"}}, "filesystem_write_file"); got != "timeout" { - t.Fatalf("latestToolErrorDetail = %q, want timeout", got) - } - - if !hasVerificationForTarget(runtimefacts.RuntimeFacts{ - Verification: runtimefacts.VerificationFacts{ - Passed: []runtimefacts.VerificationFact{{Scope: "artifact:./docs/../docs/readme.md"}}, - }, - }, "docs/readme.md") { - t.Fatal("expected normalized artifact scope match") - } - - if got := extractGoalPaths(`edit "A.TXT", then update a.txt and b.md`); len(got) != 2 { - t.Fatalf("extractGoalPaths len = %d, want 2", len(got)) - } -} - -func TestHelperFunctionsAdditionalBranches(t *testing.T) { - t.Parallel() - - if got := firstOrEmpty(nil); got != "" { - t.Fatalf("firstOrEmpty(nil) = %q, want empty", got) - } - if got := firstOrEmpty([]string{"a", "b"}); got != "a" { - t.Fatalf("firstOrEmpty = %q, want a", got) - } - - open := collectOpenRequiredTodos([]TodoViewItem{ - {ID: "todo-1", Required: true, Status: "pending"}, - {ID: "todo-2", Required: true, Status: "in_progress"}, - {ID: "todo-3", Required: true, Status: "completed"}, - {ID: "", Required: true, Status: "pending"}, - }) - if len(open) != 2 || open[0] != "todo-1" || open[1] != "todo-2" { - t.Fatalf("collectOpenRequiredTodos = %#v, want [todo-1 todo-2]", open) - } - - if got := latestToolErrorDetail([]runtimefacts.ToolErrorFact{ - {Tool: "filesystem_write_file", Content: " last content "}, - }, "filesystem_write_file"); got != "last content" { - t.Fatalf("latestToolErrorDetail(content fallback) = %q, want trimmed content", got) - } - - if hasWorkspaceWriteHardFailure([]runtimefacts.ToolErrorFact{ - {Tool: "filesystem_write_file", ErrorClass: "permission_denied", Content: "permission denied for /tmp/other.txt"}, - {Tool: "filesystem_write_file", ErrorClass: "permission_denied", Content: "permission denied for /tmp/other2.txt"}, - }, "/tmp/target.txt") { - t.Fatal("hasWorkspaceWriteHardFailure should require target correlation") - } - - target, expected, ok := selectVerificationTarget(DecisionInput{ - UserGoal: "please edit ./docs/readme.md and include hello", - Facts: runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{ - {Path: "a.md", WorkspaceWrite: true}, - {Path: "docs/readme.md", WorkspaceWrite: true, ExpectedContent: "hello"}, - }, - }, - }, - }) - if !ok || target != "docs/readme.md" || expected != "hello" { - t.Fatalf("selectVerificationTarget = target=%q expected=%q ok=%v", target, expected, ok) - } -} diff --git a/internal/runtime/decider/decide_test.go b/internal/runtime/decider/decide_test.go deleted file mode 100644 index c7a72177..00000000 --- a/internal/runtime/decider/decide_test.go +++ /dev/null @@ -1,624 +0,0 @@ -package decider - -import ( - "encoding/json" - "strings" - "testing" - - runtimefacts "neo-code/internal/runtime/facts" -) - -func assertDecisionStatus(t *testing.T, decision Decision, want DecisionStatus) { - t.Helper() - if decision.Status != want { - t.Fatalf("status = %q, want %q", decision.Status, want) - } -} - -func TestDecideRequiredTodoFailedStopsImmediately(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindTodoState, - Todos: TodoSnapshot{ - Summary: TodoSummary{RequiredFailed: 1}, - }, - }) - - if decision.Status != DecisionFailed { - t.Fatalf("status = %q, want %q", decision.Status, DecisionFailed) - } - if decision.StopReason != "required_todo_failed" { - t.Fatalf("stop_reason = %q, want required_todo_failed", decision.StopReason) - } -} - -func TestDecideUsesEffectiveTaskKindFromFacts(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindChatAnswer, - CompletionPassed: true, - UserGoal: "你好", - Facts: runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{{Path: "test.txt", WorkspaceWrite: true}}, - }, - }, - }) - if decision.EffectiveTaskKind != TaskKindWorkspaceWrite { - t.Fatalf("effective kind = %q, want %q", decision.EffectiveTaskKind, TaskKindWorkspaceWrite) - } - if decision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", decision.Status, DecisionContinue) - } -} - -func TestDecideNoProgressExceededReturnsIncomplete(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - NoProgressExceeded: true, - }) - - if decision.Status != DecisionIncomplete { - t.Fatalf("status = %q, want %q", decision.Status, DecisionIncomplete) - } - if decision.StopReason != "no_progress_after_final_intercept" { - t.Fatalf("stop_reason = %q, want no_progress_after_final_intercept", decision.StopReason) - } -} - -func TestDecideCompletionBlockedReasonPendingTodo(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindTodoState, - CompletionPassed: false, - CompletionReason: "pending_todo", - Todos: TodoSnapshot{ - Items: []TodoViewItem{ - {ID: "todo-1", Required: true, Status: "pending"}, - }, - }, - }) - - if decision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", decision.Status, DecisionContinue) - } - if len(decision.MissingFacts) == 0 || decision.MissingFacts[0].Kind != "required_todo_terminal" { - t.Fatalf("missing facts = %+v", decision.MissingFacts) - } - if len(decision.RequiredNextActions) == 0 || decision.RequiredNextActions[0].Tool != "todo_write" { - t.Fatalf("required actions = %+v", decision.RequiredNextActions) - } -} - -func TestDecideWorkspaceWriteNeedsVerificationThenAccepts(t *testing.T) { - continueDecision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: true, - Facts: runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{{Path: "test.txt", Bytes: 1, WorkspaceWrite: true}}, - }, - }, - }) - if continueDecision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", continueDecision.Status, DecisionContinue) - } - if len(continueDecision.MissingFacts) == 0 || continueDecision.MissingFacts[0].Kind != "file_exists" { - t.Fatalf("missing facts = %+v, want file_exists", continueDecision.MissingFacts) - } - if len(continueDecision.RequiredNextActions) == 0 || continueDecision.RequiredNextActions[0].Tool != "filesystem_glob" { - t.Fatalf("required actions = %+v, want filesystem_glob", continueDecision.RequiredNextActions) - } - - acceptedDecision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: true, - Facts: runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{{Path: "test.txt", Bytes: 1, WorkspaceWrite: true}}, - }, - Verification: runtimefacts.VerificationFacts{ - Passed: []runtimefacts.VerificationFact{{Tool: "filesystem_read_file", Scope: "artifact:test.txt"}}, - }, - }, - }) - if acceptedDecision.Status != DecisionAccepted { - t.Fatalf("status = %q, want %q", acceptedDecision.Status, DecisionAccepted) - } -} - -func TestDecideWorkspaceWriteNoopSatisfiedByVerificationFacts(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: true, - UserGoal: "创建 2.txt 内容为 2", - Facts: runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Exists: []runtimefacts.FileExistFact{{Path: "2.txt", Source: "filesystem_write_file_noop"}}, - ContentMatch: []runtimefacts.FileContentMatchFact{{ - Path: "2.txt", - Scope: "artifact:2.txt", - ExpectedContains: []string{"2"}, - VerificationPassed: true, - }}, - }, - Verification: runtimefacts.VerificationFacts{ - Passed: []runtimefacts.VerificationFact{{Tool: "filesystem_write_file", Scope: "artifact:2.txt"}}, - }, - }, - }) - if decision.Status != DecisionAccepted { - t.Fatalf("status = %q, want %q", decision.Status, DecisionAccepted) - } -} - -func TestDecideWorkspaceWriteRepeatedNoopShouldStayAccepted(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: true, - UserGoal: "创建 2.txt 内容为 2", - Facts: runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{ - {Path: "2.txt", Bytes: 1, WorkspaceWrite: true, ExpectedContent: "2"}, - }, - Exists: []runtimefacts.FileExistFact{ - {Path: "2.txt", Source: "filesystem_write_file"}, - {Path: "2.txt", Source: "filesystem_write_file_noop"}, - }, - ContentMatch: []runtimefacts.FileContentMatchFact{{ - Path: "2.txt", - Scope: "artifact:2.txt", - ExpectedContains: []string{"2"}, - VerificationPassed: true, - }}, - }, - Verification: runtimefacts.VerificationFacts{ - Performed: []runtimefacts.VerificationFact{{Tool: "filesystem_write_file", Scope: "artifact:2.txt"}}, - Passed: []runtimefacts.VerificationFact{{Tool: "filesystem_write_file", Scope: "artifact:2.txt"}}, - }, - }, - }) - if decision.Status != DecisionAccepted { - t.Fatalf("status = %q, want %q", decision.Status, DecisionAccepted) - } -} - -func TestDecideWorkspaceWriteVerificationMustBindTarget(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: true, - Facts: runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{{Path: "test.txt", Bytes: 1, WorkspaceWrite: true}}, - }, - Verification: runtimefacts.VerificationFacts{ - Passed: []runtimefacts.VerificationFact{{Tool: "filesystem_read_file", Scope: "artifact:other.txt"}}, - }, - }, - }) - if decision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", decision.Status, DecisionContinue) - } - if len(decision.MissingFacts) == 0 || decision.MissingFacts[0].Target != "test.txt" { - t.Fatalf("missing facts = %+v, want target test.txt", decision.MissingFacts) - } -} - -func TestDecideWorkspaceWriteVerificationShouldNotMatchByBasenameOnly(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: true, - Facts: runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{{Path: "src/readme.md", Bytes: 1, WorkspaceWrite: true}}, - }, - Verification: runtimefacts.VerificationFacts{ - Passed: []runtimefacts.VerificationFact{{Tool: "filesystem_read_file", Scope: "artifact:docs/readme.md"}}, - }, - }, - }) - assertDecisionStatus(t, decision, DecisionContinue) - if len(decision.MissingFacts) == 0 || decision.MissingFacts[0].Target != "src/readme.md" { - t.Fatalf("missing facts = %+v, want target src/readme.md", decision.MissingFacts) - } -} - -func TestDecideCompletionBlockedUnverifiedWriteUsesExpectedContentWhenAvailable(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: false, - CompletionReason: "unverified_write", - Facts: runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{ - {Path: "2.txt", Bytes: 1, WorkspaceWrite: true, ExpectedContent: "2"}, - }, - }, - }, - }) - if decision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", decision.Status, DecisionContinue) - } - if len(decision.RequiredNextActions) == 0 || decision.RequiredNextActions[0].Tool != "filesystem_read_file" { - t.Fatalf("required actions = %+v, want filesystem_read_file", decision.RequiredNextActions) - } - expectContains, _ := decision.RequiredNextActions[0].ArgsHint["expect_contains"].([]string) - if len(expectContains) != 1 || expectContains[0] != "2" { - t.Fatalf("expect_contains = %#v, want [\"2\"]", decision.RequiredNextActions[0].ArgsHint["expect_contains"]) - } -} - -func TestDecideCompletionBlockedUnverifiedWriteFallsBackToExistsWhenContentUnknown(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: false, - CompletionReason: "unverified_write", - Facts: runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{ - {Path: "2.txt", Bytes: 1, WorkspaceWrite: true}, - }, - }, - }, - }) - if decision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", decision.Status, DecisionContinue) - } - if len(decision.MissingFacts) == 0 || decision.MissingFacts[0].Kind != "file_exists" { - t.Fatalf("missing facts = %+v, want file_exists", decision.MissingFacts) - } - if len(decision.RequiredNextActions) == 0 || decision.RequiredNextActions[0].Tool != "filesystem_glob" { - t.Fatalf("required actions = %+v, want filesystem_glob", decision.RequiredNextActions) - } -} - -func TestDecideWorkspaceWriteHardFailureStops(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: true, - Facts: runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{{Path: "Z:/not-exist/test.txt", Bytes: 1, WorkspaceWrite: true}}, - }, - Errors: runtimefacts.ErrorFacts{ - ToolErrors: []runtimefacts.ToolErrorFact{ - {Tool: "filesystem_write_file", ErrorClass: "permission_denied", Content: "permission denied for Z:/not-exist/test.txt"}, - {Tool: "filesystem_write_file", ErrorClass: "permission_denied", Content: "permission denied for Z:/not-exist/test.txt"}, - }, - }, - }, - }) - if decision.Status != DecisionFailed { - t.Fatalf("status = %q, want %q", decision.Status, DecisionFailed) - } -} - -func TestDecideWorkspaceWriteHardFailureRequiresTargetCorrelation(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: true, - Facts: runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{{Path: "target/test.txt", Bytes: 1, WorkspaceWrite: true}}, - }, - Errors: runtimefacts.ErrorFacts{ - ToolErrors: []runtimefacts.ToolErrorFact{ - {Tool: "filesystem_write_file", ErrorClass: "permission_denied", Content: "permission denied for other/path.txt"}, - {Tool: "filesystem_write_file", ErrorClass: "permission_denied", Content: "permission denied for another/path.txt"}, - }, - }, - }, - }) - assertDecisionStatus(t, decision, DecisionContinue) -} - -func TestDecideWorkspaceWriteWithoutExplicitTargetFallsBackToAccepted(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: true, - UserGoal: "edit file", - }) - if decision.Status != DecisionAccepted { - t.Fatalf("status = %q, want %q", decision.Status, DecisionAccepted) - } -} - -func TestLatestWriteVerificationHintBranches(t *testing.T) { - facts := runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{ - {Path: "a.txt", ExpectedContent: "A"}, - {Path: "b.txt", ExpectedContent: "B"}, - }, - }, - } - path, expected := latestWriteVerificationHint(facts, "b.txt") - if path != "b.txt" || expected != "B" { - t.Fatalf("hint for preferred path = (%q,%q), want (b.txt,B)", path, expected) - } - - path, expected = latestWriteVerificationHint(facts, "missing.txt") - if path != "missing.txt" || expected != "" { - t.Fatalf("fallback preferred hint = (%q,%q), want (missing.txt,\"\")", path, expected) - } - - path, expected = latestWriteVerificationHint(facts, "") - if path != "b.txt" || expected != "B" { - t.Fatalf("latest hint = (%q,%q), want (b.txt,B)", path, expected) - } -} - -func TestDecideWorkspaceWriteMissingFactsShouldRequestInputNotPlaceholderAction(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: true, - UserGoal: "请帮我修一下", - }) - if decision.Status != DecisionAccepted { - t.Fatalf("status = %q, want %q", decision.Status, DecisionAccepted) - } - - decision = Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: false, - CompletionReason: "unverified_write", - UserGoal: "请帮我修一下", - }) - if decision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", decision.Status, DecisionContinue) - } - if decision.RequiredInput == nil { - t.Fatalf("required_input is nil") - } - if len(decision.RequiredNextActions) != 0 { - t.Fatalf("required actions = %+v, want empty", decision.RequiredNextActions) - } -} - -func TestDecideWorkspaceWriteSelectsLatestMentionedTarget(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: true, - UserGoal: "创建 2.txt 内容为 2", - Facts: runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{ - {Path: "1.txt", WorkspaceWrite: true, ExpectedContent: "1"}, - {Path: "2.txt", WorkspaceWrite: true, ExpectedContent: "2"}, - }, - }, - }, - }) - if decision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", decision.Status, DecisionContinue) - } - if len(decision.MissingFacts) == 0 || decision.MissingFacts[0].Target != "2.txt" { - t.Fatalf("missing facts = %+v, want target 2.txt", decision.MissingFacts) - } -} - -func TestDecideRequiredNextActionsShouldNotContainPlaceholders(t *testing.T) { - decisions := []Decision{ - Decide(DecisionInput{ - TaskKind: TaskKindSubAgent, - CompletionPassed: true, - UserGoal: "用 subagent 创建 test1.txt 内容为 1", - }), - Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: false, - CompletionReason: "unverified_write", - UserGoal: "请继续", - }), - Decide(DecisionInput{ - TaskKind: TaskKindTodoState, - CompletionPassed: true, - UserGoal: "创建 todo", - }), - } - for i, decision := range decisions { - payload, err := json.Marshal(decision.RequiredNextActions) - if err != nil { - t.Fatalf("marshal required_next_actions[%d] failed: %v", i, err) - } - serialized := string(payload) - if strings.Contains(serialized, "<") || strings.Contains(serialized, ">") { - t.Fatalf("required_next_actions[%d] contains placeholder: %s", i, serialized) - } - } -} - -func TestDecideSubAgentRequiresCompletedFact(t *testing.T) { - continueDecision := Decide(DecisionInput{ - TaskKind: TaskKindSubAgent, - CompletionPassed: true, - Facts: runtimefacts.RuntimeFacts{ - SubAgents: runtimefacts.SubAgentFacts{ - Started: []runtimefacts.SubAgentFact{{TaskID: "sa-1", Role: "reviewer"}}, - }, - }, - }) - if continueDecision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", continueDecision.Status, DecisionContinue) - } - - failedDecision := Decide(DecisionInput{ - TaskKind: TaskKindSubAgent, - CompletionPassed: true, - Facts: runtimefacts.RuntimeFacts{ - SubAgents: runtimefacts.SubAgentFacts{ - Started: []runtimefacts.SubAgentFact{{TaskID: "sa-1", Role: "reviewer"}}, - Failed: []runtimefacts.SubAgentFact{{TaskID: "sa-1", Role: "reviewer"}}, - }, - }, - }) - if failedDecision.Status != DecisionFailed { - t.Fatalf("status = %q, want %q", failedDecision.Status, DecisionFailed) - } - - acceptedDecision := Decide(DecisionInput{ - TaskKind: TaskKindSubAgent, - CompletionPassed: true, - Facts: runtimefacts.RuntimeFacts{ - SubAgents: runtimefacts.SubAgentFacts{ - Started: []runtimefacts.SubAgentFact{{TaskID: "sa-1", Role: "reviewer"}}, - Completed: []runtimefacts.SubAgentFact{{TaskID: "sa-1", Role: "reviewer"}}, - }, - }, - }) - if acceptedDecision.Status != DecisionAccepted { - t.Fatalf("status = %q, want %q", acceptedDecision.Status, DecisionAccepted) - } -} - -func TestDecideSubAgentWriteIntentNeedsArtifactEvidence(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindSubAgent, - CompletionPassed: true, - UserGoal: "用 subagent 创建 test1.txt,内容为 1", - Facts: runtimefacts.RuntimeFacts{ - SubAgents: runtimefacts.SubAgentFacts{ - Started: []runtimefacts.SubAgentFact{{TaskID: "sa-1", Role: "coder"}}, - Completed: []runtimefacts.SubAgentFact{{TaskID: "sa-1", Role: "coder"}}, - }, - }, - }) - if decision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", decision.Status, DecisionContinue) - } - if len(decision.MissingFacts) == 0 || decision.MissingFacts[0].Kind != "subagent_artifact_or_file_fact" { - t.Fatalf("missing facts = %+v", decision.MissingFacts) - } - - accepted := Decide(DecisionInput{ - TaskKind: TaskKindSubAgent, - CompletionPassed: true, - UserGoal: "用 subagent 创建 test1.txt,内容为 1", - Facts: runtimefacts.RuntimeFacts{ - SubAgents: runtimefacts.SubAgentFacts{ - Started: []runtimefacts.SubAgentFact{{TaskID: "sa-1", Role: "coder"}}, - Completed: []runtimefacts.SubAgentFact{{ - TaskID: "sa-1", Role: "coder", Artifacts: []string{"test1.txt"}, - }}, - }, - }, - }) - if accepted.Status != DecisionAccepted { - t.Fatalf("status = %q, want %q", accepted.Status, DecisionAccepted) - } -} - -func TestDecideTodoStateAcceptsWithoutFileVerification(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindTodoState, - CompletionPassed: true, - Todos: TodoSnapshot{ - Items: []TodoViewItem{ - {ID: "todo-1", Content: "创建 Todo", Status: "pending", Required: false}, - }, - Summary: TodoSummary{Total: 1, RequiredTotal: 0}, - }, - Facts: runtimefacts.RuntimeFacts{ - Todos: runtimefacts.TodoFacts{CreatedIDs: []string{"todo-1"}}, - }, - }) - if decision.Status != DecisionAccepted { - t.Fatalf("status = %q, want %q", decision.Status, DecisionAccepted) - } -} - -func TestDecideReadOnlyBranches(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindReadOnly, - CompletionPassed: true, - }) - if decision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", decision.Status, DecisionContinue) - } - - decision = Decide(DecisionInput{ - TaskKind: TaskKindReadOnly, - CompletionPassed: true, - LastAssistantText: "analysis done", - }) - if decision.Status != DecisionAccepted { - t.Fatalf("status = %q, want %q", decision.Status, DecisionAccepted) - } - - decision = Decide(DecisionInput{ - TaskKind: TaskKindReadOnly, - CompletionPassed: true, - Facts: runtimefacts.RuntimeFacts{ - Commands: runtimefacts.CommandFacts{ - Executed: []runtimefacts.CommandFact{{Tool: "bash", Command: "ls", ExitCode: 0, Succeeded: true}}, - }, - }, - }) - if decision.Status != DecisionAccepted { - t.Fatalf("status = %q, want %q", decision.Status, DecisionAccepted) - } -} - -func TestDecideMixedBranches(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindMixed, - CompletionPassed: true, - }) - if decision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", decision.Status, DecisionContinue) - } - - decision = Decide(DecisionInput{ - TaskKind: TaskKindMixed, - CompletionPassed: true, - Todos: TodoSnapshot{ - Summary: TodoSummary{RequiredOpen: 1}, - }, - Facts: runtimefacts.RuntimeFacts{ - Verification: runtimefacts.VerificationFacts{ - Passed: []runtimefacts.VerificationFact{{Tool: "filesystem_glob", Scope: "artifact:test.txt"}}, - }, - }, - }) - if decision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", decision.Status, DecisionContinue) - } - - decision = Decide(DecisionInput{ - TaskKind: TaskKindMixed, - CompletionPassed: true, - LastAssistantText: "analysis done", - Facts: runtimefacts.RuntimeFacts{ - Verification: runtimefacts.VerificationFacts{ - Passed: []runtimefacts.VerificationFact{{Tool: "filesystem_glob", Scope: "artifact:test.txt"}}, - }, - }, - }) - if decision.Status != DecisionAccepted { - t.Fatalf("status = %q, want %q", decision.Status, DecisionAccepted) - } -} - -func TestDecideWorkspaceWriteInjectsLatestToolErrorIntoMissingFact(t *testing.T) { - decision := Decide(DecisionInput{ - TaskKind: TaskKindWorkspaceWrite, - CompletionPassed: true, - UserGoal: "please update README.md", - Facts: runtimefacts.RuntimeFacts{ - Errors: runtimefacts.ErrorFacts{ - ToolErrors: []runtimefacts.ToolErrorFact{ - {Tool: "filesystem_write_file", ErrorClass: "permission_denied", Content: "first error"}, - {Tool: "filesystem_write_file", ErrorClass: "generic_error", Content: ""}, - }, - }, - }, - }) - if decision.Status != DecisionContinue { - t.Fatalf("status = %q, want %q", decision.Status, DecisionContinue) - } - if len(decision.MissingFacts) == 0 { - t.Fatalf("missing facts = %+v", decision.MissingFacts) - } - details := decision.MissingFacts[0].Details - if details["last_write_error"] != "generic_error" { - t.Fatalf("last_write_error = %#v, want generic_error", details["last_write_error"]) - } -} diff --git a/internal/runtime/decider/infer.go b/internal/runtime/decider/infer.go deleted file mode 100644 index c9a24adc..00000000 --- a/internal/runtime/decider/infer.go +++ /dev/null @@ -1,125 +0,0 @@ -package decider - -import ( - "strings" - - runtimefacts "neo-code/internal/runtime/facts" -) - -// InferTaskKind 通过规则推断任务类型,避免依赖模型分类。 -func InferTaskKind(goal string) TaskKind { - return InferTaskIntent(goal).Hint -} - -// InferTaskIntent 基于用户文本推断弱意图,仅作 hint,不作为强验收依据。 -func InferTaskIntent(goal string) TaskIntent { - text := strings.ToLower(strings.TrimSpace(goal)) - if text == "" { - return TaskIntent{Hint: TaskKindChatAnswer, Confidence: 0.2, Reasons: []string{"empty_goal"}} - } - - hasTodo := containsAny(text, "todo", "待办") - hasSubAgent := containsAny(text, "subagent", "子代理") - hasWriteVerb := containsAny( - text, - "创建文件", "写入", "修改文件", "编辑文件", "新增文件", "补丁", "修复代码", - "create file", "write file", "edit file", "update file", "apply patch", - ) - hasFileTarget := containsAny(text, ".txt", ".go", ".md", ".json", ".yaml", ".yml", ".ts", ".tsx") - hasNamedWriteTarget := containsAny(text, "readme", "package.json") - hasWriteIntentToken := containsAny(text, "创建", "写", "改", "补", "edit", "write", "update", "create", "modify") - hasWrite := hasWriteVerb || ((hasFileTarget || hasNamedWriteTarget) && hasWriteIntentToken) - hasRead := containsAny( - text, - "读取", "查看", "看看", "总结", "分析", "检索", "搜索", "审查", "review", "verify", "验证", "校验", "怎么修", - "read", "grep", "glob", "list", "inspect", "analyze", "summarize", - ) - hasReviewIntent := containsAny(text, "review", "审查", "总结", "分析", "analyze", "summarize") - hasPlan := containsAny(text, "计划", "规划", "plan", "todo 列表", "todo list") - hasTodoAction := containsAny(text, "创建 todo", "更新 todo", "完成 todo", "标记 todo", "todo") - - intent := TaskIntent{Hint: TaskKindChatAnswer, Confidence: 0.35, Reasons: []string{"fallback_chat"}} - switch { - case hasSubAgent && hasWrite: - intent.Hint = TaskKindSubAgent - intent.Confidence = 0.9 - intent.Reasons = []string{"subagent_keyword", "write_intent"} - case hasSubAgent: - intent.Hint = TaskKindSubAgent - intent.Confidence = 0.82 - intent.Reasons = []string{"subagent_keyword"} - case hasTodo && hasTodoAction: - intent.Hint = TaskKindTodoState - intent.Confidence = 0.78 - intent.Reasons = []string{"todo_action_keyword"} - case hasPlan && hasTodo && !hasWrite: - intent.Hint = TaskKindTodoState - intent.Confidence = 0.84 - intent.Reasons = []string{"todo_keyword", "plan_keyword"} - case hasWrite && hasReviewIntent: - intent.Hint = TaskKindMixed - intent.Confidence = 0.75 - intent.Reasons = []string{"write_intent", "read_intent"} - case hasWrite: - intent.Hint = TaskKindWorkspaceWrite - intent.Confidence = 0.72 - intent.Reasons = []string{"write_intent"} - case hasRead: - intent.Hint = TaskKindReadOnly - intent.Confidence = 0.7 - intent.Reasons = []string{"read_intent"} - } - return intent -} - -// DeriveEffectiveTaskKind 基于运行事实修正任务类型;文本 hint 仅作回退。 -func DeriveEffectiveTaskKind(hint TaskKind, allFacts runtimefacts.RuntimeFacts, todos TodoSnapshot) TaskKind { - hasWrite := len(allFacts.Files.Written) > 0 || len(allFacts.Files.ContentMatch) > 0 - if !hasWrite && hint == TaskKindWorkspaceWrite && hasArtifactVerificationPassed(allFacts) { - hasWrite = true - } - hasVerification := len(allFacts.Verification.Passed) > 0 - hasSubAgent := len(allFacts.SubAgents.Started) > 0 || len(allFacts.SubAgents.Completed) > 0 || len(allFacts.SubAgents.Failed) > 0 - hasTodo := todos.Summary.Total > 0 || len(allFacts.Todos.CreatedIDs) > 0 || len(allFacts.Todos.CompletedIDs) > 0 || len(allFacts.Todos.FailedIDs) > 0 - hasRead := len(allFacts.Files.Exists) > 0 || len(allFacts.Commands.Executed) > 0 - - switch { - case hasSubAgent && (hasWrite || hasTodo || hasVerification): - return TaskKindMixed - case hasSubAgent: - return TaskKindSubAgent - case hasWrite && (hasTodo || hasVerification || hasRead): - return TaskKindWorkspaceWrite - case hasWrite: - return TaskKindWorkspaceWrite - case hasTodo && !hasWrite: - return TaskKindTodoState - case hasRead || hasVerification: - return TaskKindReadOnly - case strings.TrimSpace(string(hint)) != "": - return hint - default: - return TaskKindChatAnswer - } -} - -// hasArtifactVerificationPassed 判断是否存在与产物路径绑定的通过验证事实。 -func hasArtifactVerificationPassed(allFacts runtimefacts.RuntimeFacts) bool { - for _, fact := range allFacts.Verification.Passed { - scope := strings.TrimSpace(fact.Scope) - if strings.HasPrefix(strings.ToLower(scope), "artifact:") { - return true - } - } - return false -} - -// containsAny 判断文本是否包含任一关键词。 -func containsAny(text string, keywords ...string) bool { - for _, keyword := range keywords { - if strings.Contains(text, strings.ToLower(strings.TrimSpace(keyword))) { - return true - } - } - return false -} diff --git a/internal/runtime/decider/infer_test.go b/internal/runtime/decider/infer_test.go deleted file mode 100644 index b6c8fe05..00000000 --- a/internal/runtime/decider/infer_test.go +++ /dev/null @@ -1,117 +0,0 @@ -package decider - -import ( - "testing" - - runtimefacts "neo-code/internal/runtime/facts" -) - -func TestInferTaskKind(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - goal string - want TaskKind - }{ - { - name: "todo plan", - goal: "请创建 todo 列表并规划后续任务", - want: TaskKindTodoState, - }, - { - name: "workspace write", - goal: "创建文件 test.txt 并写入 1", - want: TaskKindWorkspaceWrite, - }, - { - name: "review read only", - goal: "review README.md 并总结风险", - want: TaskKindReadOnly, - }, - { - name: "mixed write and review", - goal: "edit main.go then review changes", - want: TaskKindMixed, - }, - { - name: "subagent explicit", - goal: "用 subagent 创建 test1.txt,内容为 1", - want: TaskKindSubAgent, - }, - { - name: "chat answer fallback", - goal: "什么是 NeoCode", - want: TaskKindChatAnswer, - }, - { - name: "greeting chat answer", - goal: "你好", - want: TaskKindChatAnswer, - }, - { - name: "bug fix discussion should not be workspace write", - goal: "帮我看看这个 bug 怎么修", - want: TaskKindReadOnly, - }, - { - name: "review implementation should be read only", - goal: "review this implementation and suggest fixes", - want: TaskKindReadOnly, - }, - { - name: "update readme should be workspace write", - goal: "把 README 补一下", - want: TaskKindWorkspaceWrite, - }, - { - name: "todo creation should be todo state", - goal: "创建一个 Todo,内容是 1", - want: TaskKindTodoState, - }, - { - name: "todo content contains write target should still be todo state hint", - goal: "创建一个 Todo,内容是创建 test.txt 内容为 1", - want: TaskKindTodoState, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - got := InferTaskKind(tt.goal) - if got != tt.want { - t.Fatalf("InferTaskKind(%q) = %q, want %q", tt.goal, got, tt.want) - } - }) - } -} - -func TestDeriveEffectiveTaskKindFactsOverrideHint(t *testing.T) { - t.Parallel() - - if got := DeriveEffectiveTaskKind(TaskKindChatAnswer, runtimefacts.RuntimeFacts{ - Files: runtimefacts.FileFacts{ - Written: []runtimefacts.FileWriteFact{{Path: "a.txt", WorkspaceWrite: true}}, - }, - }, TodoSnapshot{}); got != TaskKindWorkspaceWrite { - t.Fatalf("effective kind = %q, want %q", got, TaskKindWorkspaceWrite) - } - - if got := DeriveEffectiveTaskKind(TaskKindWorkspaceWrite, runtimefacts.RuntimeFacts{ - Commands: runtimefacts.CommandFacts{ - Executed: []runtimefacts.CommandFact{{Tool: "bash", Command: "ls", Succeeded: true}}, - }, - }, TodoSnapshot{}); got != TaskKindReadOnly { - t.Fatalf("effective kind = %q, want %q", got, TaskKindReadOnly) - } - - if got := DeriveEffectiveTaskKind(TaskKindWorkspaceWrite, runtimefacts.RuntimeFacts{ - Verification: runtimefacts.VerificationFacts{ - Passed: []runtimefacts.VerificationFact{{Tool: "filesystem_write_file", Scope: "artifact:2.txt"}}, - }, - }, TodoSnapshot{}); got != TaskKindWorkspaceWrite { - t.Fatalf("effective kind = %q, want %q when artifact verification exists", got, TaskKindWorkspaceWrite) - } -} diff --git a/internal/runtime/decider/types.go b/internal/runtime/decider/types.go deleted file mode 100644 index d45eba92..00000000 --- a/internal/runtime/decider/types.go +++ /dev/null @@ -1,134 +0,0 @@ -package decider - -import "neo-code/internal/runtime/facts" - -// TaskKind 描述任务验收的主类型。 -type TaskKind string - -const ( - // TaskKindChatAnswer 表示普通问答任务。 - TaskKindChatAnswer TaskKind = "chat_answer" - // TaskKindTodoState 表示 todo 状态任务。 - TaskKindTodoState TaskKind = "todo_state" - // TaskKindWorkspaceWrite 表示工作区写入任务。 - TaskKindWorkspaceWrite TaskKind = "workspace_write" - // TaskKindSubAgent 表示显式子代理任务。 - TaskKindSubAgent TaskKind = "subagent" - // TaskKindReadOnly 表示只读分析任务。 - TaskKindReadOnly TaskKind = "read_only" - // TaskKindMixed 表示混合任务。 - TaskKindMixed TaskKind = "mixed" -) - -// DecisionStatus 表示终态决策状态。 -type DecisionStatus string - -const ( - // DecisionAccepted 表示满足收尾条件。 - DecisionAccepted DecisionStatus = "accepted" - // DecisionContinue 表示仍需继续执行。 - DecisionContinue DecisionStatus = "continue" - // DecisionFailed 表示任务失败终止。 - DecisionFailed DecisionStatus = "failed" - // DecisionBlocked 表示被外部条件阻塞。 - DecisionBlocked DecisionStatus = "blocked" - // DecisionIncomplete 表示长时间无进展后未完成终止。 - DecisionIncomplete DecisionStatus = "incomplete" -) - -// MissingFact 描述 continue 场景下缺失的客观事实。 -type MissingFact struct { - Kind string `json:"kind"` - Target string `json:"target,omitempty"` - Expected string `json:"expected,omitempty"` - Details map[string]any `json:"details,omitempty"` -} - -// RequiredAction 描述下一轮建议工具动作。 -type RequiredAction struct { - Tool string `json:"tool"` - ArgsHint map[string]any `json:"args_hint,omitempty"` -} - -// RequiredInput 描述继续执行前必须补充的人类输入。 -type RequiredInput struct { - Kind string `json:"kind"` - Message string `json:"message"` - Details map[string]any `json:"details,omitempty"` -} - -// HookGuardSignal 描述 before_completion_decision user/repo hook 产生的守卫信号。 -type HookGuardSignal struct { - HookID string `json:"hook_id,omitempty"` - Source string `json:"source,omitempty"` - Message string `json:"message,omitempty"` -} - -// TaskIntent 描述由用户文本推断出的弱意图线索。 -type TaskIntent struct { - Hint TaskKind `json:"hint,omitempty"` - Confidence float64 `json:"confidence,omitempty"` - Reasons []string `json:"reasons,omitempty"` -} - -// Decision 描述最终裁决结果。 -type Decision struct { - Status DecisionStatus `json:"status"` - StopReason string `json:"stop_reason,omitempty"` - MissingFacts []MissingFact `json:"missing_facts,omitempty"` - RequiredNextActions []RequiredAction `json:"required_next_actions,omitempty"` - RequiredInput *RequiredInput `json:"required_input,omitempty"` - IntentHint TaskKind `json:"intent_hint,omitempty"` - EffectiveTaskKind TaskKind `json:"effective_task_kind,omitempty"` - UserVisibleSummary string `json:"user_visible_summary,omitempty"` - InternalSummary string `json:"internal_summary,omitempty"` -} - -// TodoViewItem 描述决策所需 todo 快照条目。 -type TodoViewItem struct { - ID string - Content string - Status string - Required bool - Artifacts []string - FailureReason string - BlockedReason string - Revision int64 -} - -// TodoSummary 描述决策所需 todo 汇总。 -type TodoSummary struct { - Total int - RequiredTotal int - RequiredCompleted int - RequiredFailed int - RequiredOpen int -} - -// TodoSnapshot 描述决策所需 todo 快照。 -type TodoSnapshot struct { - Items []TodoViewItem - Summary TodoSummary -} - -// ProgressSnapshot 描述 no-progress 判定所需信息。 -type ProgressSnapshot struct { - FactCount int -} - -// DecisionInput 描述终态裁决输入。 -type DecisionInput struct { - RunID string - SessionID string - TaskKind TaskKind - UserGoal string - Facts facts.RuntimeFacts - Todos TodoSnapshot - Progress ProgressSnapshot - LastAssistantText string - CompletionPassed bool - CompletionReason string - NoProgressExceeded bool - HookAnnotations []string - HookGuards []HookGuardSignal -} diff --git a/internal/runtime/events.go b/internal/runtime/events.go index 0e4dbbed..0809b092 100644 --- a/internal/runtime/events.go +++ b/internal/runtime/events.go @@ -3,7 +3,6 @@ package runtime import ( "time" - "neo-code/internal/runtime/acceptance" "neo-code/internal/runtime/acceptgate" "neo-code/internal/runtime/controlplane" "neo-code/internal/runtime/verify" @@ -78,9 +77,9 @@ type VerificationStageFinishedPayload struct { // VerificationFinishedPayload 描述整体验证流程结束事件。 type VerificationFinishedPayload struct { - AcceptanceStatus acceptance.AcceptanceStatus `json:"acceptance_status"` - StopReason controlplane.StopReason `json:"stop_reason,omitempty"` - ErrorClass verify.ErrorClass `json:"error_class,omitempty"` + AcceptanceStatus string `json:"acceptance_status"` + StopReason controlplane.StopReason `json:"stop_reason,omitempty"` + ErrorClass verify.ErrorClass `json:"error_class,omitempty"` } // VerificationCompletedPayload 描述验证通过并可完成的事件。 @@ -96,15 +95,15 @@ type VerificationFailedPayload struct { // AcceptanceDecidedPayload 描述 acceptance engine 决议结果。 type AcceptanceDecidedPayload struct { - Status acceptance.AcceptanceStatus `json:"status"` - StopReason controlplane.StopReason `json:"stop_reason,omitempty"` - ErrorClass verify.ErrorClass `json:"error_class,omitempty"` - CompletionBlockedReason string `json:"completion_blocked_reason,omitempty"` - UserVisibleSummary string `json:"user_visible_summary,omitempty"` - InternalSummary string `json:"internal_summary,omitempty"` - ContinueHint string `json:"continue_hint,omitempty"` - Summary string `json:"summary,omitempty"` - Results []acceptgate.CheckResult `json:"results,omitempty"` + Status string `json:"status"` + StopReason controlplane.StopReason `json:"stop_reason,omitempty"` + ErrorClass verify.ErrorClass `json:"error_class,omitempty"` + CompletionBlockedReason string `json:"completion_blocked_reason,omitempty"` + UserVisibleSummary string `json:"user_visible_summary,omitempty"` + InternalSummary string `json:"internal_summary,omitempty"` + ContinueHint string `json:"continue_hint,omitempty"` + Summary string `json:"summary,omitempty"` + Results []acceptgate.CheckResult `json:"results,omitempty"` } // LedgerReconciledPayload 为账本对账预留负载。 @@ -524,16 +523,16 @@ type RunDiffSummaryPayload struct { // UserQuestionRequestedPayload 描述 ask_user 提问事件负载。 type UserQuestionRequestedPayload struct { - RequestID string `json:"request_id"` - QuestionID string `json:"question_id"` - Title string `json:"title"` - Description string `json:"description"` - Kind string `json:"kind"` - Options []any `json:"options,omitempty"` - Required bool `json:"required"` - AllowSkip bool `json:"allow_skip"` - MaxChoices int `json:"max_choices,omitempty"` - TimeoutSec int `json:"timeout_sec,omitempty"` + RequestID string `json:"request_id"` + QuestionID string `json:"question_id"` + Title string `json:"title"` + Description string `json:"description"` + Kind string `json:"kind"` + Options []any `json:"options,omitempty"` + Required bool `json:"required"` + AllowSkip bool `json:"allow_skip"` + MaxChoices int `json:"max_choices,omitempty"` + TimeoutSec int `json:"timeout_sec,omitempty"` } // UserQuestionResolvedPayload 描述 ask_user 回答/跳过/超时事件负载。 diff --git a/internal/runtime/facts/collector.go b/internal/runtime/facts/collector.go index fa1ba230..a4a78f60 100644 --- a/internal/runtime/facts/collector.go +++ b/internal/runtime/facts/collector.go @@ -110,7 +110,7 @@ func (c *Collector) ApplyToolResult(toolName string, result tools.ToolResult) { c.applyVerificationFacts(name, result) } -// applyWorkspaceWritePathFacts 将工具 metadata 中声明的写入路径转成可被 decider 验收的文件事实。 +// applyWorkspaceWritePathFacts 将工具 metadata 中声明的写入路径转成可被 Accept Gate 消费的文件事实。 func (c *Collector) applyWorkspaceWritePathFacts(result tools.ToolResult, source string) { if !result.Facts.WorkspaceWrite { return diff --git a/internal/runtime/final_acceptance.go b/internal/runtime/final_acceptance.go deleted file mode 100644 index c55353e8..00000000 --- a/internal/runtime/final_acceptance.go +++ /dev/null @@ -1,812 +0,0 @@ -package runtime - -import ( - "context" - "encoding/json" - "fmt" - "slices" - "strings" - - providertypes "neo-code/internal/provider/types" - "neo-code/internal/runtime/acceptance" - "neo-code/internal/runtime/controlplane" - "neo-code/internal/runtime/decider" - runtimefacts "neo-code/internal/runtime/facts" - "neo-code/internal/runtime/verify" - agentsession "neo-code/internal/session" -) - -const finalContinueReminder = "There are unfinished required todos or unmet acceptance checks. Continue execution. Do not finalize yet." - -// beforeAcceptFinal 在 runtime 接受模型 final 前执行唯一的 completion/verifier/acceptance 闭环。 -func (s *Service) beforeAcceptFinal( - ctx context.Context, - state *runState, - snapshot TurnBudgetSnapshot, - assistant providertypes.Message, - completionPassed bool, - signals beforeCompletionHookSignals, -) (acceptance.AcceptanceDecision, error) { - if state == nil { - return acceptance.AcceptanceDecision{}, nil - } - - maxNoProgress := resolveNoProgressStreakLimit(snapshot.Config.Runtime) - noProgressStreak := state.finalInterceptStreak - if noProgressStreak < 0 { - noProgressStreak = 0 - } - if state.mustUseToolAfterFinalContinue && state.noToolAfterFinalContinueStreak > noProgressStreak { - noProgressStreak = state.noToolAfterFinalContinueStreak - } - - input := s.buildAcceptanceServiceInput( - state, - snapshot, - assistant, - completionPassed, - signals, - noProgressStreak, - maxNoProgress, - ) - service := &acceptanceService{} - acceptanceDecision, err := service.Decide(ctx, input) - if err != nil { - return acceptance.AcceptanceDecision{}, err - } - deciderDecision := toDeciderDecisionFromAcceptance(acceptanceDecision) - state.mu.Lock() - state.lastDeciderDecision = deciderDecision - pendingFinalProgress := state.pendingFinalProgress - state.mu.Unlock() - s.emitRunScopedOptional(EventDecisionMade, state, deciderDecision) - s.emitRuntimeSnapshotUpdated(ctx, state, "decision_made") - if acceptanceDecision.Status == acceptance.AcceptanceContinue && pendingFinalProgress { - acceptanceDecision.HasProgress = true - } - return acceptanceDecision, nil -} - -// buildAcceptanceServiceInput 从当前运行态抽取 AcceptanceService 所需输入。 -func (s *Service) buildAcceptanceServiceInput( - state *runState, - snapshot TurnBudgetSnapshot, - assistant providertypes.Message, - completionPassed bool, - signals beforeCompletionHookSignals, - noProgressStreak int, - maxNoProgress int, -) acceptanceServiceInput { - state.mu.Lock() - taskKind := state.taskKind - userGoal := state.userGoal - completionReason := strings.TrimSpace(string(state.completion.CompletionBlockedReason)) - verificationProfile := state.session.TaskState.VerificationProfile - sessionMessages := append([]providertypes.Message(nil), state.session.Messages...) - sessionTodos := cloneTodosForPersistence(state.session.Todos) - sessionTaskState := state.session.TaskState - todoSnapshot := buildTodoSnapshotFromItems(sessionTodos) - factsSnapshot := runtimefacts.RuntimeFacts{} - if state.factsCollector != nil { - factsSnapshot = state.factsCollector.Snapshot() - } - taskID := strings.TrimSpace(state.taskID) - runID := strings.TrimSpace(state.runID) - sessionID := strings.TrimSpace(state.session.ID) - turn := state.turn - maxTurnsReached := state.maxTurnsReached - maxTurns := resolveRuntimeMaxTurns(snapshot.Config.Runtime) - state.mu.Unlock() - - if strings.TrimSpace(userGoal) == "" { - userGoal = renderPartsForVerification(assistant.Parts) - } - if strings.TrimSpace(string(taskKind)) == "" { - taskKind = decider.InferTaskKind(userGoal) - } - if todoSnapshot.Summary.RequiredOpen > 0 { - completionPassed = false - if completionReason == "" { - completionReason = string(controlplane.CompletionBlockedReasonPendingTodo) - } - } - if !maxTurnsReached && maxTurns > 0 && turn+1 >= maxTurns { - maxTurnsReached = true - } - verifyInput := verify.FinalVerifyInput{ - SessionID: sessionID, - RunID: runID, - TaskID: taskID, - Workdir: snapshot.Workdir, - Messages: buildVerifyMessages(sessionMessages), - Todos: buildVerifyTodos(sessionTodos), - LastAssistantFinal: renderPartsForVerification(assistant.Parts), - TaskState: buildVerifyTaskState(sessionTaskState), - RuntimeState: verify.RuntimeStateSnapshot{ - Turn: turn, - MaxTurns: maxTurns, - MaxTurnsReached: maxTurnsReached, - FinalInterceptStreak: noProgressStreak, - }, - VerificationConfig: snapshot.Config.Runtime.Verification.Clone(), - } - return acceptanceServiceInput{ - RunID: runID, - SessionID: sessionID, - TaskKind: taskKind, - UserGoal: userGoal, - CompletionPassed: completionPassed, - CompletionBlockedReason: completionReason, - Facts: factsSnapshot, - Todos: toDeciderTodoSnapshot(todoSnapshot), - Progress: toDeciderProgress(factsSnapshot), - LastAssistantText: renderPartsForVerification(assistant.Parts), - HookAnnotations: append([]string(nil), signals.Annotations...), - HookGuards: append([]decider.HookGuardSignal(nil), signals.Guards...), - NoProgressStreak: noProgressStreak, - MaxNoProgress: maxNoProgress, - VerificationProfile: verificationProfile, - VerificationInput: verifyInput, - } -} - -// toDeciderDecisionFromAcceptance 将统一 acceptance 决策投影为 runtime snapshot 兼容的 decider 视图。 -func toDeciderDecisionFromAcceptance(decision acceptance.AcceptanceDecision) decider.Decision { - status := decider.DecisionContinue - switch decision.Status { - case acceptance.AcceptanceAccepted: - status = decider.DecisionAccepted - case acceptance.AcceptanceFailed: - status = decider.DecisionFailed - case acceptance.AcceptanceIncomplete: - status = decider.DecisionIncomplete - } - return decider.Decision{ - Status: status, - StopReason: strings.TrimSpace(string(decision.StopReason)), - MissingFacts: append([]decider.MissingFact(nil), decision.MissingFacts...), - RequiredNextActions: append([]decider.RequiredAction(nil), decision.RequiredNextActions...), - RequiredInput: cloneRequiredInput(decision.RequiredInput), - IntentHint: decision.IntentHint, - EffectiveTaskKind: decision.EffectiveTaskKind, - UserVisibleSummary: strings.TrimSpace(decision.UserVisibleSummary), - InternalSummary: strings.TrimSpace(decision.InternalSummary), - } -} - -func cloneRequiredInput(in *decider.RequiredInput) *decider.RequiredInput { - if in == nil { - return nil - } - cloned := *in - if len(in.Details) == 0 { - return &cloned - } - cloned.Details = make(map[string]any, len(in.Details)) - for k, v := range in.Details { - cloned.Details[k] = v - } - return &cloned -} - -// mapDeciderDecisionToAcceptance 把 FinalDecider 裁决映射到 acceptance 协议。 -// Deprecated: 仅保留给 legacy 回滚对照与测试使用;P7 主链直接消费 AcceptanceService.Decide 产物。 -func mapDeciderDecisionToAcceptance(decision decider.Decision) acceptance.AcceptanceDecision { - out := acceptance.AcceptanceDecision{ - StopReason: toControlplaneStopReason(decision.StopReason), - RequiredInput: cloneRequiredInput(decision.RequiredInput), - IntentHint: decision.IntentHint, - EffectiveTaskKind: decision.EffectiveTaskKind, - UserVisibleSummary: strings.TrimSpace(decision.UserVisibleSummary), - InternalSummary: strings.TrimSpace(decision.InternalSummary), - ContinueHint: buildDeciderContinueHint(decision), - } - switch decision.Status { - case decider.DecisionAccepted: - out.Status = acceptance.AcceptanceAccepted - if out.StopReason == "" { - out.StopReason = controlplane.StopReasonAccepted - } - case decider.DecisionFailed, decider.DecisionBlocked: - out.Status = acceptance.AcceptanceFailed - if out.StopReason == "" { - out.StopReason = controlplane.StopReasonVerificationFailed - } - case decider.DecisionIncomplete: - out.Status = acceptance.AcceptanceIncomplete - if out.StopReason == "" { - out.StopReason = controlplane.StopReasonNoProgressAfterFinalIntercept - } - default: - out.Status = acceptance.AcceptanceContinue - if out.StopReason == "" { - out.StopReason = controlplane.StopReasonTodoNotConverged - } - } - return out -} - -// toDeciderTodoSnapshot 转换 runtime todo 快照到 decider 输入结构。 -func toDeciderTodoSnapshot(snapshot TodoSnapshot) decider.TodoSnapshot { - out := decider.TodoSnapshot{ - Summary: decider.TodoSummary{ - Total: snapshot.Summary.Total, - RequiredTotal: snapshot.Summary.RequiredTotal, - RequiredCompleted: snapshot.Summary.RequiredCompleted, - RequiredFailed: snapshot.Summary.RequiredFailed, - RequiredOpen: snapshot.Summary.RequiredOpen, - }, - } - if len(snapshot.Items) == 0 { - return out - } - out.Items = make([]decider.TodoViewItem, 0, len(snapshot.Items)) - for _, item := range snapshot.Items { - out.Items = append(out.Items, decider.TodoViewItem{ - ID: strings.TrimSpace(item.ID), - Content: strings.TrimSpace(item.Content), - Status: strings.TrimSpace(item.Status), - Required: item.Required, - Artifacts: append([]string(nil), item.Artifacts...), - FailureReason: strings.TrimSpace(item.FailureReason), - BlockedReason: strings.TrimSpace(item.BlockedReason), - Revision: item.Revision, - }) - } - return out -} - -// toDeciderProgress 构建 decider 所需的最小进度快照。 -func toDeciderProgress(factsSnapshot runtimefacts.RuntimeFacts) decider.ProgressSnapshot { - return decider.ProgressSnapshot{ - FactCount: max(0, factsSnapshot.Progress.ObservedFactCount), - } -} - -// toControlplaneStopReason 把 decider stop reason 映射为 controlplane 枚举。 -func toControlplaneStopReason(reason string) controlplane.StopReason { - normalized := strings.TrimSpace(reason) - switch normalized { - case string(controlplane.StopReasonAccepted): - return controlplane.StopReasonAccepted - case string(controlplane.StopReasonTodoNotConverged): - return controlplane.StopReasonTodoNotConverged - case string(controlplane.StopReasonNoProgressAfterFinalIntercept): - return controlplane.StopReasonNoProgressAfterFinalIntercept - case string(controlplane.StopReasonRequiredTodoFailed): - return controlplane.StopReasonRequiredTodoFailed - case string(controlplane.StopReasonVerificationFailed): - return controlplane.StopReasonVerificationFailed - case string(controlplane.StopReasonTodoWaitingExternal): - return controlplane.StopReasonTodoWaitingExternal - case string(controlplane.StopReasonVerificationConfigMissing): - return controlplane.StopReasonVerificationConfigMissing - case string(controlplane.StopReasonVerificationExecutionDenied): - return controlplane.StopReasonVerificationExecutionDenied - case string(controlplane.StopReasonVerificationExecutionError): - return controlplane.StopReasonVerificationExecutionError - default: - return "" - } -} - -// buildDeciderContinueHint 生成 FinalDecider continue 场景下的结构化执行提示。 -func buildDeciderContinueHint(decision decider.Decision) string { - if decision.Status != decider.DecisionContinue { - return "" - } - var builder strings.Builder - builder.WriteString("\n") - if summary := strings.TrimSpace(decision.UserVisibleSummary); summary != "" { - builder.WriteString("") - builder.WriteString(xmlEscape(summary)) - builder.WriteString("\n") - } - if len(decision.MissingFacts) > 0 { - builder.WriteString("\n") - for _, fact := range decision.MissingFacts { - builder.WriteString(fmt.Sprintf( - "%s\n", - xmlEscape(strings.TrimSpace(fact.Kind)), - xmlEscape(strings.TrimSpace(fact.Target)), - xmlEscape(evidenceJSONPreview(fact.Details)), - )) - } - builder.WriteString("\n") - } - if len(decision.RequiredNextActions) > 0 { - builder.WriteString("\n") - for _, action := range decision.RequiredNextActions { - builder.WriteString(fmt.Sprintf( - "%s\n", - xmlEscape(strings.TrimSpace(action.Tool)), - xmlEscape(evidenceJSONPreview(action.ArgsHint)), - )) - } - builder.WriteString("\n") - } - builder.WriteString("Do not claim completion with plain text. Call tools to produce objective facts before final response.\n") - builder.WriteString("") - return strings.TrimSpace(builder.String()) -} - -// beforeAcceptFinalLegacy 是历史 acceptance/verify 实现,仅用于回滚对照与测试覆盖。 -// Deprecated: P7 主链不再调用该路径,最终裁决统一走 beforeAcceptFinal -> AcceptanceService。 -func (s *Service) beforeAcceptFinalLegacy( - ctx context.Context, - state *runState, - snapshot TurnBudgetSnapshot, - assistant providertypes.Message, - completionPassed bool, -) (acceptance.AcceptanceDecision, error) { - if state == nil { - return acceptance.AcceptanceDecision{}, nil - } - - verificationCfg := snapshot.Config.Runtime.Verification.Clone() - policy := acceptance.DefaultPolicy{ - Executor: verify.PolicyCommandExecutor{}, - } - engine := acceptance.NewEngine(policy) - - maxNoProgress := resolveNoProgressStreakLimit(snapshot.Config.Runtime) - noProgressStreak := state.finalInterceptStreak - if noProgressStreak < 0 { - noProgressStreak = 0 - } - if state.mustUseToolAfterFinalContinue && state.noToolAfterFinalContinueStreak > noProgressStreak { - noProgressStreak = state.noToolAfterFinalContinueStreak - } - maxTurnsLimit := state.maxTurnsLimit - maxTurnsReached := state.maxTurnsReached - if !maxTurnsReached { - resolvedMaxTurns := resolveRuntimeMaxTurns(snapshot.Config.Runtime) - if resolvedMaxTurns > 0 && state.turn+1 >= resolvedMaxTurns { - maxTurnsReached = true - maxTurnsLimit = resolvedMaxTurns - } - } - - input := acceptance.FinalAcceptanceInput{ - CompletionGate: acceptance.CompletionGateDecision{ - Passed: completionPassed, - Reason: string(state.completion.CompletionBlockedReason), - }, - VerificationInput: verify.FinalVerifyInput{ - SessionID: state.session.ID, - RunID: state.runID, - TaskID: state.taskID, - Workdir: snapshot.Workdir, - Messages: buildVerifyMessages(state.session.Messages), - Todos: buildVerifyTodos(state.session.Todos), - LastAssistantFinal: renderPartsForVerification(assistant.Parts), - ToolResults: nil, - TaskState: buildVerifyTaskState(state.session.TaskState), - RuntimeState: verify.RuntimeStateSnapshot{ - Turn: state.turn, - MaxTurns: resolveRuntimeMaxTurns(snapshot.Config.Runtime), - MaxTurnsReached: maxTurnsReached, - FinalInterceptStreak: noProgressStreak, - }, - VerificationConfig: verificationCfg, - }, - NoProgressExceeded: noProgressStreak >= maxNoProgress, - MaxTurnsReached: maxTurnsReached, - MaxTurnsLimit: maxTurnsLimit, - } - - decision, err := engine.EvaluateFinal(ctx, input) - if err != nil { - return acceptance.AcceptanceDecision{}, err - } - if decision.Status == acceptance.AcceptanceContinue && len(decision.VerifierResults) == 0 { - if synthetic := synthesizeTodoConvergenceEvidence(state.session.Todos); synthetic != nil { - decision.VerifierResults = append(decision.VerifierResults, *synthetic) - } - } - if decision.Status == acceptance.AcceptanceContinue && state.pendingFinalProgress { - decision.HasProgress = true - } - if strings.TrimSpace(decision.CompletionBlockedReason) == "" { - decision.CompletionBlockedReason = strings.TrimSpace(string(state.completion.CompletionBlockedReason)) - } - if decision.Status == acceptance.AcceptanceContinue { - decision.ContinueHint = buildAcceptanceContinueHint(decision) - } - return decision, nil -} - -// synthesizeTodoConvergenceEvidence 在 completion gate 拦截且 verifier 未运行时,回填 todo 证据供 continue hint 使用。 -func synthesizeTodoConvergenceEvidence(todos []agentsession.TodoItem) *verify.VerificationResult { - if len(todos) == 0 { - return nil - } - pendingIDs := make([]string, 0) - inProgressIDs := make([]string, 0) - blockedIDs := make([]string, 0) - statusByID := make(map[string]string) - artifactsByID := make(map[string][]string) - checksByID := make(map[string][]verify.TodoContentCheckSnapshot) - - for _, todo := range todos { - if !todo.RequiredValue() { - continue - } - id := strings.TrimSpace(todo.ID) - if id == "" { - continue - } - status := strings.TrimSpace(string(todo.Status)) - statusByID[id] = status - switch status { - case string(agentsession.TodoStatusPending): - pendingIDs = append(pendingIDs, id) - case string(agentsession.TodoStatusInProgress): - inProgressIDs = append(inProgressIDs, id) - case string(agentsession.TodoStatusBlocked): - blockedIDs = append(blockedIDs, id) - } - if len(todo.Artifacts) > 0 { - artifactsByID[id] = append([]string(nil), todo.Artifacts...) - } - if len(todo.ContentChecks) > 0 { - checksByID[id] = buildVerifyTodoContentChecks(todo.ContentChecks) - } - } - - if len(pendingIDs) == 0 && len(inProgressIDs) == 0 && len(blockedIDs) == 0 { - return nil - } - slices.Sort(pendingIDs) - slices.Sort(inProgressIDs) - slices.Sort(blockedIDs) - - return &verify.VerificationResult{ - Name: "todo_convergence", - Status: verify.VerificationSoftBlock, - Summary: "required todos are not converged", - Reason: "required todos are still pending, in progress, or blocked", - Evidence: map[string]any{ - "pending_ids": pendingIDs, - "in_progress_ids": inProgressIDs, - "blocked_ids": blockedIDs, - "todo_statuses": statusByID, - "todo_artifacts": artifactsByID, - "todo_checks": checksByID, - }, - } -} - -// buildAcceptanceContinueHint 构造带 verifier 证据的 continue 提示,强制下一轮先补工具事实再尝试 final。 -func buildAcceptanceContinueHint(decision acceptance.AcceptanceDecision) string { - const actionDirective = "Do not claim completion with plain text. Next turn MUST call todo_write and/or verification tools to add objective facts before any final response." - blockedReason := strings.TrimSpace(decision.CompletionBlockedReason) - if len(decision.VerifierResults) == 0 && blockedReason == "" { - if base := strings.TrimSpace(decision.ContinueHint); base != "" { - return strings.TrimSpace(base + "\n" + actionDirective) - } - return strings.TrimSpace(finalContinueReminder + "\n" + actionDirective) - } - - var builder strings.Builder - builder.WriteString("\n") - if blockedReason != "" { - builder.WriteString(fmt.Sprintf("%s\n", xmlEscape(blockedReason))) - } - builder.WriteString("") - builder.WriteString(actionDirective) - builder.WriteString("\n") - - if section := renderCompletionBlockedReasonHintSection(blockedReason, decision.VerifierResults); section != "" { - builder.WriteString(section) - } - if section := renderTodoConvergenceHintSection(decision.VerifierResults); section != "" { - builder.WriteString(section) - } - if section := renderVerifierFailureHintSection(decision.VerifierResults); section != "" { - builder.WriteString(section) - } - builder.WriteString("") - return strings.TrimSpace(builder.String()) -} - -// renderCompletionBlockedReasonHintSection 根据 completion gate 阻塞原因输出结构化执行指令。 -func renderCompletionBlockedReasonHintSection( - blockedReason string, - results []verify.VerificationResult, -) string { - switch strings.TrimSpace(blockedReason) { - case string(controlplane.CompletionBlockedReasonPendingTodo): - pending := extractPendingTodoIDs(results) - if len(pending) == 0 { - return "Use todo_write to move required todos to terminal states, then retry acceptance.\n" - } - return fmt.Sprintf( - "%sUse todo_write to close these required todos before final response.\n", - strings.Join(pending, ","), - ) - case string(controlplane.CompletionBlockedReasonUnverifiedWrite): - return "Produce VerificationPerformed and VerificationPassed facts via verification tools before final response.\n" - case string(controlplane.CompletionBlockedReasonPostExecuteClosureRequired): - return "First close loop from latest tool results (todo updates/artifact checks), then retry final acceptance.\n" - default: - return "" - } -} - -// extractPendingTodoIDs 从 verifier 证据提取 required 未收敛 todo 列表。 -func extractPendingTodoIDs(results []verify.VerificationResult) []string { - for _, result := range results { - if strings.TrimSpace(result.Name) != "todo_convergence" { - continue - } - evidence := result.Evidence - if len(evidence) == 0 { - return nil - } - ids := append([]string{}, evidenceStringList(evidence["pending_ids"])...) - ids = append(ids, evidenceStringList(evidence["in_progress_ids"])...) - ids = append(ids, evidenceStringList(evidence["blocked_ids"])...) - return normalizeEvidenceList(ids) - } - return nil -} - -// renderTodoConvergenceHintSection 渲染 todo_convergence 证据,明确 pending/in_progress/blocked 清单。 -func renderTodoConvergenceHintSection(results []verify.VerificationResult) string { - for _, result := range results { - if strings.TrimSpace(result.Name) != "todo_convergence" { - continue - } - evidence := result.Evidence - if len(evidence) == 0 { - return "" - } - pending := evidenceStringList(evidence["pending_ids"]) - inProgress := evidenceStringList(evidence["in_progress_ids"]) - blocked := evidenceStringList(evidence["blocked_ids"]) - waitingExternal := evidenceStringList(evidence["waiting_external_ids"]) - statuses := evidenceJSONPreview(evidence["todo_statuses"]) - artifacts := evidenceJSONPreview(evidence["todo_artifacts"]) - checks := evidenceJSONPreview(evidence["todo_checks"]) - - var builder strings.Builder - builder.WriteString("\n") - builder.WriteString(fmt.Sprintf("%s\n", strings.Join(pending, ","))) - builder.WriteString(fmt.Sprintf("%s\n", strings.Join(inProgress, ","))) - builder.WriteString(fmt.Sprintf("%s\n", strings.Join(blocked, ","))) - if len(waitingExternal) > 0 { - builder.WriteString(fmt.Sprintf("%s\n", strings.Join(waitingExternal, ","))) - } - if statuses != "" { - builder.WriteString(fmt.Sprintf("%s\n", xmlEscape(statuses))) - } - if artifacts != "" { - builder.WriteString(fmt.Sprintf("%s\n", xmlEscape(artifacts))) - } - if checks != "" { - builder.WriteString(fmt.Sprintf("%s\n", xmlEscape(checks))) - } - builder.WriteString("For each listed todo, use todo_write status transitions and attach artifacts/check facts via tools. Do not finalize yet.\n") - builder.WriteString("\n") - return builder.String() - } - return "" -} - -// renderVerifierFailureHintSection 渲染非通过 verifier 的摘要,避免 continue 只有泛化提醒。 -func renderVerifierFailureHintSection(results []verify.VerificationResult) string { - nonPass := make([]verify.VerificationResult, 0, len(results)) - for _, result := range results { - if result.Status == verify.VerificationPass { - continue - } - nonPass = append(nonPass, result) - } - if len(nonPass) == 0 { - return "" - } - sortVerificationResults(nonPass) - - var builder strings.Builder - builder.WriteString("\n") - for _, result := range nonPass { - builder.WriteString(fmt.Sprintf( - "%s%s\n", - xmlEscape(strings.TrimSpace(result.Name)), - xmlEscape(string(result.Status)), - xmlEscape(strings.TrimSpace(result.Summary)), - xmlEscape(strings.TrimSpace(result.Reason)), - )) - } - builder.WriteString("\n") - return builder.String() -} - -// evidenceStringList 将 verifier evidence 中的字符串列表统一提取为去重、去空白后的有序值。 -func evidenceStringList(value any) []string { - switch typed := value.(type) { - case []string: - return normalizeEvidenceList(typed) - case []any: - values := make([]string, 0, len(typed)) - for _, item := range typed { - switch raw := item.(type) { - case string: - values = append(values, raw) - default: - if encoded, err := json.Marshal(raw); err == nil { - values = append(values, string(encoded)) - } - } - } - return normalizeEvidenceList(values) - default: - return nil - } -} - -// evidenceJSONPreview 将 evidence 任意结构转成紧凑 JSON 文本,便于作为提示中的可执行事实。 -func evidenceJSONPreview(value any) string { - if value == nil { - return "" - } - encoded, err := json.Marshal(value) - if err != nil { - return "" - } - return strings.TrimSpace(string(encoded)) -} - -// normalizeEvidenceList 对 evidence 文本列表做去重与排序,保证提示稳定可测。 -func normalizeEvidenceList(values []string) []string { - if len(values) == 0 { - return nil - } - seen := make(map[string]struct{}, len(values)) - normalized := make([]string, 0, len(values)) - for _, value := range values { - trimmed := strings.TrimSpace(value) - if trimmed == "" { - continue - } - if _, exists := seen[trimmed]; exists { - continue - } - seen[trimmed] = struct{}{} - normalized = append(normalized, trimmed) - } - if len(normalized) == 0 { - return nil - } - slices.Sort(normalized) - return normalized -} - -// sortVerificationResults 保证 verifier 输出顺序稳定,减少提示抖动。 -func sortVerificationResults(results []verify.VerificationResult) { - slices.SortFunc(results, func(a verify.VerificationResult, b verify.VerificationResult) int { - return strings.Compare(strings.TrimSpace(a.Name), strings.TrimSpace(b.Name)) - }) -} - -// xmlEscape 对可见提示中的 verifier 文本做最小转义,避免破坏 XML 结构。 -func xmlEscape(value string) string { - replacer := strings.NewReplacer( - "&", "&", - "<", "<", - ">", ">", - "\"", """, - "'", "'", - ) - return replacer.Replace(value) -} - -// recordAcceptanceTerminal 将 acceptance 输出映射为 runtime 唯一终态记录。 -func recordAcceptanceTerminal(state *runState, decision acceptance.AcceptanceDecision) { - if state == nil { - return - } - status := acceptance.TerminalStatusFromAcceptance(decision.Status) - state.markTerminalDecision(status, decision.StopReason, strings.TrimSpace(decision.InternalSummary)) -} - -// buildVerifyTodos 将 session todo 转换为 verifier 快照。 -func buildVerifyTodos(items []agentsession.TodoItem) []verify.TodoSnapshot { - if len(items) == 0 { - return nil - } - todos := make([]verify.TodoSnapshot, 0, len(items)) - for _, item := range items { - todos = append(todos, verify.TodoSnapshot{ - ID: strings.TrimSpace(item.ID), - Content: strings.TrimSpace(item.Content), - Status: strings.TrimSpace(string(item.Status)), - Required: item.RequiredValue(), - BlockedReason: strings.TrimSpace(string(item.BlockedReason)), - Acceptance: append([]string(nil), item.Acceptance...), - Artifacts: append([]string(nil), item.Artifacts...), - Supersedes: append([]string(nil), item.Supersedes...), - ContentChecks: buildVerifyTodoContentChecks(item.ContentChecks), - RetryCount: item.RetryCount, - RetryLimit: item.RetryLimit, - FailureReason: strings.TrimSpace(item.FailureReason), - }) - } - return todos -} - -// buildVerifyTodoContentChecks 将 session 内容校验规则转换为 verifier 快照。 -func buildVerifyTodoContentChecks(items []agentsession.TodoContentCheck) []verify.TodoContentCheckSnapshot { - if len(items) == 0 { - return nil - } - checks := make([]verify.TodoContentCheckSnapshot, 0, len(items)) - for _, item := range items { - checks = append(checks, verify.TodoContentCheckSnapshot{ - Artifact: strings.TrimSpace(item.Artifact), - Contains: append([]string(nil), item.Contains...), - }) - } - return checks -} - -// buildVerifyTaskState 将 task_state 中与验收相关的结构化字段投影给 verifier。 -func buildVerifyTaskState(state agentsession.TaskState) verify.TaskStateSnapshot { - return verify.TaskStateSnapshot{ - VerificationProfile: string(state.VerificationProfile), - KeyArtifacts: append([]string(nil), state.KeyArtifacts...), - } -} - -// buildVerifyMessages 将会话消息压缩为 verifier 所需的最小快照。 -func buildVerifyMessages(messages []providertypes.Message) []verify.MessageLike { - if len(messages) == 0 { - return nil - } - out := make([]verify.MessageLike, 0, len(messages)) - for _, message := range messages { - out = append(out, verify.MessageLike{ - Role: strings.TrimSpace(message.Role), - Content: renderPartsForVerification(message.Parts), - }) - } - return out -} - -// renderPartsForVerification 将消息分片合并为 verifier 侧可读文本。 -func renderPartsForVerification(parts []providertypes.ContentPart) string { - if len(parts) == 0 { - return "" - } - segments := make([]string, 0, len(parts)) - for _, part := range parts { - if part.Kind != providertypes.ContentPartText { - continue - } - text := strings.TrimSpace(part.Text) - if text == "" { - continue - } - segments = append(segments, text) - } - return strings.Join(segments, "\n") -} - -// applyAcceptanceResultProgress 根据 acceptance 输出更新 final 拦截计数唯一真相源。 -func applyAcceptanceResultProgress(state *runState, decision acceptance.AcceptanceDecision) { - if state == nil { - return - } - switch decision.Status { - case acceptance.AcceptanceContinue: - if state.pendingFinalProgress { - state.finalInterceptStreak = 0 - } else { - state.finalInterceptStreak++ - } - default: - state.finalInterceptStreak = 0 - } - state.pendingFinalProgress = false -} diff --git a/internal/runtime/final_acceptance_additional_test.go b/internal/runtime/final_acceptance_additional_test.go deleted file mode 100644 index b706002b..00000000 --- a/internal/runtime/final_acceptance_additional_test.go +++ /dev/null @@ -1,107 +0,0 @@ -package runtime - -import ( - "context" - "strings" - "testing" - - "neo-code/internal/config" - providertypes "neo-code/internal/provider/types" - "neo-code/internal/runtime/acceptance" - "neo-code/internal/runtime/controlplane" - "neo-code/internal/runtime/decider" - "neo-code/internal/runtime/verify" - agentsession "neo-code/internal/session" -) - -func TestFinalAcceptanceMappingAndLegacyPaths(t *testing.T) { - t.Parallel() - - t.Run("map decider statuses", func(t *testing.T) { - t.Parallel() - got := mapDeciderDecisionToAcceptance(decider.Decision{Status: decider.DecisionBlocked}) - if got.Status != acceptance.AcceptanceFailed || got.StopReason != controlplane.StopReasonVerificationFailed { - t.Fatalf("blocked mapping = %+v", got) - } - got = mapDeciderDecisionToAcceptance(decider.Decision{Status: decider.DecisionContinue}) - if got.Status != acceptance.AcceptanceContinue || got.StopReason != controlplane.StopReasonTodoNotConverged { - t.Fatalf("continue mapping = %+v", got) - } - }) - - t.Run("projection keeps required input and task kinds", func(t *testing.T) { - t.Parallel() - required := &decider.RequiredInput{ - Kind: "missing_file_target_or_content", - Message: "need target path", - Details: map[string]any{"path": "test.txt"}, - } - projected := toDeciderDecisionFromAcceptance(acceptance.AcceptanceDecision{ - Status: acceptance.AcceptanceContinue, - StopReason: controlplane.StopReasonTodoNotConverged, - RequiredInput: required, - IntentHint: decider.TaskKindWorkspaceWrite, - EffectiveTaskKind: decider.TaskKindWorkspaceWrite, - }) - if projected.RequiredInput == nil || projected.RequiredInput.Kind != "missing_file_target_or_content" { - t.Fatalf("required input lost in projection: %+v", projected) - } - if projected.IntentHint != decider.TaskKindWorkspaceWrite || projected.EffectiveTaskKind != decider.TaskKindWorkspaceWrite { - t.Fatalf("task kind hints lost in projection: %+v", projected) - } - }) - - t.Run("legacy path adds continue hint", func(t *testing.T) { - t.Parallel() - service := &Service{} - state := newRunState("run-legacy", agentsession.New("legacy")) - required := true - state.session.Todos = []agentsession.TodoItem{ - {ID: "todo-1", Status: agentsession.TodoStatusPending, Required: &required}, - } - snapshot := TurnBudgetSnapshot{Config: config.StaticDefaults().Clone(), Workdir: t.TempDir()} - decision, err := service.beforeAcceptFinalLegacy(context.Background(), &state, snapshot, providertypes.Message{}, false) - if err != nil { - t.Fatalf("beforeAcceptFinalLegacy() error = %v", err) - } - if decision.Status != acceptance.AcceptanceContinue { - t.Fatalf("legacy status = %q, want continue", decision.Status) - } - if !strings.Contains(decision.ContinueHint, "") { - t.Fatalf("continue hint = %q", decision.ContinueHint) - } - }) -} - -func TestFinalAcceptanceHelperBranches(t *testing.T) { - t.Parallel() - - if got := buildAcceptanceContinueHint(acceptance.AcceptanceDecision{ - Status: acceptance.AcceptanceContinue, - ContinueHint: "base", - VerifierResults: nil, - }); !strings.Contains(got, "base") { - t.Fatalf("continue hint fallback = %q", got) - } - - if got := renderCompletionBlockedReasonHintSection("pending_todo", nil); !strings.Contains(got, "required_action") { - t.Fatalf("pending_todo fallback hint = %q", got) - } - if got := renderCompletionBlockedReasonHintSection("unverified_write", nil); !strings.Contains(got, "VerificationPerformed") { - t.Fatalf("unverified_write hint = %q", got) - } - - results := []verify.VerificationResult{ - {Name: "z", Status: verify.VerificationSoftBlock}, - {Name: "a", Status: verify.VerificationHardBlock}, - {Name: "ok", Status: verify.VerificationPass}, - } - section := renderVerifierFailureHintSection(results) - if !strings.Contains(section, "name=\"a\"") || !strings.Contains(section, "name=\"z\"") { - t.Fatalf("verifier section = %q", section) - } - - if xmlEscape(``) == `` { - t.Fatal("xmlEscape should escape special chars") - } -} diff --git a/internal/runtime/final_acceptance_test.go b/internal/runtime/final_acceptance_test.go deleted file mode 100644 index aa3f229d..00000000 --- a/internal/runtime/final_acceptance_test.go +++ /dev/null @@ -1,273 +0,0 @@ -package runtime - -import ( - "context" - "encoding/json" - "strings" - "testing" - - "neo-code/internal/config" - providertypes "neo-code/internal/provider/types" - "neo-code/internal/runtime/acceptance" - "neo-code/internal/runtime/verify" - agentsession "neo-code/internal/session" -) - -func TestBeforeAcceptFinalDecisionPaths(t *testing.T) { - t.Parallel() - - service := &Service{} - baseCfg := config.StaticDefaults().Clone() - snapshot := TurnBudgetSnapshot{ - Config: baseCfg, - Workdir: t.TempDir(), - } - - t.Run("pending required todo -> continue", func(t *testing.T) { - t.Parallel() - state := newRunState("run-continue", agentsession.New("continue")) - required := true - state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - state.session.Todos = []agentsession.TodoItem{ - {ID: "todo-1", Content: "do work", Status: agentsession.TodoStatusPending, Required: &required}, - } - decision, err := service.beforeAcceptFinal(context.Background(), &state, snapshot, providertypes.Message{ - Role: providertypes.RoleAssistant, - Parts: []providertypes.ContentPart{providertypes.NewTextPart("done")}, - }, true, beforeCompletionHookSignals{}) - if err != nil { - t.Fatalf("beforeAcceptFinal() error = %v", err) - } - if decision.Status != acceptance.AcceptanceContinue { - t.Fatalf("status = %q, want continue", decision.Status) - } - }) - - t.Run("invalid profile -> failed", func(t *testing.T) { - t.Parallel() - state := newRunState("run-invalid-profile", agentsession.New("invalid-profile")) - state.session.TaskState.VerificationProfile = "bad" - decision, err := service.beforeAcceptFinal(context.Background(), &state, snapshot, providertypes.Message{}, true, beforeCompletionHookSignals{}) - if err != nil { - t.Fatalf("beforeAcceptFinal() error = %v", err) - } - if decision.Status != acceptance.AcceptanceFailed { - t.Fatalf("status = %q, want failed", decision.Status) - } - }) - - t.Run("continue carries pending final progress signal", func(t *testing.T) { - t.Parallel() - state := newRunState("run-progress", agentsession.New("progress")) - required := true - state.pendingFinalProgress = true - state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - state.session.Todos = []agentsession.TodoItem{ - {ID: "todo-1", Content: "do work", Status: agentsession.TodoStatusPending, Required: &required}, - } - decision, err := service.beforeAcceptFinal(context.Background(), &state, snapshot, providertypes.Message{}, true, beforeCompletionHookSignals{}) - if err != nil { - t.Fatalf("beforeAcceptFinal() error = %v", err) - } - if !decision.HasProgress { - t.Fatal("expected continue decision to carry pending final progress") - } - }) - - t.Run("all converged -> accepted", func(t *testing.T) { - t.Parallel() - state := newRunState("run-accepted", agentsession.New("accepted")) - state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - decision, err := service.beforeAcceptFinal(context.Background(), &state, snapshot, providertypes.Message{}, true, beforeCompletionHookSignals{}) - if err != nil { - t.Fatalf("beforeAcceptFinal() error = %v", err) - } - if decision.Status != acceptance.AcceptanceAccepted { - t.Fatalf("status = %q, want accepted", decision.Status) - } - }) - - t.Run("final intercept streak drives no-progress breaker", func(t *testing.T) { - t.Parallel() - state := newRunState("run-incomplete", agentsession.New("incomplete")) - required := true - state.finalInterceptStreak = snapshot.Config.Runtime.MaxNoProgressStreak - state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - state.session.Todos = []agentsession.TodoItem{ - {ID: "todo-1", Content: "do work", Status: agentsession.TodoStatusPending, Required: &required}, - } - decision, err := service.beforeAcceptFinal(context.Background(), &state, snapshot, providertypes.Message{}, true, beforeCompletionHookSignals{}) - if err != nil { - t.Fatalf("beforeAcceptFinal() error = %v", err) - } - if decision.Status != acceptance.AcceptanceIncomplete { - t.Fatalf("status = %q, want incomplete", decision.Status) - } - }) -} - -func TestFinalAcceptanceHelpers(t *testing.T) { - t.Parallel() - - t.Run("buildVerifyTaskState includes profile", func(t *testing.T) { - t.Parallel() - got := buildVerifyTaskState(agentsession.TaskState{ - VerificationProfile: agentsession.VerificationProfileDocs, - KeyArtifacts: []string{"README.md"}, - }) - if got.VerificationProfile != "docs" || len(got.KeyArtifacts) != 1 { - t.Fatalf("unexpected task state snapshot: %+v", got) - } - }) - - t.Run("applyAcceptanceResultProgress uses pending final progress", func(t *testing.T) { - t.Parallel() - state := newRunState("run-progress", agentsession.New("progress")) - state.finalInterceptStreak = 2 - state.pendingFinalProgress = true - applyAcceptanceResultProgress(&state, acceptance.AcceptanceDecision{Status: acceptance.AcceptanceContinue}) - if state.finalInterceptStreak != 0 || state.pendingFinalProgress { - t.Fatalf("unexpected state after progress reset: streak=%d, pending=%v", state.finalInterceptStreak, state.pendingFinalProgress) - } - - applyAcceptanceResultProgress(&state, acceptance.AcceptanceDecision{Status: acceptance.AcceptanceContinue}) - if state.finalInterceptStreak != 1 { - t.Fatalf("streak = %d, want 1", state.finalInterceptStreak) - } - }) - - t.Run("buildAcceptanceContinueHint includes actionable evidence and tool requirement", func(t *testing.T) { - t.Parallel() - decision := acceptance.AcceptanceDecision{ - Status: acceptance.AcceptanceContinue, - CompletionBlockedReason: "pending_todo", - VerifierResults: []verify.VerificationResult{ - { - Name: "todo_convergence", - Status: verify.VerificationSoftBlock, - Summary: "required todos are not converged", - Reason: "required todos are still pending, in progress, or internally blocked", - Evidence: map[string]any{ - "pending_ids": []string{"todo-2", "todo-1"}, - "in_progress_ids": []string{"todo-3"}, - "blocked_ids": []string{"todo-4"}, - }, - }, - }, - } - hint := buildAcceptanceContinueHint(decision) - if !strings.Contains(hint, "") { - t.Fatalf("hint should contain acceptance xml envelope, got %q", hint) - } - if !strings.Contains(hint, "MUST call todo_write") { - t.Fatalf("hint should force tool-based facts, got %q", hint) - } - if !strings.Contains(hint, "todo-1,todo-2") { - t.Fatalf("hint should include sorted pending ids, got %q", hint) - } - if !strings.Contains(hint, "pending_todo") { - t.Fatalf("hint should include completion blocked reason, got %q", hint) - } - }) - - t.Run("buildAcceptanceContinueHint emits unverified_write guidance", func(t *testing.T) { - t.Parallel() - hint := buildAcceptanceContinueHint(acceptance.AcceptanceDecision{ - Status: acceptance.AcceptanceContinue, - CompletionBlockedReason: "unverified_write", - }) - if !strings.Contains(hint, "unverified_write") { - t.Fatalf("hint should include unverified_write reason, got %q", hint) - } - if !strings.Contains(hint, "VerificationPerformed") || !strings.Contains(hint, "VerificationPassed") { - t.Fatalf("hint should require verification facts, got %q", hint) - } - }) - - t.Run("synthesizeTodoConvergenceEvidence projects required todos", func(t *testing.T) { - t.Parallel() - required := true - result := synthesizeTodoConvergenceEvidence([]agentsession.TodoItem{ - {ID: "todo-1", Content: "a", Status: agentsession.TodoStatusPending, Required: &required}, - {ID: "todo-2", Content: "b", Status: agentsession.TodoStatusInProgress, Required: &required}, - {ID: "todo-3", Content: "c", Status: agentsession.TodoStatusCompleted, Required: &required}, - }) - if result == nil { - t.Fatal("expected synthetic verifier result") - } - if result.Name != "todo_convergence" || result.Status != verify.VerificationSoftBlock { - t.Fatalf("unexpected synthetic result: %+v", *result) - } - pending, _ := result.Evidence["pending_ids"].([]string) - if len(pending) != 1 || pending[0] != "todo-1" { - t.Fatalf("pending ids = %+v, want [todo-1]", pending) - } - }) - - t.Run("buildVerifyTodos and content checks keep normalized values", func(t *testing.T) { - t.Parallel() - required := true - todos := buildVerifyTodos([]agentsession.TodoItem{ - { - ID: " todo-1 ", - Content: " do work ", - Status: agentsession.TodoStatusInProgress, - Required: &required, - Acceptance: []string{"a"}, - Artifacts: []string{"x.md"}, - Supersedes: []string{"todo-0"}, - ContentChecks: []agentsession.TodoContentCheck{ - {Artifact: " README.md ", Contains: []string{"done"}}, - }, - RetryCount: 1, - RetryLimit: 2, - FailureReason: " none ", - }, - }) - if len(todos) != 1 { - t.Fatalf("todos len = %d, want 1", len(todos)) - } - if todos[0].ID != "todo-1" || todos[0].Content != "do work" || todos[0].Status != "in_progress" { - t.Fatalf("unexpected todo snapshot: %+v", todos[0]) - } - if len(todos[0].ContentChecks) != 1 || todos[0].ContentChecks[0].Artifact != "README.md" { - t.Fatalf("unexpected content checks: %+v", todos[0].ContentChecks) - } - }) - - t.Run("buildVerifyMessages ignores non-text and trims content", func(t *testing.T) { - t.Parallel() - messages := buildVerifyMessages([]providertypes.Message{ - { - Role: " assistant ", - Parts: []providertypes.ContentPart{ - providertypes.NewTextPart(" first "), - {Kind: "tool_call", Text: "ignored"}, - providertypes.NewTextPart("second"), - }, - }, - }) - if len(messages) != 1 { - t.Fatalf("messages len = %d, want 1", len(messages)) - } - if messages[0].Role != "assistant" || messages[0].Content != "first\nsecond" { - t.Fatalf("unexpected message snapshot: %+v", messages[0]) - } - }) - - t.Run("evidence helpers normalize and serialize", func(t *testing.T) { - t.Parallel() - items := evidenceStringList([]any{" b ", map[string]any{"k": 1}, "a", "a"}) - if len(items) != 3 || items[0] != "a" || items[1] != "b" { - t.Fatalf("unexpected evidence list: %+v", items) - } - var m map[string]any - if err := json.Unmarshal([]byte(items[2]), &m); err != nil { - t.Fatalf("expected JSON encoded item, got %q", items[2]) - } - preview := evidenceJSONPreview(map[string]any{"x": 1}) - if preview == "" || !strings.Contains(preview, "\"x\":1") { - t.Fatalf("unexpected preview: %q", preview) - } - }) -} diff --git a/internal/runtime/hooks_integration_test.go b/internal/runtime/hooks_integration_test.go index 4fcab3a9..aa820bd4 100644 --- a/internal/runtime/hooks_integration_test.go +++ b/internal/runtime/hooks_integration_test.go @@ -15,7 +15,6 @@ import ( approvalflow "neo-code/internal/runtime/approval" "neo-code/internal/runtime/controlplane" runtimehooks "neo-code/internal/runtime/hooks" - agentsession "neo-code/internal/session" "neo-code/internal/subagent" "neo-code/internal/tools" ) @@ -272,96 +271,6 @@ func TestRunBeforeCompletionDecisionHookBlockIsObservedOnly(t *testing.T) { } } -func TestBeforeCompletionDecisionOrchestratorRunsUserRepoBeforeInternalAndFeedsDecision(t *testing.T) { - t.Parallel() - - service := &Service{events: make(chan RuntimeEvent, 16)} - - var ( - mu sync.Mutex - callFlow []string - ) - appendCall := func(value string) { - mu.Lock() - callFlow = append(callFlow, value) - mu.Unlock() - } - - baseRegistry := runtimehooks.NewRegistry() - if err := baseRegistry.Register(runtimehooks.HookSpec{ - ID: "internal-before-completion", - Point: runtimehooks.HookPointBeforeCompletionDecision, - Scope: runtimehooks.HookScopeInternal, - Source: runtimehooks.HookSourceInternal, - Handler: func(_ context.Context, _ runtimehooks.HookContext) runtimehooks.HookResult { - appendCall("internal") - return runtimehooks.HookResult{Status: runtimehooks.HookResultPass} - }, - }); err != nil { - t.Fatalf("register internal hook: %v", err) - } - - userRegistry := runtimehooks.NewRegistry() - if err := userRegistry.Register(runtimehooks.HookSpec{ - ID: "user-before-completion", - Point: runtimehooks.HookPointBeforeCompletionDecision, - Scope: runtimehooks.HookScopeUser, - Source: runtimehooks.HookSourceUser, - Handler: func(_ context.Context, _ runtimehooks.HookContext) runtimehooks.HookResult { - appendCall("user") - return runtimehooks.HookResult{Status: runtimehooks.HookResultFailed, Message: "user guard signal"} - }, - }); err != nil { - t.Fatalf("register user hook: %v", err) - } - - repoRegistry := runtimehooks.NewRegistry() - if err := repoRegistry.Register(runtimehooks.HookSpec{ - ID: "repo-before-completion", - Point: runtimehooks.HookPointBeforeCompletionDecision, - Scope: runtimehooks.HookScopeRepo, - Source: runtimehooks.HookSourceRepo, - Handler: func(_ context.Context, _ runtimehooks.HookContext) runtimehooks.HookResult { - appendCall("repo") - return runtimehooks.HookResult{Status: runtimehooks.HookResultPass, Message: "repo annotation"} - }, - }); err != nil { - t.Fatalf("register repo hook: %v", err) - } - - baseExecutor := runtimehooks.NewExecutor(baseRegistry, newHookRuntimeEventEmitter(service), time.Second) - userExecutor := runtimehooks.NewExecutor(userRegistry, newHookRuntimeEventEmitter(service), time.Second) - repoExecutor := runtimehooks.NewExecutor(repoRegistry, newHookRuntimeEventEmitter(service), time.Second) - service.SetHookExecutor(composeRuntimeHookExecutors(baseExecutor, userExecutor, repoExecutor)) - - session := newRuntimeSession("session-before-completion-orchestrator") - state := newRunState("run-before-completion-orchestrator", session) - snapshotCfg := TurnBudgetSnapshot{ - Config: config.StaticDefaults().Clone(), - Workdir: t.TempDir(), - } - state.session.TaskState.VerificationProfile = agentsession.VerificationProfileTaskOnly - decision, err := service.runBeforeCompletionDecisionAcceptance( - context.Background(), - &state, - snapshotCfg, - providertypes.Message{Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("done")}}, - t.TempDir(), - true, - false, - providertypes.RoleAssistant, - ) - if err != nil { - t.Fatalf("runBeforeCompletionDecisionAcceptance() error = %v", err) - } - if got := strings.Join(callFlow, ","); got != "user,repo,internal" { - t.Fatalf("before_completion_decision hook order = %q, want %q", got, "user,repo,internal") - } - if !strings.Contains(decision.InternalSummary, "hook signals consumed") { - t.Fatalf("decision internal summary should include hook signal context, got %q", decision.InternalSummary) - } -} - func TestUserHookEventCarriesScopeAndMessage(t *testing.T) { t.Parallel() diff --git a/internal/runtime/message_render.go b/internal/runtime/message_render.go new file mode 100644 index 00000000..fc2e4344 --- /dev/null +++ b/internal/runtime/message_render.go @@ -0,0 +1,13 @@ +package runtime + +import ( + "strings" + + "neo-code/internal/partsrender" + providertypes "neo-code/internal/provider/types" +) + +// renderPartsForVerification 将多模态消息压平成验收与完成信号解析使用的稳定文本。 +func renderPartsForVerification(parts []providertypes.ContentPart) string { + return strings.TrimSpace(partsrender.RenderDisplayParts(parts)) +} diff --git a/internal/runtime/repository_context_additional_test.go b/internal/runtime/repository_context_additional_test.go index d2a3bf37..1ceab281 100644 --- a/internal/runtime/repository_context_additional_test.go +++ b/internal/runtime/repository_context_additional_test.go @@ -5,8 +5,8 @@ import ( "errors" "testing" - "neo-code/internal/repository" providertypes "neo-code/internal/provider/types" + "neo-code/internal/repository" agentsession "neo-code/internal/session" ) diff --git a/internal/runtime/repository_context_test.go b/internal/runtime/repository_context_test.go index cf7379c3..6ed20824 100644 --- a/internal/runtime/repository_context_test.go +++ b/internal/runtime/repository_context_test.go @@ -6,8 +6,8 @@ import ( "testing" agentcontext "neo-code/internal/context" - "neo-code/internal/repository" providertypes "neo-code/internal/provider/types" + "neo-code/internal/repository" agentsession "neo-code/internal/session" "neo-code/internal/tools" ) diff --git a/internal/runtime/run.go b/internal/runtime/run.go index 09e786f5..e11ee091 100644 --- a/internal/runtime/run.go +++ b/internal/runtime/run.go @@ -178,7 +178,6 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { agentsession.NormalizeAgentMode(session.AgentMode) == agentsession.AgentModePlan state.taskID = strings.TrimSpace(input.TaskID) state.agentID = strings.TrimSpace(input.AgentID) - state.taskKind = inferTaskKindFromInput(input.Parts) state.userGoal = strings.TrimSpace(renderPartsForVerification(input.Parts)) if input.CapabilityToken != nil { token := input.CapabilityToken.Normalize() @@ -219,6 +218,9 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { if err := s.appendUserMessageAndSave(ctx, &state, input.Parts); err != nil { return s.handleRunError(err) } + if err := s.maybeAppendTodoBootstrapReminder(ctx, &state); err != nil { + return s.handleRunError(err) + } s.emitRuntimeSnapshotUpdated(ctx, &state, "session_start") s.updateResumeCheckpoint(ctx, &state, "plan", "") @@ -315,6 +317,9 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { } hasToolCalls := len(turnOutput.assistant.ToolCalls) > 0 if hasToolCalls { + state.mu.Lock() + state.missingCompletionSignalStreak = 0 + state.mu.Unlock() if err := s.appendAssistantMessageAndSave( ctx, &state, @@ -399,16 +404,18 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { if err != nil { return s.handleRunError(err) } + hasThinking := len(turnOutput.assistant.ThinkingMetadata) > 0 if !completionSignaled { + if hasThinking { + break turnAttempt + } state.mu.Lock() state.missingCompletionSignalStreak++ - alreadyHinted := state.completionProtocolHinted - if !alreadyHinted { - state.completionProtocolHinted = true - } + missingCompletionSignalStreak := state.missingCompletionSignalStreak state.mu.Unlock() - if !alreadyHinted { - if err := s.appendSystemMessageAndSave(ctx, &state, completionProtocolReminder); err != nil { + if missingCompletionSignalStreak < missingCompletionSignalLimit { + reminder := completionProtocolReminderForStreak(missingCompletionSignalStreak) + if err := s.appendSystemMessageAndSave(ctx, &state, reminder); err != nil { return s.handleRunError(err) } break turnAttempt @@ -432,7 +439,6 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { if report.Outcome == acceptgate.OutcomeAccepted { state.mu.Lock() state.missingCompletionSignalStreak = 0 - state.completionProtocolHinted = false state.mu.Unlock() if markCurrentPlanCompleted(&state.session, completionSignaled) { state.touchSession() @@ -453,7 +459,6 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { } state.mu.Lock() state.missingCompletionSignalStreak = 0 - state.completionProtocolHinted = false state.mu.Unlock() if err := s.appendAssistantMessageOnlyAndSave(ctx, &state, assistantForFinal); err != nil { return s.handleRunError(err) diff --git a/internal/runtime/runtime_progress_test.go b/internal/runtime/runtime_progress_test.go index 6fc53d22..cd76853e 100644 --- a/internal/runtime/runtime_progress_test.go +++ b/internal/runtime/runtime_progress_test.go @@ -579,6 +579,7 @@ func TestNoToolIncompleteTurnStillEvaluatesProgressAndInjectsReminder(t *testing registry.Register(todotool.New()) providerImpl := &scriptedProvider{ + requireExplicitCompletion: true, responses: []scriptedResponse{ { Message: providertypes.Message{ @@ -616,7 +617,7 @@ func TestNoToolIncompleteTurnStillEvaluatesProgressAndInjectsReminder(t *testing { Message: providertypes.Message{ Role: providertypes.RoleAssistant, - Parts: []providertypes.ContentPart{providertypes.NewTextPart("done")}, + Parts: []providertypes.ContentPart{providertypes.NewTextPart("{\"task_completion\":{\"completed\":true}}\ndone")}, }, FinishReason: "stop", }, @@ -687,28 +688,14 @@ func TestAcceptanceContinueWithoutToolCallStopsAsIncomplete(t *testing.T) { store.sessions[session.ID] = cloneSession(session) providerImpl := &scriptedProvider{ + requireExplicitCompletion: true, responses: []scriptedResponse{ - { - Message: providertypes.Message{ - Role: providertypes.RoleAssistant, - Parts: []providertypes.ContentPart{providertypes.NewTextPart("{\"task_completion\":{\"completed\":false}}\n我没有完成信号")}, - }, - FinishReason: "stop", - }, - { - Message: providertypes.Message{ - Role: providertypes.RoleAssistant, - Parts: []providertypes.ContentPart{providertypes.NewTextPart("{\"task_completion\":{\"completed\":false}}\n仍然没有完成信号")}, - }, - FinishReason: "stop", - }, - { - Message: providertypes.Message{ - Role: providertypes.RoleAssistant, - Parts: []providertypes.ContentPart{providertypes.NewTextPart("不应再到这里")}, - }, - FinishReason: "stop", - }, + {Message: providertypes.Message{Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("{\"task_completion\":{\"completed\":false}}\n1")}}, FinishReason: "stop"}, + {Message: providertypes.Message{Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("{\"task_completion\":{\"completed\":false}}\n2")}}, FinishReason: "stop"}, + {Message: providertypes.Message{Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("{\"task_completion\":{\"completed\":false}}\n3")}}, FinishReason: "stop"}, + {Message: providertypes.Message{Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("{\"task_completion\":{\"completed\":false}}\n4")}}, FinishReason: "stop"}, + {Message: providertypes.Message{Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("{\"task_completion\":{\"completed\":false}}\n5")}}, FinishReason: "stop"}, + {Message: providertypes.Message{Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("不应再到这里")}}, FinishReason: "stop"}, }, } @@ -728,23 +715,26 @@ func TestAcceptanceContinueWithoutToolCallStopsAsIncomplete(t *testing.T) { t.Fatalf("Run() error = %v", err) } - if len(providerImpl.requests) != 2 { - t.Fatalf("expected runtime to stop after two no-tool continues, got %d requests", len(providerImpl.requests)) + if len(providerImpl.requests) != 6 { + t.Fatalf("expected runtime to stop after six missing completion signals, got %d requests", len(providerImpl.requests)) } - secondRequestMessages := providerImpl.requests[1].Messages - foundHint := false - for _, message := range secondRequestMessages { + // 第 6 个请求(streak=5 时注入最终提醒后)应包含最终协议提醒 + fifthRequestMessages := providerImpl.requests[5].Messages + foundFinalHint := false + for _, message := range fifthRequestMessages { if message.Role != providertypes.RoleSystem { continue } content := renderPartsForTest(message.Parts) - if strings.Contains(content, "[Runtime Control]") && strings.Contains(content, "task_completion") { - foundHint = true + if strings.Contains(content, "[Runtime Control]") && + strings.Contains(content, "final protocol reminder") && + strings.Contains(content, "task_completion") { + foundFinalHint = true break } } - if !foundHint { - t.Fatalf("expected runtime protocol note, got messages: %+v", secondRequestMessages) + if !foundFinalHint { + t.Fatalf("expected final runtime protocol note in request 5, got messages: %+v", fifthRequestMessages) } events := collectRuntimeEvents(service.Events()) diff --git a/internal/runtime/runtime_test.go b/internal/runtime/runtime_test.go index 399bd68a..a16d69dc 100644 --- a/internal/runtime/runtime_test.go +++ b/internal/runtime/runtime_test.go @@ -2519,7 +2519,7 @@ func TestServiceRunErrorPaths(t *testing.T) { }) } responses = append(responses, scriptedResponse{ - Message: providertypes.Message{Parts: []providertypes.ContentPart{providertypes.NewTextPart("done after many cycles")}}, + Message: providertypes.Message{Parts: []providertypes.ContentPart{providertypes.NewTextPart("{\"task_completion\":{\"completed\":true}}\ndone after many cycles")}}, FinishReason: "stop", }) return &scriptedProvider{responses: responses} @@ -2528,8 +2528,8 @@ func TestServiceRunErrorPaths(t *testing.T) { expectEvents: []EventType{EventUserMessage, EventToolStart, EventToolChunk, EventToolResult, EventAgentDone}, assert: func(t *testing.T, store *memoryStore, scripted *scriptedProvider, tool *stubTool) { t.Helper() - if scripted.callCount != 7 { - t.Fatalf("expected 7 provider calls before no-progress hard stop, got %d", scripted.callCount) + if scripted.callCount != 10 { + t.Fatalf("expected 10 provider calls (9 tool cycles + 1 completion), got %d", scripted.callCount) } }, }, diff --git a/internal/runtime/state.go b/internal/runtime/state.go index 912518a0..dd227232 100644 --- a/internal/runtime/state.go +++ b/internal/runtime/state.go @@ -5,7 +5,6 @@ import ( "time" "neo-code/internal/runtime/controlplane" - "neo-code/internal/runtime/decider" runtimefacts "neo-code/internal/runtime/facts" "neo-code/internal/security" agentsession "neo-code/internal/session" @@ -13,55 +12,47 @@ import ( // runState 汇总单次 Run 生命周期内会变化的会话与计量状态。 type runState struct { - mu sync.Mutex - runID string - runToken uint64 - session agentsession.Session - compactCount int - reactiveCompactAttempts int - rememberedThisRun bool - planningEnabled bool - taskID string - agentID string - capabilityToken *security.CapabilityToken - nextAttemptSeq int - turn int - baseLifecycle controlplane.RunState - lifecycle controlplane.RunState - waitingPermissionCount int - compactingCount int - stopEmitted bool - budgetExceeded bool - maxTurnsReached bool - maxTurnsLimit int - finalInterceptStreak int - pendingFinalProgress bool - mustUseToolAfterFinalContinue bool - noToolAfterFinalContinueStreak int - lastAcceptanceBlockedReason string - taskKind decider.TaskKind - userGoal string - missingCompletionSignalStreak int - completionProtocolHinted bool - factsCollector *runtimefacts.Collector - lastDeciderDecision decider.Decision - terminalStatus controlplane.TerminalStatus - terminalStopReason controlplane.StopReason - terminalStopDetail string - terminalSet bool - hasUnknownUsage bool - completion controlplane.CompletionState - progress controlplane.ProgressState - lastEndOfTurnCheckpointID string - baselineCheckpointID string - hookAnnotations []string - hookNotifications []queuedHookNotification - hookNotificationSeen map[string]time.Time - hookNotificationOmitted int - reportedMissingSkills map[string]struct{} - thinkingOverride *ThinkingOverride - pendingUserQuestion *UserQuestionRequestedPayload - disableTools bool + mu sync.Mutex + runID string + runToken uint64 + session agentsession.Session + compactCount int + reactiveCompactAttempts int + rememberedThisRun bool + planningEnabled bool + taskID string + agentID string + capabilityToken *security.CapabilityToken + nextAttemptSeq int + turn int + baseLifecycle controlplane.RunState + lifecycle controlplane.RunState + waitingPermissionCount int + compactingCount int + stopEmitted bool + budgetExceeded bool + maxTurnsReached bool + maxTurnsLimit int + userGoal string + missingCompletionSignalStreak int + factsCollector *runtimefacts.Collector + terminalStatus controlplane.TerminalStatus + terminalStopReason controlplane.StopReason + terminalStopDetail string + terminalSet bool + hasUnknownUsage bool + completion controlplane.CompletionState + progress controlplane.ProgressState + lastEndOfTurnCheckpointID string + baselineCheckpointID string + hookAnnotations []string + hookNotifications []queuedHookNotification + hookNotificationSeen map[string]time.Time + hookNotificationOmitted int + reportedMissingSkills map[string]struct{} + thinkingOverride *ThinkingOverride + pendingUserQuestion *UserQuestionRequestedPayload + disableTools bool } // newRunState 基于持久化会话创建一次运行的内存状态镜像。 @@ -72,7 +63,6 @@ func newRunState(runID string, session agentsession.Session) runState { nextAttemptSeq: 1, completion: controlplane.CompletionState{TodoOnlyTaskCandidate: true}, reportedMissingSkills: make(map[string]struct{}), - taskKind: "", factsCollector: runtimefacts.NewCollector(), hookNotificationSeen: make(map[string]time.Time), } diff --git a/internal/runtime/task_kind.go b/internal/runtime/task_kind.go deleted file mode 100644 index fa28d24b..00000000 --- a/internal/runtime/task_kind.go +++ /dev/null @@ -1,27 +0,0 @@ -package runtime - -import ( - "strings" - - providertypes "neo-code/internal/provider/types" - "neo-code/internal/runtime/decider" -) - -// inferTaskKindFromInput 基于用户输入文本推断任务类型,避免将简单状态任务误判为通用写入验证任务。 -func inferTaskKindFromInput(parts []providertypes.ContentPart) decider.TaskKind { - var builder strings.Builder - for _, part := range parts { - if part.Kind != providertypes.ContentPartText { - continue - } - text := strings.TrimSpace(part.Text) - if text == "" { - continue - } - if builder.Len() > 0 { - builder.WriteString("\n") - } - builder.WriteString(text) - } - return decider.InferTaskKind(builder.String()) -} diff --git a/internal/runtime/todo_bootstrap.go b/internal/runtime/todo_bootstrap.go new file mode 100644 index 00000000..309c97b9 --- /dev/null +++ b/internal/runtime/todo_bootstrap.go @@ -0,0 +1,56 @@ +package runtime + +import ( + "context" + + agentsession "neo-code/internal/session" +) + +const todoBootstrapRequiredReason = "todo_bootstrap_required" + +const todoBootstrapRequiredReminder = `[Runtime Control] + +todo_bootstrap_required: This build run has no current plan and no active todos. + +Before project analysis, documentation writing, code changes, multi-step debugging, or verification work, call todo_write with action=plan or action=add to create required todos for this run. + +Do not update or complete old todo IDs that are not present in the current Todo State.` + +// maybeAppendTodoBootstrapReminder 在 direct build 缺少 plan/todo 时注入一次结构化提醒。 +func (s *Service) maybeAppendTodoBootstrapReminder(ctx context.Context, state *runState) error { + if !shouldInjectTodoBootstrapReminder(state) { + return nil + } + return s.appendSystemMessageAndSave(ctx, state, todoBootstrapRequiredReminder) +} + +// shouldInjectTodoBootstrapReminder 判断本轮 build 是否需要先创建当前 run 的 todo。 +func shouldInjectTodoBootstrapReminder(state *runState) bool { + if state == nil || state.disableTools || !state.planningEnabled { + return false + } + state.mu.Lock() + session := state.session + state.mu.Unlock() + + if agentsession.NormalizeAgentMode(session.AgentMode) != agentsession.AgentModeBuild { + return false + } + if hasActivePlanForTodoBootstrap(session.CurrentPlan) || len(session.Todos) > 0 { + return false + } + return true +} + +// hasActivePlanForTodoBootstrap 判断当前 plan 是否仍可为 build 继承 todo 所有权。 +func hasActivePlanForTodoBootstrap(plan *agentsession.PlanArtifact) bool { + if plan == nil { + return false + } + switch agentsession.NormalizePlanStatus(plan.Status) { + case agentsession.PlanStatusDraft, agentsession.PlanStatusApproved: + return true + default: + return false + } +} diff --git a/internal/runtime/todo_bootstrap_test.go b/internal/runtime/todo_bootstrap_test.go new file mode 100644 index 00000000..61582649 --- /dev/null +++ b/internal/runtime/todo_bootstrap_test.go @@ -0,0 +1,164 @@ +package runtime + +import ( + "context" + "strings" + "testing" + + agentcontext "neo-code/internal/context" + providertypes "neo-code/internal/provider/types" + agentsession "neo-code/internal/session" + "neo-code/internal/tools" +) + +func TestShouldInjectTodoBootstrapReminder(t *testing.T) { + t.Parallel() + + required := true + cases := []struct { + name string + state runState + want bool + }{ + { + name: "direct build without plan or todos injects", + state: runState{ + session: agentsession.Session{AgentMode: agentsession.AgentModeBuild}, + planningEnabled: true, + }, + want: true, + }, + { + name: "direct build does not inspect user text", + state: runState{ + session: agentsession.Session{AgentMode: agentsession.AgentModeBuild}, + userGoal: "你好", + planningEnabled: true, + }, + want: true, + }, + { + name: "active plan skips", + state: runState{ + session: agentsession.Session{ + AgentMode: agentsession.AgentModeBuild, + CurrentPlan: &agentsession.PlanArtifact{ + Status: agentsession.PlanStatusApproved, + }, + }, + userGoal: "请分析项目并写文档", + planningEnabled: true, + }, + want: false, + }, + { + name: "existing todo skips", + state: runState{ + session: agentsession.Session{ + AgentMode: agentsession.AgentModeBuild, + Todos: []agentsession.TodoItem{{ + ID: "todo-1", + Content: "existing", + Status: agentsession.TodoStatusPending, + Required: &required, + }}, + }, + userGoal: "请分析项目并写文档", + planningEnabled: true, + }, + want: false, + }, + { + name: "plan mode skips", + state: runState{ + session: agentsession.Session{AgentMode: agentsession.AgentModePlan}, + userGoal: "请分析项目并写文档", + planningEnabled: true, + }, + want: false, + }, + { + name: "legacy non planning run skips", + state: runState{ + session: agentsession.Session{AgentMode: agentsession.AgentModeBuild}, + userGoal: "edit file", + }, + want: false, + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got := shouldInjectTodoBootstrapReminder(&tc.state) + if got != tc.want { + t.Fatalf("shouldInjectTodoBootstrapReminder() = %v, want %v", got, tc.want) + } + }) + } +} + +func TestServiceRunDirectBuildInjectsTodoBootstrapReminder(t *testing.T) { + t.Parallel() + + manager := newRuntimeConfigManager(t) + store := newMemoryStore() + builder := &stubContextBuilder{ + buildFn: func(ctx context.Context, input agentcontext.BuildInput) (agentcontext.BuildResult, error) { + return agentcontext.BuildResult{ + SystemPrompt: "stub system prompt", + Messages: append([]providertypes.Message(nil), input.Messages...), + }, nil + }, + } + scripted := &scriptedProvider{ + responses: []scriptedResponse{ + { + Message: providertypes.Message{ + Role: providertypes.RoleAssistant, + Parts: []providertypes.ContentPart{ + providertypes.NewTextPart(`{"task_completion":{"completed":true}}` + "\n完成。"), + }, + }, + FinishReason: "stop", + }, + }, + } + + service := NewWithFactory(manager, tools.NewRegistry(), store, &scriptedProviderFactory{provider: scripted}, builder) + if err := service.Run(context.Background(), UserInput{ + RunID: "run-direct-build-todo-bootstrap", + Mode: string(agentsession.AgentModeBuild), + Parts: []providertypes.ContentPart{providertypes.NewTextPart("请分析项目并写文档")}, + }); err != nil { + t.Fatalf("Run() error = %v", err) + } + + saved := onlySession(t, store) + foundPersistedReminder := false + for _, message := range saved.Messages { + if message.Role == providertypes.RoleSystem && + strings.Contains(renderPartsForTest(message.Parts), todoBootstrapRequiredReason) { + foundPersistedReminder = true + break + } + } + if !foundPersistedReminder { + t.Fatalf("expected persisted todo bootstrap reminder, messages=%+v", saved.Messages) + } + if len(scripted.requests) == 0 { + t.Fatalf("expected provider request") + } + foundRequestReminder := false + for _, message := range scripted.requests[0].Messages { + if message.Role == providertypes.RoleSystem && + strings.Contains(renderPartsForTest(message.Parts), todoBootstrapRequiredReason) { + foundRequestReminder = true + break + } + } + if !foundRequestReminder { + t.Fatalf("expected provider request to include todo bootstrap reminder, messages=%+v", scripted.requests[0].Messages) + } +} diff --git a/internal/runtime/todo_run_boundary.go b/internal/runtime/todo_run_boundary.go index 1ba5a7dc..5df46f46 100644 --- a/internal/runtime/todo_run_boundary.go +++ b/internal/runtime/todo_run_boundary.go @@ -2,6 +2,7 @@ package runtime import ( "context" + "reflect" "time" runtimefacts "neo-code/internal/runtime/facts" @@ -13,19 +14,18 @@ func (s *Service) resetTodosForUserRun(ctx context.Context, state *runState) err if s == nil || state == nil { return nil } - if !shouldResetTodosForUserRun(state.session) { - return nil - } state.mu.Lock() - if len(state.session.Todos) == 0 { + currentTodos := cloneTodosForPersistence(state.session.Todos) + nextTodos, reason := todosForUserRunBoundary(state.session, currentTodos) + if reflect.DeepEqual(currentTodos, nextTodos) { state.mu.Unlock() return nil } - state.session.Todos = nil + state.session.Todos = nextTodos state.session.UpdatedAt = time.Now() if state.factsCollector != nil { - state.factsCollector.ApplyTodoSnapshot(runtimefacts.TodoSummaryLike{}) + state.factsCollector.ApplyTodoSnapshot(todoSummaryLikeForItems(nextTodos)) } sessionSnapshot := cloneSessionForPersistence(state.session) state.mu.Unlock() @@ -34,12 +34,20 @@ func (s *Service) resetTodosForUserRun(ctx context.Context, state *runState) err return err } - payload := buildTodoEventPayload(state, "reset", "new_user_run") + payload := buildTodoEventPayload(state, "reset", reason) s.emitRunScoped(ctx, EventTodoSnapshotUpdated, state, payload) s.emitRuntimeSnapshotUpdated(ctx, state, "todo_reset") return nil } +// todosForUserRunBoundary 返回新 Run 应继承的 todo 集合;active plan 只继承 plan-owned todo。 +func todosForUserRunBoundary(session agentsession.Session, todos []agentsession.TodoItem) ([]agentsession.TodoItem, string) { + if shouldResetTodosForUserRun(session) { + return nil, "new_user_run" + } + return selectPlanOwnedTodos(session.CurrentPlan, todos), "plan_owned_prune" +} + // shouldResetTodosForUserRun 根据 PlanArtifact 生命周期判断本轮是否开启新的 Todo 边界。 func shouldResetTodosForUserRun(session agentsession.Session) bool { if session.CurrentPlan == nil { @@ -54,3 +62,23 @@ func shouldResetTodosForUserRun(session agentsession.Session) bool { return true } } + +// todoSummaryLikeForItems 将保留后的 todo 列表压缩成事实层需要的计数。 +func todoSummaryLikeForItems(items []agentsession.TodoItem) runtimefacts.TodoSummaryLike { + var summary runtimefacts.TodoSummaryLike + for _, item := range items { + if !item.RequiredValue() { + continue + } + if item.Status.IsTerminal() { + if item.Status == agentsession.TodoStatusFailed { + summary.RequiredFailed++ + } else { + summary.RequiredCompleted++ + } + continue + } + summary.RequiredOpen++ + } + return summary +} diff --git a/internal/runtime/todo_run_boundary_test.go b/internal/runtime/todo_run_boundary_test.go index 79eb1edc..7dd65895 100644 --- a/internal/runtime/todo_run_boundary_test.go +++ b/internal/runtime/todo_run_boundary_test.go @@ -69,6 +69,9 @@ func TestResetTodosForUserRunKeepsTodosForActivePlan(t *testing.T) { session.CurrentPlan = &agentsession.PlanArtifact{ ID: "plan-1", Status: agentsession.PlanStatusApproved, + Spec: agentsession.PlanSpec{ + Todos: []agentsession.TodoItem{{ID: "plan-todo", Content: "plan task"}}, + }, } session.Todos = []agentsession.TodoItem{{ ID: "plan-todo", @@ -94,6 +97,38 @@ func TestResetTodosForUserRunKeepsTodosForActivePlan(t *testing.T) { } } +func TestResetTodosForUserRunPrunesTodosOutsideActivePlan(t *testing.T) { + t.Parallel() + + store := newMemoryStore() + required := true + session := agentsession.New("todo-boundary-prune") + session.CurrentPlan = &agentsession.PlanArtifact{ + ID: "plan-1", + Status: agentsession.PlanStatusApproved, + Spec: agentsession.PlanSpec{ + Todos: []agentsession.TodoItem{{ID: "plan-todo", Content: "plan task"}}, + }, + } + session.Todos = []agentsession.TodoItem{ + {ID: "plan-todo", Content: "plan task", Status: agentsession.TodoStatusPending, Required: &required}, + {ID: "old-todo", Content: "old task", Status: agentsession.TodoStatusPending, Required: &required}, + } + created, err := store.CreateSession(context.Background(), createSessionInputFromSession(session)) + if err != nil { + t.Fatalf("CreateSession() error = %v", err) + } + + service := &Service{sessionStore: store, events: make(chan RuntimeEvent, 8)} + state := newRunState("run-boundary-prune", created) + if err := service.resetTodosForUserRun(context.Background(), &state); err != nil { + t.Fatalf("resetTodosForUserRun() error = %v", err) + } + if len(state.session.Todos) != 1 || state.session.Todos[0].ID != "plan-todo" { + t.Fatalf("state todos = %+v, want only plan-owned todo", state.session.Todos) + } +} + func TestShouldResetTodosForUserRunBoundaryVariants(t *testing.T) { t.Parallel() diff --git a/internal/runtime/tool_diff_helpers_test.go b/internal/runtime/tool_diff_helpers_test.go index cec450e6..e4071a40 100644 --- a/internal/runtime/tool_diff_helpers_test.go +++ b/internal/runtime/tool_diff_helpers_test.go @@ -4,8 +4,8 @@ import ( "context" "testing" - "neo-code/internal/repository" providertypes "neo-code/internal/provider/types" + "neo-code/internal/repository" agentsession "neo-code/internal/session" "neo-code/internal/tools" ) diff --git a/internal/runtime/verify/command_success.go b/internal/runtime/verify/command_success.go index 70db7012..cb848f36 100644 --- a/internal/runtime/verify/command_success.go +++ b/internal/runtime/verify/command_success.go @@ -90,7 +90,7 @@ func (v CommandSuccessVerifier) VerifyFinal(ctx context.Context, input FinalVeri } return VerificationResult{ Name: name, - Status: VerificationSoftBlock, + Status: VerificationFail, Summary: fmt.Sprintf("verification command failed with exit code %d", result.ExitCode), Reason: "command exit code is non-zero", ErrorClass: classifyCommandFailure(name, result), diff --git a/internal/runtime/verify/command_success_test.go b/internal/runtime/verify/command_success_test.go index 646aa84a..e6bec2bd 100644 --- a/internal/runtime/verify/command_success_test.go +++ b/internal/runtime/verify/command_success_test.go @@ -69,7 +69,7 @@ func TestCommandSuccessVerifier(t *testing.T) { if err != nil { t.Fatalf("VerifyFinal() error = %v", err) } - if result.Status != VerificationSoftBlock || result.ErrorClass != ErrorClassCompileError { + if result.Status != VerificationFail || result.ErrorClass != ErrorClassCompileError { t.Fatalf("unexpected result: %+v", result) } }) diff --git a/internal/runtime/verify/content_match.go b/internal/runtime/verify/content_match.go index f686d2db..54d30aab 100644 --- a/internal/runtime/verify/content_match.go +++ b/internal/runtime/verify/content_match.go @@ -73,7 +73,7 @@ func (ContentMatchVerifier) VerifyFinal(_ context.Context, input FinalVerifyInpu } return VerificationResult{ Name: contentMatchVerifierName, - Status: VerificationSoftBlock, + Status: VerificationFail, Summary: "content rule mismatch detected", Reason: "content match check did not pass", Evidence: evidence, diff --git a/internal/runtime/verify/content_match_test.go b/internal/runtime/verify/content_match_test.go index 81ca623a..574c546e 100644 --- a/internal/runtime/verify/content_match_test.go +++ b/internal/runtime/verify/content_match_test.go @@ -55,7 +55,7 @@ func TestContentMatchVerifier(t *testing.T) { if err != nil { t.Fatalf("VerifyFinal() error = %v", err) } - if result.Status != VerificationSoftBlock { + if result.Status != VerificationFail { t.Fatalf("status = %q, want soft_block", result.Status) } }) diff --git a/internal/runtime/verify/file_exists.go b/internal/runtime/verify/file_exists.go index 321e7a96..1fb1dd5e 100644 --- a/internal/runtime/verify/file_exists.go +++ b/internal/runtime/verify/file_exists.go @@ -21,7 +21,7 @@ func (FileExistsVerifier) VerifyFinal(_ context.Context, input FinalVerifyInput) if len(paths) == 0 { return VerificationResult{ Name: fileExistsVerifierName, - Status: VerificationSoftBlock, + Status: VerificationFail, Summary: "no artifact targets declared", Reason: "file existence targets are missing", }, nil @@ -71,7 +71,7 @@ func (FileExistsVerifier) VerifyFinal(_ context.Context, input FinalVerifyInput) } return VerificationResult{ Name: fileExistsVerifierName, - Status: VerificationSoftBlock, + Status: VerificationFail, Summary: "artifact targets are missing or invalid", Reason: "file existence check did not pass", Evidence: evidence, diff --git a/internal/runtime/verify/file_exists_test.go b/internal/runtime/verify/file_exists_test.go index 7c8a884e..6aab55db 100644 --- a/internal/runtime/verify/file_exists_test.go +++ b/internal/runtime/verify/file_exists_test.go @@ -38,7 +38,7 @@ func TestFileExistsVerifier(t *testing.T) { if err != nil { t.Fatalf("VerifyFinal() error = %v", err) } - if result.Status != VerificationSoftBlock { + if result.Status != VerificationFail { t.Fatalf("status = %q, want soft_block", result.Status) } }) diff --git a/internal/runtime/verify/orchestrator.go b/internal/runtime/verify/orchestrator.go index 887e57fa..a903bfff 100644 --- a/internal/runtime/verify/orchestrator.go +++ b/internal/runtime/verify/orchestrator.go @@ -8,12 +8,12 @@ import ( "neo-code/internal/runtime/controlplane" ) -// Orchestrator 按固定顺序执行 verifier 并在首个非 pass 结果处短路。 +// Orchestrator 执行所有 verifier 并聚合结果。 type Orchestrator struct { Verifiers []FinalVerifier } -// RunFinalVerification 执行 verifier 列表并生成统一 gate 决议。 +// RunFinalVerification 执行所有 verifier 并生成统一 gate 决议。任一 Fail → 整体 Failed。 func (o Orchestrator) RunFinalVerification(ctx context.Context, input FinalVerifyInput) (VerificationGateDecision, error) { results := make([]VerificationResult, 0, len(o.Verifiers)) decision := VerificationGateDecision{ @@ -43,22 +43,8 @@ func (o Orchestrator) RunFinalVerification(ctx context.Context, input FinalVerif if result.Status == VerificationPass { continue } - decision.Passed = false - switch result.Status { - case VerificationSoftBlock: - decision.Reason = controlplane.StopReasonTodoNotConverged - case VerificationHardBlock: - if result.WaitingExternal { - decision.Reason = controlplane.StopReasonTodoWaitingExternal - } else { - decision.Reason = controlplane.StopReasonTodoNotConverged - } - default: - decision.Reason = stopReasonForVerificationFailure(result) - } - decision.Results = results - return decision, nil + decision.Reason = stopReasonForVerificationFailure(result) } decision.Results = results return decision, nil diff --git a/internal/runtime/verify/orchestrator_test.go b/internal/runtime/verify/orchestrator_test.go index b48d2d8b..1bc11547 100644 --- a/internal/runtime/verify/orchestrator_test.go +++ b/internal/runtime/verify/orchestrator_test.go @@ -27,20 +27,20 @@ func (s stubFinalVerifier) VerifyFinal(ctx context.Context, input FinalVerifyInp func TestOrchestratorRunFinalVerification(t *testing.T) { t.Parallel() - t.Run("short-circuits on first non-pass", func(t *testing.T) { + t.Run("runs all verifiers, aggregates all results", func(t *testing.T) { t.Parallel() decision, err := (Orchestrator{Verifiers: []FinalVerifier{ - stubFinalVerifier{name: "todo", result: VerificationResult{Name: "todo", Status: VerificationSoftBlock}}, + stubFinalVerifier{name: "todo", result: VerificationResult{Name: "todo", Status: VerificationFail}}, stubFinalVerifier{name: "build", result: VerificationResult{Name: "build", Status: VerificationFail}}, }}).RunFinalVerification(context.Background(), FinalVerifyInput{}) if err != nil { t.Fatalf("RunFinalVerification() error = %v", err) } - if decision.Passed || decision.Reason != controlplane.StopReasonTodoNotConverged { + if decision.Passed { t.Fatalf("unexpected decision: %+v", decision) } - if len(decision.Results) != 1 { - t.Fatalf("results len = %d, want 1", len(decision.Results)) + if len(decision.Results) != 2 { + t.Fatalf("results len = %d, want 2 (all verifiers run)", len(decision.Results)) } }) @@ -57,15 +57,15 @@ func TestOrchestratorRunFinalVerification(t *testing.T) { } }) - t.Run("hard block waiting external maps correctly", func(t *testing.T) { + t.Run("fail with waiting external maps to verification_failed", func(t *testing.T) { t.Parallel() decision, err := (Orchestrator{Verifiers: []FinalVerifier{ - stubFinalVerifier{name: "todo", result: VerificationResult{Name: "todo", Status: VerificationHardBlock, WaitingExternal: true}}, + stubFinalVerifier{name: "todo", result: VerificationResult{Name: "todo", Status: VerificationFail, WaitingExternal: true}}, }}).RunFinalVerification(context.Background(), FinalVerifyInput{}) if err != nil { t.Fatalf("RunFinalVerification() error = %v", err) } - if decision.Reason != controlplane.StopReasonTodoWaitingExternal { + if decision.Reason != controlplane.StopReasonVerificationFailed { t.Fatalf("reason = %q, want %q", decision.Reason, controlplane.StopReasonTodoWaitingExternal) } }) diff --git a/internal/runtime/verify/todo_convergence.go b/internal/runtime/verify/todo_convergence.go index 51fde050..68a710b9 100644 --- a/internal/runtime/verify/todo_convergence.go +++ b/internal/runtime/verify/todo_convergence.go @@ -100,7 +100,7 @@ func (TodoConvergenceVerifier) VerifyFinal(_ context.Context, input FinalVerifyI if len(waitingExternalIDs) > 0 { return VerificationResult{ Name: todoConvergenceVerifierName, - Status: VerificationHardBlock, + Status: VerificationFail, Summary: fmt.Sprintf("%d required todo(s) wait for external input", len(waitingExternalIDs)), Reason: "required todos are blocked by external dependency", WaitingExternal: true, @@ -110,7 +110,7 @@ func (TodoConvergenceVerifier) VerifyFinal(_ context.Context, input FinalVerifyI if len(pendingIDs) > 0 || len(inProgressIDs) > 0 || len(blockedIDs) > 0 { return VerificationResult{ Name: todoConvergenceVerifierName, - Status: VerificationSoftBlock, + Status: VerificationFail, Summary: "required todos are not converged", Reason: "required todos are still pending, in progress, or internally blocked", Evidence: evidence, diff --git a/internal/runtime/verify/todo_convergence_test.go b/internal/runtime/verify/todo_convergence_test.go index 0b6bccf4..9913b3c9 100644 --- a/internal/runtime/verify/todo_convergence_test.go +++ b/internal/runtime/verify/todo_convergence_test.go @@ -52,8 +52,8 @@ func TestTodoConvergenceVerifierStates(t *testing.T) { if err != nil { t.Fatalf("VerifyFinal() error = %v", err) } - if result.Status != VerificationSoftBlock { - t.Fatalf("status = %q, want %q", result.Status, VerificationSoftBlock) + if result.Status != VerificationFail { + t.Fatalf("status = %q, want %q", result.Status, VerificationFail) } }) @@ -68,8 +68,8 @@ func TestTodoConvergenceVerifierStates(t *testing.T) { if err != nil { t.Fatalf("VerifyFinal() error = %v", err) } - if result.Status != VerificationSoftBlock { - t.Fatalf("status = %q, want %q", result.Status, VerificationSoftBlock) + if result.Status != VerificationFail { + t.Fatalf("status = %q, want %q", result.Status, VerificationFail) } }) @@ -83,8 +83,8 @@ func TestTodoConvergenceVerifierStates(t *testing.T) { if err != nil { t.Fatalf("VerifyFinal() error = %v", err) } - if result.Status != VerificationHardBlock { - t.Fatalf("status = %q, want %q", result.Status, VerificationHardBlock) + if result.Status != VerificationFail { + t.Fatalf("status = %q, want %q", result.Status, VerificationFail) } if !result.WaitingExternal { t.Fatalf("expected WaitingExternal=true") diff --git a/internal/runtime/verify/types.go b/internal/runtime/verify/types.go index 41cf53a8..594c3026 100644 --- a/internal/runtime/verify/types.go +++ b/internal/runtime/verify/types.go @@ -11,11 +11,7 @@ type VerificationStatus string const ( // VerificationPass 表示验证通过。 VerificationPass VerificationStatus = "pass" - // VerificationSoftBlock 表示当前不能收尾,但仍可继续推进。 - VerificationSoftBlock VerificationStatus = "soft_block" - // VerificationHardBlock 表示当前不能收尾且需要外部条件才能继续。 - VerificationHardBlock VerificationStatus = "hard_block" - // VerificationFail 表示验证明确失败。 + // VerificationFail 表示验证失败。 VerificationFail VerificationStatus = "fail" ) @@ -88,14 +84,6 @@ type TaskStateSnapshot struct { KeyArtifacts []string `json:"key_artifacts,omitempty"` } -// RuntimeStateSnapshot 表示 verifier 所需的 runtime 控制面快照。 -type RuntimeStateSnapshot struct { - Turn int `json:"turn,omitempty"` - MaxTurns int `json:"max_turns,omitempty"` - MaxTurnsReached bool `json:"max_turns_reached,omitempty"` - FinalInterceptStreak int `json:"final_intercept_streak,omitempty"` -} - // FinalVerifyInput 表示一次 final 验证请求的完整输入。 type FinalVerifyInput struct { SessionID string `json:"session_id,omitempty"` @@ -107,7 +95,6 @@ type FinalVerifyInput struct { LastAssistantFinal string `json:"last_assistant_final,omitempty"` ToolResults []ToolResultLike `json:"tool_results,omitempty"` TaskState TaskStateSnapshot `json:"task_state,omitempty"` - RuntimeState RuntimeStateSnapshot `json:"runtime_state,omitempty"` VerificationConfig config.VerificationConfig `json:"verification_config,omitempty"` } diff --git a/internal/session/plan.go b/internal/session/plan.go index 816704a4..2209036c 100644 --- a/internal/session/plan.go +++ b/internal/session/plan.go @@ -52,7 +52,7 @@ type AcceptCheck struct { Kind string `json:"kind"` Target string `json:"target,omitempty"` Match string `json:"match,omitempty"` - Required bool `json:"required,omitempty"` + Required *bool `json:"required,omitempty"` Params map[string]string `json:"params,omitempty"` } @@ -309,6 +309,14 @@ func (checks *AcceptChecks) UnmarshalJSON(data []byte) error { return nil } +// RequiredValue 返回验收项是否为必需项;nil 表示 JSON 省略字段,默认视为必需。 +func (check AcceptCheck) RequiredValue() bool { + if check.Required == nil { + return true + } + return *check.Required +} + // Clone 返回验收项深拷贝,避免调用方共享 Params map。 func (checks AcceptChecks) Clone() AcceptChecks { if len(checks) == 0 { @@ -321,6 +329,10 @@ func (checks AcceptChecks) Clone() AcceptChecks { cloned.Kind = strings.TrimSpace(cloned.Kind) cloned.Target = strings.TrimSpace(cloned.Target) cloned.Match = strings.TrimSpace(cloned.Match) + if check.Required != nil { + required := *check.Required + cloned.Required = &required + } if len(check.Params) > 0 { cloned.Params = make(map[string]string, len(check.Params)) for key, value := range check.Params { @@ -398,7 +410,7 @@ func migrateLegacyAcceptCheck(value string) AcceptCheck { case looksLikePath(value): kind = AcceptCheckFileExists } - return AcceptCheck{Kind: kind, Target: strings.TrimSpace(value), Required: true} + return AcceptCheck{Kind: kind, Target: strings.TrimSpace(value)} } func normalizeAcceptCheckKind(kind string) string { diff --git a/internal/session/plan_test.go b/internal/session/plan_test.go index 07b65031..e0f7970a 100644 --- a/internal/session/plan_test.go +++ b/internal/session/plan_test.go @@ -1,6 +1,7 @@ package session import ( + "encoding/json" "fmt" "strings" "testing" @@ -8,7 +9,25 @@ import ( ) func acceptText(target string) AcceptChecks { - return AcceptChecks{{Kind: AcceptCheckOutputOnly, Target: target, Required: true}} + return AcceptChecks{{Kind: AcceptCheckOutputOnly, Target: target}} +} + +func TestAcceptChecksUnmarshalRequiredDefaultAndExplicitFalse(t *testing.T) { + t.Parallel() + + var checks AcceptChecks + if err := json.Unmarshal([]byte(`[{"kind":"output_only"},{"kind":"tool_fact","required":false}]`), &checks); err != nil { + t.Fatalf("Unmarshal() error = %v", err) + } + if len(checks) != 2 { + t.Fatalf("len = %d, want 2", len(checks)) + } + if !checks[0].RequiredValue() { + t.Fatalf("omitted required should default to true: %+v", checks[0]) + } + if checks[1].RequiredValue() { + t.Fatalf("explicit required=false should stay optional: %+v", checks[1]) + } } func TestNormalizeSummaryViewFallsBackToBuiltSummaryWhenStructurallyInvalid(t *testing.T) { diff --git a/internal/session/store_test.go b/internal/session/store_test.go index 230b6c5d..b9ec1e4b 100644 --- a/internal/session/store_test.go +++ b/internal/session/store_test.go @@ -620,7 +620,7 @@ func TestSQLiteStorePersistsPlanStateRoundTrip(t *testing.T) { Goal: "落地 plan/build 模式", Steps: []string{"扩展 session", "扩展 runtime"}, Constraints: []string{"保持 tools 边界"}, - Verify: AcceptChecks{{Kind: AcceptCheckCommandSuccess, Target: "go test ./internal/...", Required: true}}, + Verify: AcceptChecks{{Kind: AcceptCheckCommandSuccess, Target: "go test ./internal/..."}}, Todos: []TodoItem{ {ID: "todo-plan-1", Content: "补 plan 模型"}, }, @@ -629,7 +629,7 @@ func TestSQLiteStorePersistsPlanStateRoundTrip(t *testing.T) { Goal: "落地 plan/build 模式", KeySteps: []string{"扩展 session", "扩展 runtime"}, Constraints: []string{"保持 tools 边界"}, - Verify: AcceptChecks{{Kind: AcceptCheckCommandSuccess, Target: "go test ./internal/...", Required: true}}, + Verify: AcceptChecks{{Kind: AcceptCheckCommandSuccess, Target: "go test ./internal/..."}}, ActiveTodoIDs: []string{"todo-plan-1"}, }, }, diff --git a/internal/tools/todo/write.go b/internal/tools/todo/write.go index 2de010cf..716d6478 100644 --- a/internal/tools/todo/write.go +++ b/internal/tools/todo/write.go @@ -287,13 +287,22 @@ func (t *Tool) Execute(ctx context.Context, call tools.ToolCallInput) (tools.Too if resultErr != nil { reason := mapReason(resultErr) extra := map[string]any{"action": input.Action} + details := resultErr.Error() if reason == reasonRevisionConflict && input.ID != "" { if current, ok := call.SessionMutator.FindTodo(input.ID); ok { extra["current_revision"] = current.Revision extra["current_status"] = string(current.Status) } } - return errorResult(reason, resultErr.Error(), extra), resultErr + if reason == reasonTodoNotFound { + details = todoNotFoundRecoveryDetails(call.SessionMutator, input.ID, resultErr) + todos := call.SessionMutator.ListTodos() + extra["todo_count"] = len(todos) + if ids := activeTodoIDsForRecovery(todos); len(ids) > 0 { + extra["active_todo_ids"] = ids + } + } + return errorResult(reason, details, extra), resultErr } return successResultWithMetadata(input.Action, call.SessionMutator.ListTodos(), dispatchMeta), nil @@ -394,6 +403,41 @@ func (t *Tool) dispatch(call tools.ToolCallInput, input writeInput) (map[string] } } +// todoNotFoundRecoveryDetails 为缺失 todo 的错误结果补充下一步恢复建议。 +func todoNotFoundRecoveryDetails(mutator tools.SessionMutator, id string, err error) string { + base := strings.TrimSpace(err.Error()) + if base == "" { + base = fmt.Sprintf("%s: todo %q", agentsession.ErrTodoNotFound, strings.TrimSpace(id)) + } + if mutator == nil || len(mutator.ListTodos()) == 0 { + return base + "; current session has no active todos. Create current run todos first with todo_write action=\"plan\" or action=\"add\", then update or complete those ids." + } + ids := activeTodoIDsForRecovery(mutator.ListTodos()) + if len(ids) == 0 { + return base + "; current todos are all terminal. Create new current plan todos with todo_write action=\"plan\" or action=\"add\" before updating status." + } + return base + "; use one of the current active todo ids or recreate the current plan todos. active_todo_ids=" + strings.Join(ids, ",") +} + +// activeTodoIDsForRecovery 收集非终态 todo ID,帮助模型从 todo_not_found 中恢复。 +func activeTodoIDsForRecovery(items []agentsession.TodoItem) []string { + if len(items) == 0 { + return nil + } + ids := make([]string, 0, len(items)) + for _, item := range items { + id := strings.TrimSpace(item.ID) + if id == "" || item.Status.IsTerminal() { + continue + } + ids = append(ids, id) + } + if len(ids) == 0 { + return nil + } + return ids +} + // completeTodoWithErgonomics 为 complete 动作提供 pending->in_progress->completed 便捷迁移。 func completeTodoWithErgonomics(call tools.ToolCallInput, input writeInput) (map[string]any, error) { current, ok := call.SessionMutator.FindTodo(input.ID) diff --git a/internal/tools/todo/write_test.go b/internal/tools/todo/write_test.go index db60f3ca..0c11e6df 100644 --- a/internal/tools/todo/write_test.go +++ b/internal/tools/todo/write_test.go @@ -435,6 +435,57 @@ func TestToolExecuteReasonMapping(t *testing.T) { } } +func TestToolExecuteTodoNotFoundRecoveryDetails(t *testing.T) { + t.Parallel() + + tool := New() + emptySession := agentsession.New("todo-not-found-empty") + emptyMutator := &stubMutator{session: &emptySession} + result, err := tool.Execute(context.Background(), tools.ToolCallInput{ + Name: tools.ToolNameTodoWrite, + SessionMutator: emptyMutator, + Arguments: []byte(`{"action":"complete","id":"missing"}`), + }) + if err == nil { + t.Fatalf("expected missing todo error") + } + if result.Metadata["reason_code"] != reasonTodoNotFound { + t.Fatalf("reason_code = %v, want %q", result.Metadata["reason_code"], reasonTodoNotFound) + } + if !strings.Contains(result.Content, "current session has no active todos") || + !strings.Contains(result.Content, `action="plan"`) { + t.Fatalf("expected empty-session recovery details, got %q", result.Content) + } + if result.Metadata["todo_count"] != 0 { + t.Fatalf("todo_count = %v, want 0", result.Metadata["todo_count"]) + } + + existingSession := agentsession.New("todo-not-found-existing") + if err := existingSession.AddTodo(agentsession.TodoItem{ + ID: "todo-1", + Content: "current", + Status: agentsession.TodoStatusPending, + }); err != nil { + t.Fatalf("AddTodo() error = %v", err) + } + existingMutator := &stubMutator{session: &existingSession} + result, err = tool.Execute(context.Background(), tools.ToolCallInput{ + Name: tools.ToolNameTodoWrite, + SessionMutator: existingMutator, + Arguments: []byte(`{"action":"complete","id":"missing"}`), + }) + if err == nil { + t.Fatalf("expected missing todo error") + } + if !strings.Contains(result.Content, "active_todo_ids=todo-1") { + t.Fatalf("expected active todo recovery details, got %q", result.Content) + } + ids, ok := result.Metadata["active_todo_ids"].([]string) + if !ok || len(ids) != 1 || ids[0] != "todo-1" { + t.Fatalf("active_todo_ids metadata = %#v", result.Metadata["active_todo_ids"]) + } +} + func TestParseInput(t *testing.T) { t.Parallel() diff --git a/internal/tui/core/app/todo_test.go b/internal/tui/core/app/todo_test.go index 159cf7b5..997d429e 100644 --- a/internal/tui/core/app/todo_test.go +++ b/internal/tui/core/app/todo_test.go @@ -200,7 +200,7 @@ func TestRebuildTodoSanitizesMarkdownTableLikeTitle(t *testing.T) { ID: "todo-md", Status: "pending", Priority: 2, - Title: "| col1 | col2 |\n| --- | --- |\n| value-a | value-b |", + Title: "| col1 | col2 |\n| --- | --- |\n| value-a | value-b |", }, } @@ -600,6 +600,35 @@ func TestRuntimeEventTodoHandlers(t *testing.T) { } } +func TestRuntimeEventTodoConflictTodoNotFoundDoesNotRefreshEmptyPanel(t *testing.T) { + app, _ := newTestApp(t) + app.state.ActiveSessionID = "session-1" + app.todoItems = []todoViewItem{{ID: "stale", Title: "stale", Status: "pending"}} + app.todoPanelVisible = true + + handled := runtimeEventTodoConflictHandler(&app, agentruntime.RuntimeEvent{ + SessionID: "session-1", + Payload: agentruntime.TodoEventPayload{ + Action: "complete", + Reason: "todo_not_found", + }, + }) + if handled { + t.Fatalf("expected todo conflict handler to return false") + } + if len(app.todoItems) != 0 || app.todoPanelVisible { + t.Fatalf("expected empty todo_not_found conflict to hide stale panel, items=%+v visible=%v", app.todoItems, app.todoPanelVisible) + } + if len(app.activities) == 0 || app.activities[len(app.activities)-1].Title != "Todo conflict" { + t.Fatalf("expected todo conflict activity, got %+v", app.activities) + } + for _, activity := range app.activities { + if activity.Title == "Failed to refresh todo panel" { + t.Fatalf("did not expect refresh failure activity for empty todo_not_found conflict: %+v", app.activities) + } + } +} + func TestParseTodoEventPayload(t *testing.T) { got, ok := parseTodoEventPayload(agentruntime.TodoEventPayload{Action: "a", Reason: "b"}) if !ok || got.Action != "a" || got.Reason != "b" { diff --git a/internal/tui/core/app/update.go b/internal/tui/core/app/update.go index c3e1434a..e16df225 100644 --- a/internal/tui/core/app/update.go +++ b/internal/tui/core/app/update.go @@ -3608,7 +3608,13 @@ func runtimeEventAcceptanceDecidedHandler(a *App, event tuiservices.RuntimeEvent if status == "" { status = "unknown" } - detail := strings.TrimSpace(payload.UserVisibleSummary) + detail := strings.TrimSpace(payload.Summary) + if detail == "" { + detail = formatAcceptanceResults(payload.Results) + } + if detail == "" { + detail = strings.TrimSpace(payload.UserVisibleSummary) + } if detail == "" { detail = strings.TrimSpace(payload.InternalSummary) } @@ -3626,6 +3632,33 @@ func runtimeEventAcceptanceDecidedHandler(a *App, event tuiservices.RuntimeEvent return false } +// formatAcceptanceResults 将逐项验收结果压缩成活动日志可读的一行摘要。 +func formatAcceptanceResults(results []tuiservices.AcceptanceCheckResult) string { + if len(results) == 0 { + return "" + } + parts := make([]string, 0, len(results)) + for _, result := range results { + name := strings.TrimSpace(result.Name) + if name == "" { + name = strings.TrimSpace(result.Kind) + } + if name == "" { + name = "accept_check" + } + if result.Passed { + parts = append(parts, name+": pass") + continue + } + reason := strings.TrimSpace(result.Reason) + if reason == "" { + reason = "failed" + } + parts = append(parts, name+": "+reason) + } + return strings.Join(parts, "; ") +} + // runtimeEventStopReasonDecidedHandler 处理运行终止原因事件,统一收尾状态与活动日志。 func runtimeEventStopReasonDecidedHandler(a *App, event tuiservices.RuntimeEvent) bool { payload, ok := event.Payload.(tuiservices.StopReasonDecidedPayload) @@ -3661,6 +3694,9 @@ func runtimeEventStopReasonDecidedHandler(a *App, event tuiservices.RuntimeEvent } case strings.ToLower(string(tuiservices.StopReasonTodoNotConverged)), strings.ToLower(string(tuiservices.StopReasonTodoWaitingExternal)), + strings.ToLower(string(tuiservices.StopReasonMissingCompletionSignal)), + strings.ToLower(string(tuiservices.StopReasonNoProgress)), + strings.ToLower(string(tuiservices.StopReasonRepeatCycle)), strings.ToLower(string(tuiservices.StopReasonNoProgressAfterFinalIntercept)), strings.ToLower(string(tuiservices.StopReasonMaxTurnExceededWithUnconvergedTodos)), strings.ToLower(string(tuiservices.StopReasonMaxTurnExceededWithFailedVerification)): @@ -3676,6 +3712,7 @@ func runtimeEventStopReasonDecidedHandler(a *App, event tuiservices.RuntimeEvent a.state.StatusText = statusCanceled a.appendActivity("run", "Canceled current run", "", false) case strings.ToLower(string(tuiservices.StopReasonVerificationFailed)), + strings.ToLower(string(tuiservices.StopReasonAcceptCheckFailed)), strings.ToLower(string(tuiservices.StopReasonRequiredTodoFailed)), strings.ToLower(string(tuiservices.StopReasonVerificationExecutionDenied)), strings.ToLower(string(tuiservices.StopReasonVerificationExecutionError)): @@ -3729,8 +3766,16 @@ func runtimeEventTodoUpdatedHandler(a *App, event tuiservices.RuntimeEvent) bool } payload, _ := parseTodoEventPayload(event.Payload) + rawReason := strings.TrimSpace(payload.Reason) + if rawReason == "" { + rawReason = todoConflictReasonFromPayload(event.Payload) + } if len(payload.Items) > 0 { a.syncTodosFromEventItems(payload.Items) + } else if isTodoNotFoundConflict(rawReason) { + a.clearTodos() + a.applyComponentLayout(false) + a.todoPanelVisible = false } else if err := a.refreshTodosFromSession(sessionID); err != nil { a.appendActivity("todo", "Failed to refresh todo panel", err.Error(), true) return false @@ -3765,14 +3810,22 @@ func runtimeEventTodoConflictHandler(a *App, event tuiservices.RuntimeEvent) boo } payload, _ := parseTodoEventPayload(event.Payload) + rawReason := strings.TrimSpace(payload.Reason) + if rawReason == "" { + rawReason = todoConflictReasonFromPayload(event.Payload) + } if len(payload.Items) > 0 { a.syncTodosFromEventItems(payload.Items) + } else if isTodoNotFoundConflict(rawReason) { + a.clearTodos() + a.applyComponentLayout(false) + a.todoPanelVisible = false } else if err := a.refreshTodosFromSession(sessionID); err != nil { a.appendActivity("todo", "Failed to refresh todo panel", err.Error(), true) return false } a.state.StatusText = formatTodoSummaryStatus(payload.Summary) - reason := strings.TrimSpace(payload.Reason) + reason := rawReason if reason == "" { reason = "todo conflict" } @@ -3790,6 +3843,21 @@ func runtimeEventTodoConflictHandler(a *App, event tuiservices.RuntimeEvent) boo return false } +// isTodoNotFoundConflict 判断 todo 冲突是否只是模型操作了不存在的 todo id。 +func isTodoNotFoundConflict(reason string) bool { + return strings.EqualFold(strings.TrimSpace(reason), "todo_not_found") || + strings.Contains(strings.ToLower(strings.TrimSpace(reason)), "todo_not_found") || + strings.Contains(strings.ToLower(strings.TrimSpace(reason)), "todo not found") +} + +// todoConflictReasonFromPayload 从未解析的 payload 中兜底提取冲突原因文本。 +func todoConflictReasonFromPayload(payload any) string { + if payload == nil { + return "" + } + return strings.TrimSpace(fmt.Sprintf("%v", payload)) +} + // runtimeEventTodoSnapshotUpdatedHandler 处理 todo_snapshot_updated 事件并实时同步 Todo 面板。 func runtimeEventTodoSnapshotUpdatedHandler(a *App, event tuiservices.RuntimeEvent) bool { return runtimeEventTodoUpdatedHandler(a, event) diff --git a/internal/tui/core/app/update_runtime_events_test.go b/internal/tui/core/app/update_runtime_events_test.go index 0015ba03..72231187 100644 --- a/internal/tui/core/app/update_runtime_events_test.go +++ b/internal/tui/core/app/update_runtime_events_test.go @@ -799,15 +799,24 @@ func TestRuntimeEventVerificationAndAcceptanceHandlers(t *testing.T) { } runtimeEventAcceptanceDecidedHandler(&app, agentruntime.RuntimeEvent{ Payload: agentruntime.AcceptanceDecidedPayload{ - Status: "failed", - UserVisibleSummary: "", - InternalSummary: "", - ContinueHint: "provide missing files", - CompletionBlockedReason: "unverified_write", + Status: "failed", + Summary: "command_success: missing successful command evidence", + StopReason: agentruntime.StopReasonAcceptCheckFailed, + Results: []agentruntime.AcceptanceCheckResult{ + { + Passed: false, + Name: "command_success", + Kind: "command_success", + Target: "go test ./...", + Reason: "missing successful command evidence", + }, + }, }, }) acceptance := app.activities[len(app.activities)-1] - if acceptance.Title != "Acceptance decided (failed)" || !strings.Contains(acceptance.Detail, "reason=unverified_write") || !acceptance.IsError { + if acceptance.Title != "Acceptance decided (failed)" || + !strings.Contains(acceptance.Detail, "command_success") || + !acceptance.IsError { t.Fatalf("unexpected acceptance activity: %+v", acceptance) } } diff --git a/internal/tui/services/runtime_contract.go b/internal/tui/services/runtime_contract.go index 7c4d1cf8..2d23886e 100644 --- a/internal/tui/services/runtime_contract.go +++ b/internal/tui/services/runtime_contract.go @@ -305,12 +305,20 @@ const ( StopReasonVerificationFailed StopReason = "verification_failed" // StopReasonAccepted 表示双门控通过并被 acceptance 接受。 StopReasonAccepted StopReason = "accepted" + // StopReasonMissingCompletionSignal 表示 assistant 停止调用工具但未输出完成信号。 + StopReasonMissingCompletionSignal StopReason = "missing_completion_signal" + // StopReasonAcceptCheckFailed 表示最终 Accept Gate 的验收项失败。 + StopReasonAcceptCheckFailed StopReason = "accept_check_failed" // StopReasonTodoNotConverged 表示 required todo 未收敛。 StopReasonTodoNotConverged StopReason = "todo_not_converged" // StopReasonTodoWaitingExternal 表示 todo 等待外部输入。 StopReasonTodoWaitingExternal StopReason = "todo_waiting_external" // StopReasonNoProgressAfterFinalIntercept 表示 final 被拦截后长期无进展。 StopReasonNoProgressAfterFinalIntercept StopReason = "no_progress_after_final_intercept" + // StopReasonNoProgress 表示运行连续缺少实质进展。 + StopReasonNoProgress StopReason = "no_progress" + // StopReasonRepeatCycle 表示运行重复相同动作或结果。 + StopReasonRepeatCycle StopReason = "repeat_cycle" // StopReasonMaxTurnExceededWithUnconvergedTodos 表示 max turn + todo 未收敛。 StopReasonMaxTurnExceededWithUnconvergedTodos StopReason = "max_turn_exceeded_with_unconverged_todos" // StopReasonMaxTurnExceededWithFailedVerification 表示 max turn + verification 失败。 @@ -366,13 +374,24 @@ type VerificationFailedPayload struct { // AcceptanceDecidedPayload 描述 acceptance 引擎输出。 type AcceptanceDecidedPayload struct { - Status string `json:"status"` - StopReason StopReason `json:"stop_reason,omitempty"` - ErrorClass string `json:"error_class,omitempty"` - CompletionBlockedReason string `json:"completion_blocked_reason,omitempty"` - UserVisibleSummary string `json:"user_visible_summary,omitempty"` - InternalSummary string `json:"internal_summary,omitempty"` - ContinueHint string `json:"continue_hint,omitempty"` + Status string `json:"status"` + StopReason StopReason `json:"stop_reason,omitempty"` + ErrorClass string `json:"error_class,omitempty"` + CompletionBlockedReason string `json:"completion_blocked_reason,omitempty"` + UserVisibleSummary string `json:"user_visible_summary,omitempty"` + InternalSummary string `json:"internal_summary,omitempty"` + ContinueHint string `json:"continue_hint,omitempty"` + Summary string `json:"summary,omitempty"` + Results []AcceptanceCheckResult `json:"results,omitempty"` +} + +// AcceptanceCheckResult 描述 Accept Gate 中单个检查项的结果。 +type AcceptanceCheckResult struct { + Passed bool `json:"passed"` + Name string `json:"name"` + Kind string `json:"kind,omitempty"` + Target string `json:"target,omitempty"` + Reason string `json:"reason,omitempty"` } // TokenUsagePayload 描述 runtime 当前 token_usage 事件载荷。 From bef9833e7230629fb8f9e070133c8b8306f7fd82 Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Sat, 9 May 2026 18:09:30 +0800 Subject: [PATCH 03/15] =?UTF-8?q?fix(runtime)=EF=BC=9A=E4=BF=AE=E5=A4=8Dpl?= =?UTF-8?q?an=E6=A8=A1=E5=BC=8F=E4=B8=8B=E5=88=9B=E5=BB=BAtodo=E5=A4=B1?= =?UTF-8?q?=E8=B4=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/runtime/planning.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/internal/runtime/planning.go b/internal/runtime/planning.go index 14d463cd..fcdc3827 100644 --- a/internal/runtime/planning.go +++ b/internal/runtime/planning.go @@ -305,6 +305,20 @@ func applyCurrentPlanRevision(session *agentsession.Session, plan *agentsession. if oldPlan := session.CurrentPlan; oldPlan != nil && oldPlan.Revision < plan.Revision { agentsession.CancelTodosByIDs(session.Todos, oldPlan.Summary.ActiveTodoIDs) } + // 将 PlanSpec.Todos 中尚不存在于 session.Todos 的条目补入, + // 避免 plan 模式下模型后续通过 todo_write 引用这些 ID 时找不到。 + for _, planTodo := range plan.Spec.Todos { + id := strings.TrimSpace(planTodo.ID) + if id == "" { + continue + } + if _, exists := session.FindTodo(id); exists { + continue + } + if err := session.AddTodo(planTodo); err != nil { + return false + } + } session.CurrentPlan = plan session.PlanApprovalPendingFullAlign = false session.PlanCompletionPendingFullReview = false From 3b4dcefb8b1167df6229793fbf2c50473fb8b0cf Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Sat, 9 May 2026 18:19:06 +0800 Subject: [PATCH 04/15] =?UTF-8?q?fix(runtime)=EF=BC=9A=E5=B0=86codebase?= =?UTF-8?q?=E5=B7=A5=E5=85=B7=E6=B7=BB=E5=8A=A0=E5=88=B0plan=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E7=99=BD=E5=90=8D=E5=8D=95=E4=B8=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/tools/mode_filter.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/tools/mode_filter.go b/internal/tools/mode_filter.go index 4c953a32..697391bd 100644 --- a/internal/tools/mode_filter.go +++ b/internal/tools/mode_filter.go @@ -12,6 +12,9 @@ func isReadOnlyVisibleTool(name string) bool { case ToolNameFilesystemReadFile, ToolNameFilesystemGrep, ToolNameFilesystemGlob, + ToolNameCodebaseRead, + ToolNameCodebaseSearchText, + ToolNameCodebaseSearchSymbol, ToolNameWebFetch, ToolNameMemoRecall, ToolNameMemoList, From 915fb4744c35bc57b7cf155a85815981818b8b12 Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Sat, 9 May 2026 18:34:37 +0800 Subject: [PATCH 05/15] =?UTF-8?q?fix(runtime)=EF=BC=9A=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E4=B8=8D=E5=90=8C=20content=5Fcontains=20=E6=88=96=20required?= =?UTF-8?q?=20=E5=B1=9E=E6=80=A7=E8=A2=AB=E5=90=88=E5=B9=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/runtime/acceptgate/checks.go | 17 +++++++++++- internal/runtime/acceptgate_runtime.go | 20 +++++++++++--- internal/session/plan.go | 25 +++++++++++++++-- internal/session/plan_test.go | 38 ++++++++++++++++++++++++++ internal/tools/permission_mapper.go | 21 ++++++++++++++ 5 files changed, 114 insertions(+), 7 deletions(-) diff --git a/internal/runtime/acceptgate/checks.go b/internal/runtime/acceptgate/checks.go index cdec8f51..b3f39002 100644 --- a/internal/runtime/acceptgate/checks.go +++ b/internal/runtime/acceptgate/checks.go @@ -193,7 +193,7 @@ func normalizeCommand(value string) string { fields := strings.Fields(value) out := make([]string, 0, len(fields)) for _, field := range fields { - if strings.Contains(field, "=") && !strings.Contains(field, "/") && !strings.Contains(field, "\\") { + if isEnvVarAssignment(field) { continue } if strings.HasPrefix(strings.ToLower(field), "$env:") { @@ -204,6 +204,21 @@ func normalizeCommand(value string) string { return strings.ToLower(strings.Join(out, " ")) } +// isEnvVarAssignment 识别裸环境变量赋值,避免把 CLI 的 -flag=value 当作环境变量剥离。 +func isEnvVarAssignment(field string) bool { + field = strings.TrimSpace(field) + if !strings.Contains(field, "=") { + return false + } + if strings.HasPrefix(field, "-") { + return false + } + if strings.Contains(field, "/") || strings.Contains(field, "\\") { + return false + } + return true +} + func normalizePath(value string) string { value = strings.TrimSpace(value) if value == "" { diff --git a/internal/runtime/acceptgate_runtime.go b/internal/runtime/acceptgate_runtime.go index 70b2a8f9..9195c6f7 100644 --- a/internal/runtime/acceptgate_runtime.go +++ b/internal/runtime/acceptgate_runtime.go @@ -51,7 +51,7 @@ func (s *Service) evaluateAcceptGate(ctx context.Context, state *runState, assis }) } -// selectPlanOwnedTodos 只把当前计划显式拥有的 todo 交给终态验收,避免无 plan 的 chat/read-only 被旧 todo 污染。 +// selectPlanOwnedTodos 只把当前计划拥有的 todo 交给终态验收,避免无 plan 的 chat/read-only 被旧 todo 污染。 func selectPlanOwnedTodos(plan *agentsession.PlanArtifact, todos []agentsession.TodoItem) []agentsession.TodoItem { if plan == nil || len(todos) == 0 { return nil @@ -69,18 +69,30 @@ func selectPlanOwnedTodos(plan *agentsession.PlanArtifact, todos []agentsession. owned[id] = struct{}{} } } - if len(owned) == 0 { - return nil - } selected := make([]agentsession.TodoItem, 0, len(todos)) for _, todo := range todos { if _, ok := owned[strings.TrimSpace(todo.ID)]; ok { selected = append(selected, todo) + continue + } + if isPostPlanRequiredTodo(plan, todo) { + selected = append(selected, todo) } } return selected } +// isPostPlanRequiredTodo 判断计划执行期新增的必需 todo 是否应纳入当前计划验收。 +func isPostPlanRequiredTodo(plan *agentsession.PlanArtifact, todo agentsession.TodoItem) bool { + if plan == nil || !todo.RequiredValue() || todo.Status.IsTerminal() { + return false + } + if plan.CreatedAt.IsZero() || todo.CreatedAt.IsZero() { + return false + } + return !todo.CreatedAt.Before(plan.CreatedAt) +} + // emitAcceptGateReport 将 Accept Gate 报告发布为统一 acceptance_decided 事件。 func (s *Service) emitAcceptGateReport(state *runState, report acceptgate.Report) { status := string(acceptgate.OutcomeFailed) diff --git a/internal/session/plan.go b/internal/session/plan.go index 2209036c..1c1f6ab8 100644 --- a/internal/session/plan.go +++ b/internal/session/plan.go @@ -3,6 +3,8 @@ package session import ( "encoding/json" "fmt" + "sort" + "strconv" "strings" "time" ) @@ -361,10 +363,12 @@ func (checks AcceptChecks) Normalize() AcceptChecks { check.Kind = normalizeAcceptCheckKind(check.Kind) check.Target = strings.TrimSpace(check.Target) check.Match = strings.TrimSpace(check.Match) - key := check.Kind + "\x00" + check.Target + "\x00" + check.Match - if key == "\x00\x00" { + if check.Kind == "" && check.Target == "" && check.Match == "" { continue } + key := check.Kind + "\x00" + check.Target + "\x00" + check.Match + + "\x00" + paramsKey(check.Params) + + "\x00" + strconv.FormatBool(check.RequiredValue()) if _, exists := seen[key]; exists { continue } @@ -377,6 +381,23 @@ func (checks AcceptChecks) Normalize() AcceptChecks { return out } +// paramsKey 将验收参数稳定序列化,用于区分同目标下的不同机器检查。 +func paramsKey(params map[string]string) string { + if len(params) == 0 { + return "" + } + keys := make([]string, 0, len(params)) + for key := range params { + keys = append(keys, key) + } + sort.Strings(keys) + parts := make([]string, 0, len(keys)) + for _, key := range keys { + parts = append(parts, strconv.Quote(key)+"="+strconv.Quote(params[key])) + } + return strings.Join(parts, ";") +} + // RenderLines 返回面向计划正文的稳定验收项文本。 func (checks AcceptChecks) RenderLines() []string { normalized := checks.Normalize() diff --git a/internal/session/plan_test.go b/internal/session/plan_test.go index e0f7970a..f52d5f95 100644 --- a/internal/session/plan_test.go +++ b/internal/session/plan_test.go @@ -30,6 +30,44 @@ func TestAcceptChecksUnmarshalRequiredDefaultAndExplicitFalse(t *testing.T) { } } +func TestAcceptChecksNormalizePreservesDistinctParams(t *testing.T) { + t.Parallel() + + checks := AcceptChecks{ + {Kind: AcceptCheckContentContains, Target: "README.md", Params: map[string]string{"contains": "NeoCode"}}, + {Kind: AcceptCheckContentContains, Target: "README.md", Params: map[string]string{"contains": "Todo"}}, + {Kind: AcceptCheckContentContains, Target: "README.md", Params: map[string]string{"contains": "NeoCode"}}, + } + + normalized := checks.Normalize() + if len(normalized) != 2 { + t.Fatalf("Normalize() length = %d, want 2: %+v", len(normalized), normalized) + } + if normalized[0].Params["contains"] != "NeoCode" || normalized[1].Params["contains"] != "Todo" { + t.Fatalf("Normalize() = %+v, want distinct contains params kept", normalized) + } +} + +func TestAcceptChecksNormalizePreservesDistinctRequired(t *testing.T) { + t.Parallel() + + required := true + optional := false + checks := AcceptChecks{ + {Kind: AcceptCheckCommandSuccess, Target: "go test ./...", Required: &required}, + {Kind: AcceptCheckCommandSuccess, Target: "go test ./...", Required: &optional}, + } + + normalized := checks.Normalize() + if len(normalized) != 2 { + t.Fatalf("Normalize() length = %d, want 2: %+v", len(normalized), normalized) + } + if !normalized[0].RequiredValue() || normalized[1].RequiredValue() { + t.Fatalf("Normalize() required flags = [%v %v], want [true false]", + normalized[0].RequiredValue(), normalized[1].RequiredValue()) + } +} + func TestNormalizeSummaryViewFallsBackToBuiltSummaryWhenStructurallyInvalid(t *testing.T) { t.Parallel() diff --git a/internal/tools/permission_mapper.go b/internal/tools/permission_mapper.go index f3dd5f43..0dd71f92 100644 --- a/internal/tools/permission_mapper.go +++ b/internal/tools/permission_mapper.go @@ -59,6 +59,27 @@ func buildPermissionAction(input ToolCallInput) (security.Action, error) { action.Payload.Target = extractStringArgument(input.Arguments, "path") action.Payload.SandboxTargetType = security.TargetTypePath action.Payload.SandboxTarget = action.Payload.Target + case ToolNameCodebaseRead: + action.Type = security.ActionTypeRead + action.Payload.Operation = "codebase_read" + action.Payload.TargetType = security.TargetTypePath + action.Payload.Target = extractStringArgument(input.Arguments, "path") + action.Payload.SandboxTargetType = security.TargetTypePath + action.Payload.SandboxTarget = action.Payload.Target + case ToolNameCodebaseSearchText: + action.Type = security.ActionTypeRead + action.Payload.Operation = "codebase_search_text" + action.Payload.TargetType = security.TargetTypeDirectory + action.Payload.Target = extractStringArgument(input.Arguments, "dir") + action.Payload.SandboxTargetType = security.TargetTypeDirectory + action.Payload.SandboxTarget = action.Payload.Target + case ToolNameCodebaseSearchSymbol: + action.Type = security.ActionTypeRead + action.Payload.Operation = "codebase_search_symbol" + action.Payload.TargetType = security.TargetTypeDirectory + action.Payload.Target = extractStringArgument(input.Arguments, "dir") + action.Payload.SandboxTargetType = security.TargetTypeDirectory + action.Payload.SandboxTarget = action.Payload.Target case ToolNameFilesystemGrep: action.Type = security.ActionTypeRead action.Payload.Operation = "grep" From e8ddb70176c385f2cd94f1ff93a920cf5114662c Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Sat, 9 May 2026 18:48:03 +0800 Subject: [PATCH 06/15] =?UTF-8?q?fix(runtime)=EF=BC=9A=E5=A2=9E=E5=8A=A0re?= =?UTF-8?q?peat=E8=87=AA=E6=88=91=E7=BA=A0=E5=81=8F=E6=9C=BA=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/runtime/controlplane/progress.go | 35 ++-- .../runtime/controlplane/progress_test.go | 66 ++++++- internal/runtime/runtime_progress_test.go | 166 ++++++++++++++++-- 3 files changed, 239 insertions(+), 28 deletions(-) diff --git a/internal/runtime/controlplane/progress.go b/internal/runtime/controlplane/progress.go index a2827fa4..6120dd5e 100644 --- a/internal/runtime/controlplane/progress.go +++ b/internal/runtime/controlplane/progress.go @@ -143,13 +143,9 @@ func EvaluateProgress(state ProgressState, input ProgressInput) ProgressState { next.StalledProgressState = StalledProgressHealthy next.ReminderKind = ReminderKindNone } - if input.NoProgressLimit > 0 && next.NoProgressStreak >= input.NoProgressLimit { + if shouldTerminateAfterStalledReminder(state.LastScore, next) { next.ShouldTerminate = true - next.TerminateReason = StopReasonNoProgress - } - if input.RepeatCycleLimit > 0 && next.RepeatCycleStreak >= input.RepeatCycleLimit { - next.ShouldTerminate = true - next.TerminateReason = StopReasonRepeatCycle + next.TerminateReason = stopReasonForReminderKind(next.ReminderKind) } return ProgressState{ @@ -160,6 +156,27 @@ func EvaluateProgress(state ProgressState, input ProgressInput) ProgressState { } } +// shouldTerminateAfterStalledReminder 只在同类 stalled 已提醒过一轮后才允许硬终止。 +func shouldTerminateAfterStalledReminder(previous ProgressScore, current ProgressScore) bool { + if current.StalledProgressState != StalledProgressStalled || current.ReminderKind == ReminderKindNone { + return false + } + return previous.StalledProgressState == StalledProgressStalled && + previous.ReminderKind == current.ReminderKind +} + +// stopReasonForReminderKind 将当前 stalled 提醒类型映射为最终终止原因。 +func stopReasonForReminderKind(kind ReminderKind) StopReason { + switch kind { + case ReminderKindRepeatCycle: + return StopReasonRepeatCycle + case ReminderKindNoProgress, ReminderKindGenericStalled: + return StopReasonNoProgress + default: + return StopReasonNoProgress + } +} + type evidenceFlags struct { strongCount int mediumCount int @@ -202,10 +219,8 @@ func isExplorationProgress(runState RunState, flags evidenceFlags) bool { // explorationWindowForPhase 返回不同阶段允许的 exploration 宽容窗口。 func explorationWindowForPhase(runState RunState) int { switch runState { - case RunStatePlan: - return 10 - case RunStateExecute: - return 6 + case RunStatePlan, RunStateExecute: + return 15 default: return 0 } diff --git a/internal/runtime/controlplane/progress_test.go b/internal/runtime/controlplane/progress_test.go index 22dbdda9..2fe2fd9e 100644 --- a/internal/runtime/controlplane/progress_test.go +++ b/internal/runtime/controlplane/progress_test.go @@ -68,7 +68,7 @@ func TestEvaluateProgressExplorationExhaustionStartsNoProgress(t *testing.T) { state := ProgressState{ LastScore: ProgressScore{ - ExplorationStreak: 11, + ExplorationStreak: 15, NoProgressStreak: 1, }, } @@ -87,6 +87,39 @@ func TestEvaluateProgressExplorationExhaustionStartsNoProgress(t *testing.T) { } } +func TestEvaluateProgressNoProgressWarnsBeforeTerminate(t *testing.T) { + t.Parallel() + + first := EvaluateProgress(ProgressState{ + LastScore: ProgressScore{NoProgressStreak: 2}, + }, ProgressInput{ + RunState: RunStateExecute, + NoProgressLimit: 3, + RepeatCycleLimit: 3, + SubgoalFingerprint: "subgoal", + }) + + if first.LastScore.StalledProgressState != StalledProgressStalled { + t.Fatalf("first stalled state = %q, want %q", first.LastScore.StalledProgressState, StalledProgressStalled) + } + if first.LastScore.ReminderKind != ReminderKindNoProgress { + t.Fatalf("first reminder = %q, want %q", first.LastScore.ReminderKind, ReminderKindNoProgress) + } + if first.LastScore.ShouldTerminate { + t.Fatal("first stalled no-progress should warn before hard terminate") + } + + second := EvaluateProgress(first, ProgressInput{ + RunState: RunStateExecute, + NoProgressLimit: 3, + RepeatCycleLimit: 3, + SubgoalFingerprint: "subgoal", + }) + if !second.LastScore.ShouldTerminate || second.LastScore.TerminateReason != StopReasonNoProgress { + t.Fatalf("second score = %+v, want no-progress hard terminate", second.LastScore) + } +} + func TestEvaluateProgressRepeatCycleRequiresSameResultAndSubgoal(t *testing.T) { t.Parallel() @@ -115,6 +148,37 @@ func TestEvaluateProgressRepeatCycleRequiresSameResultAndSubgoal(t *testing.T) { if got.LastScore.ReminderKind != ReminderKindRepeatCycle { t.Fatalf("reminder = %q, want %q", got.LastScore.ReminderKind, ReminderKindRepeatCycle) } + if got.LastScore.ShouldTerminate { + t.Fatal("first stalled repeat-cycle should warn before hard terminate") + } +} + +func TestEvaluateProgressRepeatCycleTerminatesAfterReminder(t *testing.T) { + t.Parallel() + + state := ProgressState{ + LastScore: ProgressScore{ + RepeatCycleStreak: 3, + StalledProgressState: StalledProgressStalled, + ReminderKind: ReminderKindRepeatCycle, + }, + LastToolSignature: "sig", + LastResultFingerprint: "result", + LastSubgoalFingerprint: "subgoal", + } + + got := EvaluateProgress(state, ProgressInput{ + RunState: RunStateExecute, + CurrentToolSignature: "sig", + ResultFingerprint: "result", + SubgoalFingerprint: "subgoal", + NoProgressLimit: 10, + RepeatCycleLimit: 3, + }) + + if !got.LastScore.ShouldTerminate || got.LastScore.TerminateReason != StopReasonRepeatCycle { + t.Fatalf("score = %+v, want repeat-cycle hard terminate", got.LastScore) + } } func TestEvaluateProgressUnknownSubgoalDoesNotAdvanceRepeat(t *testing.T) { diff --git a/internal/runtime/runtime_progress_test.go b/internal/runtime/runtime_progress_test.go index cd76853e..267c664f 100644 --- a/internal/runtime/runtime_progress_test.go +++ b/internal/runtime/runtime_progress_test.go @@ -16,7 +16,7 @@ import ( todotool "neo-code/internal/tools/todo" ) -func TestProgressStreakNoLongerStopsRun(t *testing.T) { +func TestProgressStreakWarnsAndAllowsRecovery(t *testing.T) { t.Setenv("TEST_KEY", "dummy") cfg := config.Config{ @@ -45,16 +45,14 @@ func TestProgressStreakNoLongerStopsRun(t *testing.T) { providerFactory := &scriptedProviderFactory{ provider: &scriptedProvider{ chatFn: func(ctx context.Context, req providertypes.GenerateRequest, events chan<- providertypes.StreamEvent) error { - call := atomic.AddInt32(&providerCalls, 1) - seq := atomic.AddInt32(&signatureSeq, 1) + atomic.AddInt32(&providerCalls, 1) if strings.Contains(req.SystemPrompt, selfHealingReminder) { promptInjected = true - } - if call >= 5 { events <- providertypes.NewTextDeltaStreamEvent("{\"task_completion\":{\"completed\":true}}\ndone") events <- providertypes.NewMessageDoneStreamEvent("stop", nil) return nil } + seq := atomic.AddInt32(&signatureSeq, 1) // the model always decides to call the tool events <- providertypes.NewToolCallStartStreamEvent(0, "call_err", "tool_error") events <- providertypes.NewToolCallDeltaStreamEvent( @@ -84,14 +82,83 @@ func TestProgressStreakNoLongerStopsRun(t *testing.T) { } if err := service.Run(context.Background(), input); err != nil { + t.Fatalf("expected run to recover after no-progress reminder, got %v", err) + } + + events := collectRuntimeEvents(service.Events()) + assertStopReasonDecided(t, events, controlplane.StopReasonAccepted, "") + + if !promptInjected { + t.Error("expected self-healing prompt injection before recovery") + } + if providerCalls != 4 { + t.Fatalf("expected 4 provider turns including reminder recovery, got %d", providerCalls) + } +} + +func TestProgressStreakTerminatesAfterReminderIfStillStalled(t *testing.T) { + t.Setenv("TEST_KEY", "dummy") + + cfg := config.Config{ + Providers: []config.ProviderConfig{{Name: "test-progress-hard-stop", Driver: "test", BaseURL: "http://localhost", Model: "test", APIKeyEnv: "TEST_KEY"}}, + SelectedProvider: "test-progress-hard-stop", + Workdir: t.TempDir(), + Runtime: config.RuntimeConfig{ + MaxNoProgressStreak: 2, + MaxRepeatCycleStreak: 6, + }, + } + + var executeCalls int32 + toolManager := &stubToolManager{ + specs: []providertypes.ToolSpec{{Name: "tool_error"}}, + executeFn: func(ctx context.Context, input tools.ToolCallInput) (tools.ToolResult, error) { + atomic.AddInt32(&executeCalls, 1) + return tools.ToolResult{Name: input.Name, Content: "error occurred", IsError: true}, nil + }, + } + + var promptInjected bool + var providerCalls int32 + var signatureSeq int32 + providerFactory := &scriptedProviderFactory{ + provider: &scriptedProvider{ + chatFn: func(ctx context.Context, req providertypes.GenerateRequest, events chan<- providertypes.StreamEvent) error { + atomic.AddInt32(&providerCalls, 1) + if strings.Contains(req.SystemPrompt, selfHealingReminder) { + promptInjected = true + } + seq := atomic.AddInt32(&signatureSeq, 1) + events <- providertypes.NewToolCallStartStreamEvent(0, "call_err", "tool_error") + events <- providertypes.NewToolCallDeltaStreamEvent( + 0, + "call_err", + `{"seq":`+strconv.FormatInt(int64(seq), 10)+`}`, + ) + events <- providertypes.NewMessageDoneStreamEvent("tool_calls", nil) + return nil + }, + }, + } + + manager := config.NewManager(config.NewLoader(t.TempDir(), &cfg)) + service := NewWithFactory(manager, toolManager, newMemoryStore(), providerFactory, nil) + + if err := service.Run(context.Background(), UserInput{ + RunID: "run-progress-hard-stop", + Parts: []providertypes.ContentPart{providertypes.NewTextPart("trigger unrecovered error loop")}, + }); err != nil { t.Fatalf("expected run to stop cleanly on no-progress, got %v", err) } events := collectRuntimeEvents(service.Events()) assertStopReasonDecided(t, events, controlplane.StopReasonNoProgress, "") - if promptInjected { - t.Error("did not expect self-healing prompt injection after hard no-progress termination") + if !promptInjected { + t.Fatal("expected self-healing prompt injection before hard no-progress termination") + } + if executeCalls != 3 { + t.Fatalf("expected 3 tool executions before no-progress termination, got %d", executeCalls) } if providerCalls != 3 { t.Fatalf("expected 3 provider turns before no-progress termination, got %d", providerCalls) @@ -233,20 +300,83 @@ func TestRepeatCycleStreakNoLongerStopsRunAndInjectsReminder(t *testing.T) { Parts: []providertypes.ContentPart{providertypes.NewTextPart("trigger repeat loop")}, }) if err != nil { + t.Fatalf("expected run to recover after repeat-cycle reminder, got %v", err) + } + + events := collectRuntimeEvents(service.Events()) + assertStopReasonDecided(t, events, controlplane.StopReasonAccepted, "") + + if !promptInjected { + t.Fatal("expected repeat self-healing prompt injection before recovery") + } + if executeCalls != 4 { + t.Fatalf("expected 4 repeated tool executions before repeat reminder recovery, got %d", executeCalls) + } + if providerCalls != 5 { + t.Fatalf("expected 5 provider turns including recovery response, got %d", providerCalls) + } +} + +func TestRepeatCycleTerminatesAfterReminderIfStillStalled(t *testing.T) { + t.Setenv("TEST_KEY", "dummy") + + cfg := config.Config{ + Providers: []config.ProviderConfig{{Name: "test-repeat-hard-stop", Driver: "test", BaseURL: "http://localhost", Model: "test", APIKeyEnv: "TEST_KEY"}}, + SelectedProvider: "test-repeat-hard-stop", + Workdir: t.TempDir(), + Runtime: config.RuntimeConfig{ + MaxNoProgressStreak: 10, + MaxRepeatCycleStreak: 3, + }, + } + + var executeCalls int32 + var providerCalls int32 + toolManager := &stubToolManager{ + specs: []providertypes.ToolSpec{{Name: "tool_repeat"}}, + executeFn: func(ctx context.Context, input tools.ToolCallInput) (tools.ToolResult, error) { + atomic.AddInt32(&executeCalls, 1) + return tools.ToolResult{Name: input.Name, Content: "ok", IsError: false}, nil + }, + } + + var promptInjected bool + providerFactory := &scriptedProviderFactory{ + provider: &scriptedProvider{ + chatFn: func(ctx context.Context, req providertypes.GenerateRequest, events chan<- providertypes.StreamEvent) error { + atomic.AddInt32(&providerCalls, 1) + if strings.Contains(req.SystemPrompt, selfHealingRepeatReminder) { + promptInjected = true + } + events <- providertypes.NewToolCallStartStreamEvent(0, "call_repeat", "tool_repeat") + events <- providertypes.NewToolCallDeltaStreamEvent(0, "call_repeat", `{"path":"x"}`) + events <- providertypes.NewMessageDoneStreamEvent("tool_calls", nil) + return nil + }, + }, + } + + manager := config.NewManager(config.NewLoader(t.TempDir(), &cfg)) + service := NewWithFactory(manager, toolManager, newMemoryStore(), providerFactory, nil) + + if err := service.Run(context.Background(), UserInput{ + RunID: "run-repeat-hard-stop", + Parts: []providertypes.ContentPart{providertypes.NewTextPart("trigger unrecovered repeat loop")}, + }); err != nil { t.Fatalf("expected run to stop cleanly on repeat-cycle, got %v", err) } events := collectRuntimeEvents(service.Events()) assertStopReasonDecided(t, events, controlplane.StopReasonRepeatCycle, "") - if promptInjected { - t.Fatal("did not expect repeat self-healing prompt injection after hard repeat-cycle termination") + if !promptInjected { + t.Fatal("expected repeat self-healing prompt injection before hard repeat-cycle termination") } - if executeCalls != 4 { - t.Fatalf("expected repeated tool executions to stop at repeat limit, got %d", executeCalls) + if executeCalls != 5 { + t.Fatalf("expected 5 repeated tool executions before repeat-cycle termination, got %d", executeCalls) } - if providerCalls != 4 { - t.Fatalf("expected 4 provider turns before repeat-cycle termination, got %d", providerCalls) + if providerCalls != 5 { + t.Fatalf("expected 5 provider turns before repeat-cycle termination, got %d", providerCalls) } } @@ -306,14 +436,16 @@ func TestRepeatCycleFailedCallsNoLongerHardStop(t *testing.T) { Parts: []providertypes.ContentPart{providertypes.NewTextPart("trigger repeat fail loop")}, }) if err != nil { - t.Fatalf("expected run to stop cleanly on repeat-cycle, got %v", err) + t.Fatalf("expected run to recover after repeat-cycle reminder, got %v", err) } if executeCalls != 4 { - t.Fatalf("expected failed repeated calls to stop at repeat limit, got %d", executeCalls) + t.Fatalf("expected 4 failed repeated calls before recovery, got %d", executeCalls) } - if providerCalls != 4 { - t.Fatalf("expected 4 provider turns before repeat-cycle termination, got %d", providerCalls) + if providerCalls != 5 { + t.Fatalf("expected 5 provider turns including recovery response, got %d", providerCalls) } + events := collectRuntimeEvents(service.Events()) + assertStopReasonDecided(t, events, controlplane.StopReasonAccepted, "") } func TestRunStopsWhenMaxTurnsReached(t *testing.T) { From 887e07107fe905f55aa4d6be9e9e21c78c1a7939 Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Sat, 9 May 2026 19:25:08 +0800 Subject: [PATCH 07/15] =?UTF-8?q?fix(runtime)=EF=BC=9A=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E5=B7=A5=E5=85=B7=E8=B0=83=E7=94=A8=E6=97=B6=E9=95=BF=E9=98=B2?= =?UTF-8?q?=E6=AD=A2=E8=B6=85=E6=97=B6=E4=B8=AD=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/config/runtime.go | 2 +- internal/repository/path.go | 4 +- .../repository/repository_coverage_test.go | 37 +++++- internal/runtime/permission.go | 106 +++++++++++++++++- internal/runtime/permission_test.go | 104 +++++++++++++++++ internal/runtime/state.go | 2 + internal/tools/codebase/searchsymbol.go | 2 +- internal/tools/codebase/searchtext.go | 2 +- 8 files changed, 250 insertions(+), 9 deletions(-) diff --git a/internal/config/runtime.go b/internal/config/runtime.go index 57dd1375..a28991e9 100644 --- a/internal/config/runtime.go +++ b/internal/config/runtime.go @@ -8,7 +8,7 @@ import ( ) const ( - DefaultMaxNoProgressStreak = 5 + DefaultMaxNoProgressStreak = 6 DefaultMaxRepeatCycleStreak = 3 DefaultMaxTurns = 90 ) diff --git a/internal/repository/path.go b/internal/repository/path.go index b88d2fe3..83855dbc 100644 --- a/internal/repository/path.go +++ b/internal/repository/path.go @@ -185,7 +185,9 @@ func walkWorkspaceFiles( func skipDirEntry(entry fs.DirEntry) bool { name := strings.ToLower(strings.TrimSpace(entry.Name())) switch name { - case ".git", ".idea", ".vscode", "node_modules": + case ".git", ".idea", ".vscode", "node_modules", + ".cache", ".tmp", "tmp", "build", "dist", "out", "target", "coverage", + ".next", ".nuxt", ".turbo", ".parcel-cache", ".vite", "vendor", "bin", "obj": return true default: return false diff --git a/internal/repository/repository_coverage_test.go b/internal/repository/repository_coverage_test.go index b025de3b..c0a30232 100644 --- a/internal/repository/repository_coverage_test.go +++ b/internal/repository/repository_coverage_test.go @@ -376,7 +376,14 @@ func TestRepositoryHelpersAndGitParsing(t *testing.T) { workdir := t.TempDir() mustWriteRepositoryFile(t, filepath.Join(workdir, "pkg", "a.go"), "package pkg\n\nconst Name = \"Widget\"\n") mustWriteRepositoryFile(t, filepath.Join(workdir, "pkg", "b.txt"), "Widget appears twice\nWidget\n") - mustWriteRepositoryFile(t, filepath.Join(workdir, "node_modules", "ignored.txt"), "ignored") + skippedDirs := []string{ + ".cache", ".tmp", "tmp", "build", "dist", "out", "target", "coverage", + ".next", ".nuxt", ".turbo", ".parcel-cache", ".vite", "vendor", "bin", "obj", + "node_modules", + } + for _, dir := range skippedDirs { + mustWriteRepositoryFile(t, filepath.Join(workdir, dir, "ignored.txt"), "ignored") + } if _, _, _, err := normalizeRetrievalQuery(workdir, RetrievalQuery{Mode: RetrievalModePath, Value: " "}); err == nil { t.Fatal("expected empty query error") @@ -417,14 +424,20 @@ func TestRepositoryHelpersAndGitParsing(t *testing.T) { var visited []string err = walkWorkspaceFiles(context.Background(), workdir, workdir, func(path string) error { - visited = append(visited, filepath.Base(path)) + rel, relErr := filepath.Rel(workdir, path) + if relErr != nil { + return relErr + } + visited = append(visited, filepath.Clean(rel)) return nil }) if err != nil { t.Fatalf("walkWorkspaceFiles() error = %v", err) } - if slices.Contains(visited, "ignored.txt") { - t.Fatalf("expected node_modules to be skipped, got %v", visited) + for _, dir := range skippedDirs { + if slices.Contains(visited, filepath.Clean(filepath.Join(dir, "ignored.txt"))) { + t.Fatalf("expected %s to be skipped, got %v", dir, visited) + } } stopErr := errors.New("stop") if err := walkWorkspaceFiles(context.Background(), workdir, workdir, func(path string) error { return stopErr }); !errors.Is(err, stopErr) { @@ -508,6 +521,8 @@ func TestRepositoryReadSearchAndServiceEntrypoints(t *testing.T) { mustWriteRepositoryFile(t, filepath.Join(workdir, "notes.py"), "def py_symbol():\n return 1\n") mustWriteRepositoryFile(t, filepath.Join(workdir, ".env"), "SECRET=1\n") mustWriteRepositoryFile(t, filepath.Join(workdir, "pkg", "bin.dat"), string([]byte{0x00, 0x01, 0x02})) + mustWriteRepositoryFile(t, filepath.Join(workdir, ".cache", "hidden.txt"), "hidden_alpha\n") + mustWriteRepositoryFile(t, filepath.Join(workdir, "build", "hidden.go"), "package build\n\nfunc HiddenWidget() {}\n") service := NewService() @@ -539,6 +554,13 @@ func TestRepositoryReadSearchAndServiceEntrypoints(t *testing.T) { if len(textResult.Hits) != 1 || !textResult.Truncated || textResult.TotalCount == 0 { t.Fatalf("unexpected text search result: %+v", textResult) } + skippedTextResult, err := service.SearchText(context.Background(), workdir, "hidden_alpha", SearchOptions{Limit: 10}) + if err != nil { + t.Fatalf("SearchText(skipped) error = %v", err) + } + if len(skippedTextResult.Hits) != 0 { + t.Fatalf("expected skipped directory text to be ignored, got %+v", skippedTextResult) + } symbolResult, err := service.SearchSymbol(context.Background(), workdir, "BuildWidget", SearchOptions{Limit: 10}) if err != nil { @@ -563,6 +585,13 @@ func TestRepositoryReadSearchAndServiceEntrypoints(t *testing.T) { if len(fallbackResult.Hits) == 0 || fallbackResult.Hits[0].Kind != "reference" { t.Fatalf("unexpected fallback symbol result: %+v", fallbackResult) } + skippedSymbolResult, err := service.SearchSymbol(context.Background(), workdir, "HiddenWidget", SearchOptions{Limit: 10}) + if err != nil { + t.Fatalf("SearchSymbol(skipped) error = %v", err) + } + if len(skippedSymbolResult.Hits) != 0 { + t.Fatalf("expected skipped directory symbol to be ignored, got %+v", skippedSymbolResult) + } if got := extractGoSignature("func BuildWidget(\n\tname string,\n) string {\n\treturn name\n}\n", 1); !strings.Contains(got, "name string") { t.Fatalf("extractGoSignature(multiline) = %q", got) diff --git a/internal/runtime/permission.go b/internal/runtime/permission.go index b61bb253..e3645f04 100644 --- a/internal/runtime/permission.go +++ b/internal/runtime/permission.go @@ -57,8 +57,10 @@ const ( minInlineSubAgentToolTimeout = 30 * time.Second defaultDiagnoseToolTimeout = 60 * time.Second defaultPermissionToolTimeout = 20 * time.Second + defaultCodebaseSearchToolTimeout = 60 * time.Second defaultAskUserToolTimeout = 5 * time.Minute maxAskUserToolTimeout = time.Hour + maxAdaptiveToolTimeout = 160 * time.Second ) // permissionExecutionInput 汇总一次工具执行与审批协作所需的上下文。 @@ -168,24 +170,28 @@ func (s *Service) executeToolCallWithPermission(ctx context.Context, input permi } callInput.SubAgentInvoker = newRuntimeSubAgentInvoker(s, input.RunID, input.SessionID, input.AgentID, input.Workdir) - effectiveTimeout := resolveToolExecutionTimeout(input.Call, input.ToolTimeout) + baseTimeout := resolveToolExecutionTimeout(input.Call, input.ToolTimeout) + effectiveTimeout := resolveAdaptiveToolExecutionTimeout(input.State, input.Call, baseTimeout) runCtx, cancel := context.WithTimeout(ctx, effectiveTimeout) defer cancel() if s.runnerToolDispatcher != nil { result, handled, dispatchErr := s.runnerToolDispatcher.TryDispatch(runCtx, input.SessionID, input.RunID, callInput) if handled { + recordAdaptiveToolTimeoutResult(input.State, input.Call, result, dispatchErr) return result, dispatchErr } } result, execErr := s.toolManager.Execute(runCtx, callInput) if execErr == nil { + recordAdaptiveToolTimeoutResult(input.State, input.Call, result, nil) return result, nil } var permissionErr *tools.PermissionDecisionError if !errors.As(execErr, &permissionErr) { + recordAdaptiveToolTimeoutResult(input.State, input.Call, result, execErr) return result, execErr } @@ -220,6 +226,7 @@ func (s *Service) executeToolCallWithPermission(ctx context.Context, input permi Reason: reason, Enforced: true, }) + recordAdaptiveToolTimeoutResult(input.State, input.Call, blockedResult, errors.New(reason)) return blockedResult, errors.New(reason) } @@ -233,6 +240,7 @@ func (s *Service) executeToolCallWithPermission(ctx context.Context, input permi permissionResolutionStatus(permissionErr.Decision()), permissionErr.RememberScope(), ) + recordAdaptiveToolTimeoutResult(input.State, input.Call, result, execErr) return result, execErr } @@ -296,6 +304,7 @@ func (s *Service) executeToolCallWithPermission(ctx context.Context, input permi retryCtx, retryCancel := context.WithTimeout(ctx, effectiveTimeout) retryResult, retryErr := s.toolManager.Execute(retryCtx, callInput) retryCancel() + recordAdaptiveToolTimeoutResult(input.State, input.Call, retryResult, retryErr) return retryResult, retryErr } @@ -312,6 +321,12 @@ func resolveToolExecutionTimeout(call providertypes.ToolCall, fallback time.Dura } return base } + if isCodebaseSearchTool(name) { + if base < defaultCodebaseSearchToolTimeout { + return defaultCodebaseSearchToolTimeout + } + return base + } if strings.EqualFold(name, tools.ToolNameAskUser) { requested := parseAskUserTimeoutFromArguments(call.Arguments) if requested <= 0 { @@ -341,6 +356,95 @@ func resolveToolExecutionTimeout(call providertypes.ToolCall, fallback time.Dura return base } +// isCodebaseSearchTool 识别会做代码库遍历的搜索工具,用于给首轮执行预留更合理的时间。 +func isCodebaseSearchTool(name string) bool { + return strings.EqualFold(name, tools.ToolNameCodebaseSearchText) || + strings.EqualFold(name, tools.ToolNameCodebaseSearchSymbol) +} + +// resolveAdaptiveToolExecutionTimeout 根据同一 Run 内同签名工具的 timeout 次数指数放大超时。 +func resolveAdaptiveToolExecutionTimeout(state *runState, call providertypes.ToolCall, base time.Duration) time.Duration { + if state == nil || !supportsAdaptiveToolTimeout(call.Name) { + return base + } + key := toolTimeoutBackoffKey(call) + if key == "" { + return base + } + state.mu.Lock() + attempts := state.toolTimeoutBackoff[key] + state.mu.Unlock() + timeout := base + for attempts > 0 && timeout < maxAdaptiveToolTimeout { + timeout *= 2 + if timeout > maxAdaptiveToolTimeout { + timeout = maxAdaptiveToolTimeout + } + attempts-- + } + return timeout +} + +// recordAdaptiveToolTimeoutResult 记录工具 timeout 结果;成功或非 timeout 错误会清除该签名的倍增状态。 +func recordAdaptiveToolTimeoutResult(state *runState, call providertypes.ToolCall, result tools.ToolResult, err error) { + if state == nil || !supportsAdaptiveToolTimeout(call.Name) { + return + } + key := toolTimeoutBackoffKey(call) + if key == "" { + return + } + state.mu.Lock() + defer state.mu.Unlock() + if toolExecutionTimedOut(result, err) { + if state.toolTimeoutBackoff == nil { + state.toolTimeoutBackoff = make(map[string]int) + } + state.toolTimeoutBackoff[key]++ + return + } + delete(state.toolTimeoutBackoff, key) +} + +// supportsAdaptiveToolTimeout 仅对普通工具调用启用倍增,避免覆盖交互/子代理等自带超时语义。 +func supportsAdaptiveToolTimeout(name string) bool { + normalized := strings.TrimSpace(name) + if normalized == "" { + return false + } + switch { + case strings.EqualFold(normalized, tools.ToolNameAskUser), + strings.EqualFold(normalized, tools.ToolNameSpawnSubAgent), + strings.EqualFold(normalized, tools.ToolNameDiagnose): + return false + default: + return true + } +} + +// toolTimeoutBackoffKey 将工具名和规范化参数组合为本轮 timeout 倍增键。 +func toolTimeoutBackoffKey(call providertypes.ToolCall) string { + signature := computeToolSignature([]providertypes.ToolCall{call}) + if strings.TrimSpace(signature) == "" { + return "" + } + return strings.ToLower(strings.TrimSpace(call.Name)) + "\x00" + signature +} + +// toolExecutionTimedOut 判断工具结果是否代表执行超时。 +func toolExecutionTimedOut(result tools.ToolResult, err error) bool { + if errors.Is(err, context.DeadlineExceeded) { + return true + } + if strings.EqualFold(strings.TrimSpace(result.ErrorClass), "timeout") { + return true + } + content := strings.ToLower(strings.TrimSpace(result.Content)) + return strings.Contains(content, "context deadline exceeded") || + strings.Contains(content, "timed out") || + strings.Contains(content, "timeout") +} + // parseAskUserTimeoutFromArguments 解析 ask_user 的 timeout_sec,并返回持续时间。 func parseAskUserTimeoutFromArguments(raw string) time.Duration { if strings.TrimSpace(raw) == "" { diff --git a/internal/runtime/permission_test.go b/internal/runtime/permission_test.go index 7a1ae74b..090e576c 100644 --- a/internal/runtime/permission_test.go +++ b/internal/runtime/permission_test.go @@ -11,6 +11,7 @@ import ( providertypes "neo-code/internal/provider/types" approvalflow "neo-code/internal/runtime/approval" "neo-code/internal/security" + agentsession "neo-code/internal/session" "neo-code/internal/tools" "neo-code/internal/tools/mcp" ) @@ -1332,6 +1333,109 @@ func TestResolveToolExecutionTimeoutForAskUser(t *testing.T) { } } +func TestResolveToolExecutionTimeoutForCodebaseSearch(t *testing.T) { + t.Parallel() + + base := 20 * time.Second + for _, name := range []string{tools.ToolNameCodebaseSearchText, tools.ToolNameCodebaseSearchSymbol} { + got := resolveToolExecutionTimeout(providertypes.ToolCall{ + Name: name, + Arguments: `{"query":"plan mode"}`, + }, base) + if got != defaultCodebaseSearchToolTimeout { + t.Fatalf("%s timeout = %v, want %v", name, got, defaultCodebaseSearchToolTimeout) + } + } + + largerBase := 90 * time.Second + got := resolveToolExecutionTimeout(providertypes.ToolCall{ + Name: tools.ToolNameCodebaseSearchText, + Arguments: `{"query":"plan mode"}`, + }, largerBase) + if got != largerBase { + t.Fatalf("expected larger base timeout %v to win, got %v", largerBase, got) + } +} + +func TestResolveAdaptiveToolExecutionTimeoutBackoff(t *testing.T) { + t.Parallel() + + state := newRunState("run-timeout-backoff", agentsession.New("timeout-backoff")) + call := providertypes.ToolCall{ + Name: tools.ToolNameBash, + Arguments: `{"command":"go test ./..."}`, + } + base := 20 * time.Second + + if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != 20*time.Second { + t.Fatalf("first timeout = %v, want 20s", got) + } + recordAdaptiveToolTimeoutResult(&state, call, tools.ToolResult{}, context.DeadlineExceeded) + if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != 40*time.Second { + t.Fatalf("second timeout = %v, want 40s", got) + } + recordAdaptiveToolTimeoutResult(&state, call, tools.ToolResult{ErrorClass: "timeout"}, nil) + if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != 80*time.Second { + t.Fatalf("third timeout = %v, want 80s", got) + } + recordAdaptiveToolTimeoutResult(&state, call, tools.ToolResult{Content: "command timed out"}, nil) + if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != 160*time.Second { + t.Fatalf("fourth timeout = %v, want 160s", got) + } + recordAdaptiveToolTimeoutResult(&state, call, tools.ToolResult{Content: "timeout"}, nil) + if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != 160*time.Second { + t.Fatalf("capped timeout = %v, want 160s", got) + } + + recordAdaptiveToolTimeoutResult(&state, call, tools.ToolResult{Name: tools.ToolNameBash, Content: "ok"}, nil) + if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != base { + t.Fatalf("reset timeout = %v, want %v", got, base) + } +} + +func TestResolveAdaptiveToolExecutionTimeoutForCodebaseSearch(t *testing.T) { + t.Parallel() + + state := newRunState("run-codebase-timeout-backoff", agentsession.New("codebase-timeout-backoff")) + call := providertypes.ToolCall{ + Name: tools.ToolNameCodebaseSearchText, + Arguments: `{"query":"plan mode"}`, + } + base := defaultCodebaseSearchToolTimeout + + if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != 60*time.Second { + t.Fatalf("first codebase timeout = %v, want 60s", got) + } + recordAdaptiveToolTimeoutResult(&state, call, tools.ToolResult{}, context.DeadlineExceeded) + if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != 120*time.Second { + t.Fatalf("second codebase timeout = %v, want 120s", got) + } + recordAdaptiveToolTimeoutResult(&state, call, tools.ToolResult{ErrorClass: "timeout"}, nil) + if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != 160*time.Second { + t.Fatalf("third codebase timeout = %v, want 160s", got) + } + recordAdaptiveToolTimeoutResult(&state, call, tools.ToolResult{Name: tools.ToolNameCodebaseSearchText, Content: "ok"}, nil) + if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != base { + t.Fatalf("reset codebase timeout = %v, want %v", got, base) + } +} + +func TestResolveAdaptiveToolExecutionTimeoutSkipsInteractiveTools(t *testing.T) { + t.Parallel() + + state := newRunState("run-timeout-no-backoff", agentsession.New("timeout-no-backoff")) + call := providertypes.ToolCall{ + Name: tools.ToolNameAskUser, + Arguments: `{"question_id":"q1","title":"T","kind":"text"}`, + } + base := 20 * time.Second + + recordAdaptiveToolTimeoutResult(&state, call, tools.ToolResult{ErrorClass: "timeout"}, nil) + if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != base { + t.Fatalf("ask_user adaptive timeout = %v, want unchanged %v", got, base) + } +} + func TestResolveToolExecutionTimeoutFallbackAndHelpers(t *testing.T) { t.Parallel() diff --git a/internal/runtime/state.go b/internal/runtime/state.go index dd227232..d924e594 100644 --- a/internal/runtime/state.go +++ b/internal/runtime/state.go @@ -43,6 +43,7 @@ type runState struct { hasUnknownUsage bool completion controlplane.CompletionState progress controlplane.ProgressState + toolTimeoutBackoff map[string]int lastEndOfTurnCheckpointID string baselineCheckpointID string hookAnnotations []string @@ -65,6 +66,7 @@ func newRunState(runID string, session agentsession.Session) runState { reportedMissingSkills: make(map[string]struct{}), factsCollector: runtimefacts.NewCollector(), hookNotificationSeen: make(map[string]time.Time), + toolTimeoutBackoff: make(map[string]int), } } diff --git a/internal/tools/codebase/searchsymbol.go b/internal/tools/codebase/searchsymbol.go index ccf22101..e5d9286e 100644 --- a/internal/tools/codebase/searchsymbol.go +++ b/internal/tools/codebase/searchsymbol.go @@ -25,7 +25,7 @@ func (t *SearchSymbolTool) Name() string { } func (t *SearchSymbolTool) Description() string { - return "Search for symbol definitions across the workspace. Returns file paths, line hints, kind (function/type/method/etc.), and signature. Does NOT return the function body; use codebase_read to view implementation." + return "Search for symbol definitions across the workspace. Prefer scope_dir during exploration/plan mode to avoid expensive full-workspace scans. Returns file paths, line hints, kind (function/type/method/etc.), and signature. Does NOT return the function body; use codebase_read to view implementation." } func (t *SearchSymbolTool) Schema() map[string]any { diff --git a/internal/tools/codebase/searchtext.go b/internal/tools/codebase/searchtext.go index dda70073..f5f670c7 100644 --- a/internal/tools/codebase/searchtext.go +++ b/internal/tools/codebase/searchtext.go @@ -25,7 +25,7 @@ func (t *SearchTextTool) Name() string { } func (t *SearchTextTool) Description() string { - return "Search for text occurrences across the workspace. Returns file paths, line hints, and match counts. Does NOT return code snippets; use codebase_read to view content." + return "Search for text occurrences across the workspace. Prefer scope_dir during exploration/plan mode to avoid expensive full-workspace scans. Returns file paths, line hints, and match counts. Does NOT return code snippets; use codebase_read to view content." } func (t *SearchTextTool) Schema() map[string]any { From c0557fdaaa1439fae5df01fa130350b7279f9287 Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Sun, 10 May 2026 00:57:25 +0800 Subject: [PATCH 08/15] =?UTF-8?q?fix(runtime):=E5=88=A0=E9=99=A4noprogress?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/guides/configuration.md | 6 +- docs/stop-reason-and-decision-priority.md | 4 +- internal/config/config_test.go | 15 +- internal/config/runtime.go | 10 - internal/config/runtime_test.go | 17 +- internal/context/source_plan_mode.go | 7 + internal/promptasset/assets.go | 7 - internal/promptasset/assets_test.go | 3 - .../templates/context/plan_mode_plan.md | 4 +- internal/runtime/budget_models.go | 3 - internal/runtime/controlplane/progress.go | 179 ++--------- .../runtime/controlplane/progress_test.go | 226 ++++---------- internal/runtime/controlplane/stop_reason.go | 4 - internal/runtime/permission.go | 55 +++- internal/runtime/permission_test.go | 48 ++- internal/runtime/run.go | 44 +-- internal/runtime/runtime_progress_test.go | 289 ------------------ internal/runtime/runtime_test.go | 12 +- internal/runtime/todo_bootstrap.go | 36 +++ internal/runtime/toolexec.go | 57 +++- internal/runtime/turn_control.go | 106 +------ internal/runtime/turn_control_test.go | 164 ++++++---- internal/tools/filesystem/glob_test.go | 30 ++ internal/tools/filesystem/grep_test.go | 30 ++ internal/tools/filesystem/helpers.go | 4 +- internal/tools/filesystem/helpers_test.go | 9 + internal/tui/core/app/update.go | 4 - internal/tui/services/runtime_contract.go | 4 - 28 files changed, 472 insertions(+), 905 deletions(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 8fbb467b..64130bb0 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -26,7 +26,6 @@ tool_timeout_sec: 20 generate_start_timeout_sec: 90 runtime: - max_no_progress_streak: 5 max_repeat_cycle_streak: 3 max_turns: 90 hooks: @@ -108,9 +107,8 @@ context: | 字段 | 说明 | |------|------| -| `runtime.max_no_progress_streak` | 连续“无进展”轮次提醒阈值,默认 `5`;达到 `limit-1` 起会向模型注入纠偏提示,不会直接终止运行 | -| `runtime.max_repeat_cycle_streak` | 连续“重复调用同一工具参数”提醒阈值,默认 `3`;达到阈值后触发重复循环提醒,不会直接终止运行 | -| `runtime.max_turns` | 单次 Run 的最大推理轮数上限,默认 `40`;达到上限后直接终止并返回明确 stop reason | +| `runtime.max_repeat_cycle_streak` | 连续“相同工具签名 + 相同结果指纹 + 相同子目标”阈值,默认 `3`;达到阈值后先注入重复循环提醒,提醒后仍重复则终止为 `repeat_cycle` | +| `runtime.max_turns` | 单次 Run 的最大推理轮数上限,默认 `90`;达到上限后直接终止并返回明确 stop reason | | `runtime.hooks.enabled` | hooks 总开关;关闭后不执行 runtime hooks | | `runtime.hooks.user_hooks_enabled` | user hooks 开关;关闭后不加载 `runtime.hooks.items` | | `runtime.hooks.default_timeout_sec` | user hook 默认超时秒数,需 `> 0` | diff --git a/docs/stop-reason-and-decision-priority.md b/docs/stop-reason-and-decision-priority.md index b637c676..03ce1ec0 100644 --- a/docs/stop-reason-and-decision-priority.md +++ b/docs/stop-reason-and-decision-priority.md @@ -7,9 +7,11 @@ - `max_turn_exceeded` - `verification_failed` - `accepted` +- `missing_completion_signal` +- `accept_check_failed` - `todo_not_converged` - `todo_waiting_external` -- `no_progress_after_final_intercept` +- `repeat_cycle` - `max_turn_exceeded_with_unconverged_todos` - `max_turn_exceeded_with_failed_verification` - `verification_config_missing` diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 4e5b8c39..a26549d7 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -1735,7 +1735,6 @@ func TestValidateSnapshotPropagatesCompactError(t *testing.T) { }, }, Runtime: RuntimeConfig{ - MaxNoProgressStreak: 3, MaxRepeatCycleStreak: 3, }, Context: ContextConfig{ @@ -1844,7 +1843,7 @@ func TestParseCurrentConfigRoundTripRuntimeConfig(t *testing.T) { t.Parallel() snapshot := testDefaultConfig().Clone() - snapshot.Runtime.MaxNoProgressStreak = 5 + snapshot.Runtime.MaxRepeatCycleStreak = 5 data, err := marshalPersistedConfig(snapshot) if err != nil { @@ -1855,8 +1854,8 @@ func TestParseCurrentConfigRoundTripRuntimeConfig(t *testing.T) { if err != nil { t.Fatalf("parseCurrentConfig() error = %v", err) } - if parsed.Runtime.MaxNoProgressStreak != 5 { - t.Fatalf("expected max_no_progress_streak=5, got %d", parsed.Runtime.MaxNoProgressStreak) + if parsed.Runtime.MaxRepeatCycleStreak != 5 { + t.Fatalf("expected max_repeat_cycle_streak=5, got %d", parsed.Runtime.MaxRepeatCycleStreak) } } @@ -1868,7 +1867,7 @@ selected_provider: openai current_model: gpt-4.1 shell: bash runtime: - max_no_progress_streak: -2 + max_repeat_cycle_streak: -2 `) parsed, err := parseCurrentConfig(raw, StaticDefaults().Context, StaticDefaults().Memo) @@ -1880,9 +1879,9 @@ runtime: if err := parsed.ValidateSnapshot(); err != nil { t.Fatalf("ValidateSnapshot() error = %v", err) } - if parsed.Runtime.MaxNoProgressStreak != DefaultMaxNoProgressStreak { - t.Fatalf("expected default max_no_progress_streak=%d, got %d", - DefaultMaxNoProgressStreak, parsed.Runtime.MaxNoProgressStreak) + if parsed.Runtime.MaxRepeatCycleStreak != DefaultMaxRepeatCycleStreak { + t.Fatalf("expected default max_repeat_cycle_streak=%d, got %d", + DefaultMaxRepeatCycleStreak, parsed.Runtime.MaxRepeatCycleStreak) } } diff --git a/internal/config/runtime.go b/internal/config/runtime.go index a28991e9..36e035a2 100644 --- a/internal/config/runtime.go +++ b/internal/config/runtime.go @@ -8,14 +8,12 @@ import ( ) const ( - DefaultMaxNoProgressStreak = 6 DefaultMaxRepeatCycleStreak = 3 DefaultMaxTurns = 90 ) // RuntimeConfig 定义 runtime 层的可调参数。 type RuntimeConfig struct { - MaxNoProgressStreak int `yaml:"max_no_progress_streak,omitempty"` MaxRepeatCycleStreak int `yaml:"max_repeat_cycle_streak,omitempty"` MaxTurns int `yaml:"max_turns,omitempty"` Verification VerificationConfig `yaml:"verification,omitempty"` @@ -32,7 +30,6 @@ type RuntimeAssetsConfig struct { // defaultRuntimeConfig 返回 runtime 配置的静态默认值。 func defaultRuntimeConfig() RuntimeConfig { return RuntimeConfig{ - MaxNoProgressStreak: DefaultMaxNoProgressStreak, MaxRepeatCycleStreak: DefaultMaxRepeatCycleStreak, MaxTurns: DefaultMaxTurns, Verification: defaultVerificationConfig(), @@ -52,7 +49,6 @@ func defaultRuntimeAssetsConfig() RuntimeAssetsConfig { // Clone 复制 runtime 配置,避免调用方共享可变状态。 func (c RuntimeConfig) Clone() RuntimeConfig { return RuntimeConfig{ - MaxNoProgressStreak: c.MaxNoProgressStreak, MaxRepeatCycleStreak: c.MaxRepeatCycleStreak, MaxTurns: c.MaxTurns, Verification: c.Verification.Clone(), @@ -66,9 +62,6 @@ func (c *RuntimeConfig) ApplyDefaults(defaults RuntimeConfig) { if c == nil { return } - if c.MaxNoProgressStreak <= 0 { - c.MaxNoProgressStreak = defaults.MaxNoProgressStreak - } if c.MaxRepeatCycleStreak <= 0 { c.MaxRepeatCycleStreak = defaults.MaxRepeatCycleStreak } @@ -82,9 +75,6 @@ func (c *RuntimeConfig) ApplyDefaults(defaults RuntimeConfig) { // Validate 校验 runtime 配置是否满足最小约束。 func (c RuntimeConfig) Validate() error { - if c.MaxNoProgressStreak <= 0 { - return errors.New("max_no_progress_streak must be greater than 0") - } if c.MaxRepeatCycleStreak <= 0 { return errors.New("max_repeat_cycle_streak must be greater than 0") } diff --git a/internal/config/runtime_test.go b/internal/config/runtime_test.go index 5918c034..e88fdfd3 100644 --- a/internal/config/runtime_test.go +++ b/internal/config/runtime_test.go @@ -5,18 +5,15 @@ import "testing" func TestRuntimeConfigCloneAndDefaults(t *testing.T) { t.Parallel() - cfg := RuntimeConfig{MaxNoProgressStreak: 7, MaxRepeatCycleStreak: 4, MaxTurns: 21} + cfg := RuntimeConfig{MaxRepeatCycleStreak: 4, MaxTurns: 21} cloned := cfg.Clone() - if cloned.MaxNoProgressStreak != 7 || cloned.MaxRepeatCycleStreak != 4 || cloned.MaxTurns != 21 { + if cloned.MaxRepeatCycleStreak != 4 || cloned.MaxTurns != 21 { t.Fatalf("Clone() mismatch: %+v", cloned) } defaults := defaultRuntimeConfig() var zero RuntimeConfig zero.ApplyDefaults(defaults) - if zero.MaxNoProgressStreak != defaults.MaxNoProgressStreak { - t.Fatalf("MaxNoProgressStreak = %d, want %d", zero.MaxNoProgressStreak, defaults.MaxNoProgressStreak) - } if len(zero.Verification.Verifiers) == 0 { t.Fatal("expected default verifiers to be populated") } @@ -25,21 +22,17 @@ func TestRuntimeConfigCloneAndDefaults(t *testing.T) { func TestRuntimeConfigValidate(t *testing.T) { t.Parallel() - if err := (RuntimeConfig{MaxNoProgressStreak: 1, MaxRepeatCycleStreak: 1, MaxTurns: 1}).Validate(); err != nil { + if err := (RuntimeConfig{MaxRepeatCycleStreak: 1, MaxTurns: 1}).Validate(); err != nil { t.Fatalf("expected valid config, got %v", err) } - if err := (RuntimeConfig{MaxNoProgressStreak: 0, MaxRepeatCycleStreak: 1, MaxTurns: 1}).Validate(); err == nil { - t.Fatal("expected max_no_progress_streak validation error") - } - if err := (RuntimeConfig{MaxNoProgressStreak: 1, MaxRepeatCycleStreak: 0, MaxTurns: 1}).Validate(); err == nil { + if err := (RuntimeConfig{MaxRepeatCycleStreak: 0, MaxTurns: 1}).Validate(); err == nil { t.Fatal("expected max_repeat_cycle_streak validation error") } - if err := (RuntimeConfig{MaxNoProgressStreak: 1, MaxRepeatCycleStreak: 1, MaxTurns: -1}).Validate(); err == nil { + if err := (RuntimeConfig{MaxRepeatCycleStreak: 1, MaxTurns: -1}).Validate(); err == nil { t.Fatal("expected max_turns validation error") } err := (RuntimeConfig{ - MaxNoProgressStreak: 1, MaxRepeatCycleStreak: 1, MaxTurns: 1, Verification: VerificationConfig{ diff --git a/internal/context/source_plan_mode.go b/internal/context/source_plan_mode.go index eb727669..910a5425 100644 --- a/internal/context/source_plan_mode.go +++ b/internal/context/source_plan_mode.go @@ -31,6 +31,13 @@ func (planModeContextSource) Sections(ctx context.Context, input BuildInput) ([] } if input.CurrentPlan == nil { + if stage == "plan" { + noPlanHint := promptSection{ + Title: "Current Plan", + Content: "status: none\n\nNo current plan exists. You must create one by outputting a `plan_spec` + `summary_candidate` JSON before this turn ends.", + } + sections = append(sections, noPlanHint) + } return sections, nil } planSection := renderCurrentPlanSection(input.CurrentPlan, input.InjectFullPlan) diff --git a/internal/promptasset/assets.go b/internal/promptasset/assets.go index f50aed11..642cb2c2 100644 --- a/internal/promptasset/assets.go +++ b/internal/promptasset/assets.go @@ -22,8 +22,6 @@ const ( var coreSections = loadCoreSections() -var noProgressReminder = mustReadTemplate("templates/runtime/self_healing_no_progress.txt") - var repeatCycleReminder = mustReadTemplate("templates/runtime/self_healing_repeat_cycle.txt") var completionProtocolReminder = mustReadTemplate("templates/runtime/completion_protocol_reminder.md") @@ -51,11 +49,6 @@ func CoreSections() []Section { return append([]Section(nil), coreSections...) } -// NoProgressReminder 返回 runtime 无进展自愈提醒文案。 -func NoProgressReminder() string { - return noProgressReminder -} - // RepeatCycleReminder 返回 runtime 重复同参工具调用自愈提醒文案。 func RepeatCycleReminder() string { return repeatCycleReminder diff --git a/internal/promptasset/assets_test.go b/internal/promptasset/assets_test.go index f0a4a21c..03c4b13a 100644 --- a/internal/promptasset/assets_test.go +++ b/internal/promptasset/assets_test.go @@ -58,9 +58,6 @@ func TestCorePromptContainsOperationalGuidance(t *testing.T) { func TestRuntimeReminderTemplates(t *testing.T) { t.Parallel() - if !strings.Contains(NoProgressReminder(), "multiple consecutive attempts") { - t.Fatalf("expected no-progress reminder guidance, got %q", NoProgressReminder()) - } if !strings.Contains(RepeatCycleReminder(), "exact same arguments") { t.Fatalf("expected repeat-cycle reminder guidance, got %q", RepeatCycleReminder()) } diff --git a/internal/promptasset/templates/context/plan_mode_plan.md b/internal/promptasset/templates/context/plan_mode_plan.md index ff1706c7..0268dfdb 100644 --- a/internal/promptasset/templates/context/plan_mode_plan.md +++ b/internal/promptasset/templates/context/plan_mode_plan.md @@ -3,9 +3,11 @@ You are currently in the planning stage. - You may research, analyze, ask clarifying questions, and produce a plan. - Do not perform any write action in this stage. - Do not rewrite the current full plan unless the conversation clearly requires creating or replacing the plan itself. -- If you are only answering questions, comparing options, clarifying constraints, or refining details, do not output planning JSON. +- **If no Current Plan section is attached, your first priority is to produce a plan.** The user has entered planning mode expecting a structured plan. Research the codebase as needed, then output a complete `plan_spec` + `summary_candidate` JSON. Do not end the turn with only a conversational answer when there is no existing plan. +- If a Current Plan is already present, you may refine, replace, or discuss it. When the user asks a clarifying question or wants to explore options without committing to a new plan revision, you may answer conversationally without outputting planning JSON. - Only output a JSON object containing `plan_spec` and `summary_candidate` when you are explicitly creating or rewriting the current full plan. - `plan_spec` must include `goal`, `steps`, `constraints`, `verify`, `todos`, and `open_questions`. +- `plan_spec.todos` **must not be empty**. Populate it with the major actionable items that the plan requires. Each todo must have a unique `id`, a descriptive `content`, and `status: "pending"`. Without todos the plan has no executable work items and the build stage cannot proceed. - `summary_candidate` must include `goal`, `key_steps`, `constraints`, `verify`, and `active_todo_ids`. - If a Todo State section is attached, decide which non-terminal todos still belong to the current plan. - Todos that still belong to the current plan must appear in `plan_spec.todos` and their IDs must appear in `summary_candidate.active_todo_ids`. diff --git a/internal/runtime/budget_models.go b/internal/runtime/budget_models.go index e56d9768..12c8ec94 100644 --- a/internal/runtime/budget_models.go +++ b/internal/runtime/budget_models.go @@ -20,7 +20,6 @@ type TurnBudgetSnapshot struct { PromptBudget int BudgetSource string CompactCount int - NoProgressStreakLimit int RepeatCycleStreakLimit int InjectFullPlan bool ContextWindow int @@ -62,7 +61,6 @@ func newTurnBudgetSnapshot( promptBudget int, budgetSource string, compactCount int, - noProgressStreakLimit int, repeatCycleStreakLimit int, injectFullPlan bool, contextWindow int, @@ -84,7 +82,6 @@ func newTurnBudgetSnapshot( PromptBudget: promptBudget, BudgetSource: budgetSource, CompactCount: compactCount, - NoProgressStreakLimit: noProgressStreakLimit, RepeatCycleStreakLimit: repeatCycleStreakLimit, InjectFullPlan: injectFullPlan, ContextWindow: contextWindow, diff --git a/internal/runtime/controlplane/progress.go b/internal/runtime/controlplane/progress.go index 6120dd5e..f185ac43 100644 --- a/internal/runtime/controlplane/progress.go +++ b/internal/runtime/controlplane/progress.go @@ -1,21 +1,5 @@ package controlplane -// ProgressEvidenceKind 标识 runtime 聚合得到的结构化进展证据。 -type ProgressEvidenceKind string - -const ( - // EvidenceTaskStateChanged 表示任务状态发生合法迁移。 - EvidenceTaskStateChanged ProgressEvidenceKind = "TASK_STATE_CHANGED" - // EvidenceTodoStateChanged 表示 todo 列表发生结构化变化。 - EvidenceTodoStateChanged ProgressEvidenceKind = "TODO_STATE_CHANGED" - // EvidenceWriteApplied 表示本轮产生了有效文件改动。 - EvidenceWriteApplied ProgressEvidenceKind = "WRITE_APPLIED" - // EvidenceVerifyPassed 表示本轮存在明确的验证成功信号(仅与写入证据组合后算业务推进)。 - EvidenceVerifyPassed ProgressEvidenceKind = "VERIFY_PASSED" - // EvidenceNewInfoNonDup 表示本轮引入了去重后的新信息。 - EvidenceNewInfoNonDup ProgressEvidenceKind = "NEW_INFO_NON_DUP" -) - // SubgoalRelation 表示当前轮子目标与上一轮的关系。 type SubgoalRelation string @@ -28,7 +12,7 @@ const ( SubgoalRelationUnknown SubgoalRelation = "unknown" ) -// StalledProgressState 表示当前进展是否已进入软卡住状态。 +// StalledProgressState 表示当前重复循环检测是否已进入软卡住状态。 type StalledProgressState string const ( @@ -44,40 +28,23 @@ type ReminderKind string const ( // ReminderKindNone 表示当前轮无需注入提醒。 ReminderKindNone ReminderKind = "" - // ReminderKindNoProgress 表示应注入无进展提醒。 - ReminderKindNoProgress ReminderKind = "REMINDER_NO_PROGRESS" // ReminderKindRepeatCycle 表示应注入重复循环提醒。 ReminderKindRepeatCycle ReminderKind = "REMINDER_REPEAT_CYCLE" - // ReminderKindGenericStalled 表示应注入通用 stalled 提醒。 - ReminderKindGenericStalled ReminderKind = "REMINDER_GENERIC_STALLED" ) -// ProgressEvidenceRecord 描述一条结构化进展证据。 -type ProgressEvidenceRecord struct { - Kind ProgressEvidenceKind `json:"kind"` - Detail string `json:"detail,omitempty"` -} - -// ProgressScore 表示一次 progress 评估后的完整快照。 +// ProgressScore 表示一次重复循环检测后的快照。 type ProgressScore struct { - HasBusinessProgress bool `json:"has_business_progress"` - HasExplorationProgress bool `json:"has_exploration_progress"` - StrongEvidenceCount int `json:"strong_evidence_count"` - MediumEvidenceCount int `json:"medium_evidence_count"` - WeakEvidenceCount int `json:"weak_evidence_count"` - ExplorationStreak int `json:"exploration_streak"` - NoProgressStreak int `json:"no_progress_streak"` - RepeatCycleStreak int `json:"repeat_cycle_streak"` - SameToolSignature bool `json:"same_tool_signature"` - SameResultFingerprint bool `json:"same_result_fingerprint"` - SameSubgoal SubgoalRelation `json:"same_subgoal"` - StalledProgressState StalledProgressState `json:"stalled_progress_state"` - ReminderKind ReminderKind `json:"reminder_kind,omitempty"` - ShouldTerminate bool `json:"should_terminate"` - TerminateReason StopReason `json:"terminate_reason,omitempty"` + RepeatCycleStreak int `json:"repeat_cycle_streak"` + SameToolSignature bool `json:"same_tool_signature"` + SameResultFingerprint bool `json:"same_result_fingerprint"` + SameSubgoal SubgoalRelation `json:"same_subgoal"` + StalledProgressState StalledProgressState `json:"stalled_progress_state"` + ReminderKind ReminderKind `json:"reminder_kind,omitempty"` + ShouldTerminate bool `json:"should_terminate"` + TerminateReason StopReason `json:"terminate_reason,omitempty"` } -// ProgressState 保存跨轮 progress 判定所需的历史快照。 +// ProgressState 保存跨轮重复循环检测所需的历史快照。 type ProgressState struct { LastScore ProgressScore `json:"last_score"` LastToolSignature string `json:"last_tool_signature,omitempty"` @@ -85,27 +52,17 @@ type ProgressState struct { LastSubgoalFingerprint string `json:"last_subgoal_fingerprint,omitempty"` } -// ProgressInput 描述一次 progress 评估所需的事实输入。 +// ProgressInput 描述一次重复循环检测所需的指纹输入。 type ProgressInput struct { - RunState RunState - Evidence []ProgressEvidenceRecord CurrentToolSignature string ResultFingerprint string SubgoalFingerprint string - NoProgressLimit int RepeatCycleLimit int } -// EvaluateProgress 基于上一轮状态和本轮事实生成新的 progress 快照。 +// EvaluateProgress 基于上一轮指纹和本轮指纹检测 agent 是否陷入重复循环。 func EvaluateProgress(state ProgressState, input ProgressInput) ProgressState { next := ProgressScore{} - flags := summarizeEvidence(input.Evidence) - - next.StrongEvidenceCount = flags.strongCount - next.MediumEvidenceCount = flags.mediumCount - next.WeakEvidenceCount = flags.weakCount - next.HasBusinessProgress = flags.strongCount > 0 || (flags.hasWrite && flags.hasVerify) - next.HasExplorationProgress = !next.HasBusinessProgress && isExplorationProgress(input.RunState, flags) next.SameToolSignature = input.CurrentToolSignature != "" && state.LastToolSignature != "" && input.CurrentToolSignature == state.LastToolSignature @@ -114,38 +71,22 @@ func EvaluateProgress(state ProgressState, input ProgressInput) ProgressState { input.ResultFingerprint == state.LastResultFingerprint next.SameSubgoal = compareSubgoalFingerprint(state.LastSubgoalFingerprint, input.SubgoalFingerprint) - if next.HasBusinessProgress { - next.ExplorationStreak = 0 - next.NoProgressStreak = 0 - } else if next.HasExplorationProgress { - next.ExplorationStreak = state.LastScore.ExplorationStreak + 1 - next.NoProgressStreak = state.LastScore.NoProgressStreak - if next.ExplorationStreak > explorationWindowForPhase(input.RunState) { - next.NoProgressStreak++ - } - } else { - next.ExplorationStreak = 0 - next.NoProgressStreak = state.LastScore.NoProgressStreak + 1 - } - - if next.HasBusinessProgress { - next.RepeatCycleStreak = 0 - } else if next.SameToolSignature && next.SameResultFingerprint && next.SameSubgoal == SubgoalRelationSame { + if next.SameToolSignature && next.SameResultFingerprint && next.SameSubgoal == SubgoalRelationSame { next.RepeatCycleStreak = state.LastScore.RepeatCycleStreak + 1 } else { next.RepeatCycleStreak = 0 } - if shouldStall(next, input.NoProgressLimit, input.RepeatCycleLimit) { + if shouldStall(next, input.RepeatCycleLimit) { next.StalledProgressState = StalledProgressStalled - next.ReminderKind = selectReminderKind(next) + next.ReminderKind = ReminderKindRepeatCycle } else { next.StalledProgressState = StalledProgressHealthy next.ReminderKind = ReminderKindNone } if shouldTerminateAfterStalledReminder(state.LastScore, next) { next.ShouldTerminate = true - next.TerminateReason = stopReasonForReminderKind(next.ReminderKind) + next.TerminateReason = StopReasonRepeatCycle } return ProgressState{ @@ -156,7 +97,7 @@ func EvaluateProgress(state ProgressState, input ProgressInput) ProgressState { } } -// shouldTerminateAfterStalledReminder 只在同类 stalled 已提醒过一轮后才允许硬终止。 +// shouldTerminateAfterStalledReminder 只在 repeat stalled 已提醒过一轮后才允许硬终止。 func shouldTerminateAfterStalledReminder(previous ProgressScore, current ProgressScore) bool { if current.StalledProgressState != StalledProgressStalled || current.ReminderKind == ReminderKindNone { return false @@ -165,67 +106,6 @@ func shouldTerminateAfterStalledReminder(previous ProgressScore, current Progres previous.ReminderKind == current.ReminderKind } -// stopReasonForReminderKind 将当前 stalled 提醒类型映射为最终终止原因。 -func stopReasonForReminderKind(kind ReminderKind) StopReason { - switch kind { - case ReminderKindRepeatCycle: - return StopReasonRepeatCycle - case ReminderKindNoProgress, ReminderKindGenericStalled: - return StopReasonNoProgress - default: - return StopReasonNoProgress - } -} - -type evidenceFlags struct { - strongCount int - mediumCount int - weakCount int - hasWrite bool - hasVerify bool -} - -// summarizeEvidence 汇总本轮 evidence 的强中弱计数与关键标记。 -func summarizeEvidence(records []ProgressEvidenceRecord) evidenceFlags { - var flags evidenceFlags - for _, record := range records { - switch record.Kind { - case EvidenceTaskStateChanged, EvidenceTodoStateChanged: - flags.strongCount++ - case EvidenceWriteApplied, EvidenceVerifyPassed: - flags.mediumCount++ - case EvidenceNewInfoNonDup: - flags.weakCount++ - } - - switch record.Kind { - case EvidenceWriteApplied: - flags.hasWrite = true - case EvidenceVerifyPassed: - flags.hasVerify = true - } - } - return flags -} - -// isExplorationProgress 判断本轮是否属于可被宽容窗口吸收的探索型推进。 -func isExplorationProgress(runState RunState, flags evidenceFlags) bool { - if runState != RunStatePlan && runState != RunStateExecute { - return false - } - return flags.weakCount > 0 -} - -// explorationWindowForPhase 返回不同阶段允许的 exploration 宽容窗口。 -func explorationWindowForPhase(runState RunState) int { - switch runState { - case RunStatePlan, RunStateExecute: - return 15 - default: - return 0 - } -} - // compareSubgoalFingerprint 判断当前轮与上一轮的子目标关系。 func compareSubgoalFingerprint(previous string, current string) SubgoalRelation { if previous == "" && current == "" { @@ -240,24 +120,7 @@ func compareSubgoalFingerprint(previous string, current string) SubgoalRelation return SubgoalRelationDifferent } -// shouldStall 判断当前快照是否应进入 stalled。 -func shouldStall(score ProgressScore, noProgressLimit int, repeatLimit int) bool { - if repeatLimit > 0 && score.RepeatCycleStreak >= repeatLimit { - return true - } - if noProgressLimit > 0 && score.NoProgressStreak >= noProgressLimit { - return true - } - return false -} - -// selectReminderKind 选择 stalled 场景下应注入的提醒类型。 -func selectReminderKind(score ProgressScore) ReminderKind { - if score.RepeatCycleStreak > 0 && score.SameToolSignature && score.SameResultFingerprint { - return ReminderKindRepeatCycle - } - if score.NoProgressStreak > 0 { - return ReminderKindNoProgress - } - return ReminderKindGenericStalled +// shouldStall 判断当前快照是否应进入 repeat stalled。 +func shouldStall(score ProgressScore, repeatLimit int) bool { + return repeatLimit > 0 && score.RepeatCycleStreak >= repeatLimit } diff --git a/internal/runtime/controlplane/progress_test.go b/internal/runtime/controlplane/progress_test.go index 2fe2fd9e..288ebd05 100644 --- a/internal/runtime/controlplane/progress_test.go +++ b/internal/runtime/controlplane/progress_test.go @@ -2,124 +2,6 @@ package controlplane import "testing" -func TestEvaluateProgressBusinessProgressResetsStreaks(t *testing.T) { - t.Parallel() - - state := ProgressState{ - LastScore: ProgressScore{ - ExplorationStreak: 2, - NoProgressStreak: 3, - RepeatCycleStreak: 1, - }, - } - - got := EvaluateProgress(state, ProgressInput{ - RunState: RunStateExecute, - Evidence: []ProgressEvidenceRecord{ - {Kind: EvidenceTodoStateChanged}, - }, - NoProgressLimit: 3, - RepeatCycleLimit: 3, - }) - - if !got.LastScore.HasBusinessProgress { - t.Fatalf("expected business progress") - } - if got.LastScore.NoProgressStreak != 0 { - t.Fatalf("no-progress streak = %d, want 0", got.LastScore.NoProgressStreak) - } - if got.LastScore.RepeatCycleStreak != 0 { - t.Fatalf("repeat streak = %d, want 0", got.LastScore.RepeatCycleStreak) - } -} - -func TestEvaluateProgressExplorationUsesWindow(t *testing.T) { - t.Parallel() - - state := ProgressState{ - LastScore: ProgressScore{ - ExplorationStreak: 3, - NoProgressStreak: 1, - }, - } - - got := EvaluateProgress(state, ProgressInput{ - RunState: RunStatePlan, - Evidence: []ProgressEvidenceRecord{ - {Kind: EvidenceNewInfoNonDup}, - }, - NoProgressLimit: 3, - RepeatCycleLimit: 3, - }) - - if !got.LastScore.HasExplorationProgress { - t.Fatalf("expected exploration progress") - } - if got.LastScore.ExplorationStreak != 4 { - t.Fatalf("exploration streak = %d, want 4", got.LastScore.ExplorationStreak) - } - if got.LastScore.NoProgressStreak != 1 { - t.Fatalf("no-progress streak = %d, want unchanged 1", got.LastScore.NoProgressStreak) - } -} - -func TestEvaluateProgressExplorationExhaustionStartsNoProgress(t *testing.T) { - t.Parallel() - - state := ProgressState{ - LastScore: ProgressScore{ - ExplorationStreak: 15, - NoProgressStreak: 1, - }, - } - - got := EvaluateProgress(state, ProgressInput{ - RunState: RunStatePlan, - Evidence: []ProgressEvidenceRecord{ - {Kind: EvidenceNewInfoNonDup}, - }, - NoProgressLimit: 3, - RepeatCycleLimit: 3, - }) - - if got.LastScore.NoProgressStreak != 2 { - t.Fatalf("no-progress streak = %d, want 2", got.LastScore.NoProgressStreak) - } -} - -func TestEvaluateProgressNoProgressWarnsBeforeTerminate(t *testing.T) { - t.Parallel() - - first := EvaluateProgress(ProgressState{ - LastScore: ProgressScore{NoProgressStreak: 2}, - }, ProgressInput{ - RunState: RunStateExecute, - NoProgressLimit: 3, - RepeatCycleLimit: 3, - SubgoalFingerprint: "subgoal", - }) - - if first.LastScore.StalledProgressState != StalledProgressStalled { - t.Fatalf("first stalled state = %q, want %q", first.LastScore.StalledProgressState, StalledProgressStalled) - } - if first.LastScore.ReminderKind != ReminderKindNoProgress { - t.Fatalf("first reminder = %q, want %q", first.LastScore.ReminderKind, ReminderKindNoProgress) - } - if first.LastScore.ShouldTerminate { - t.Fatal("first stalled no-progress should warn before hard terminate") - } - - second := EvaluateProgress(first, ProgressInput{ - RunState: RunStateExecute, - NoProgressLimit: 3, - RepeatCycleLimit: 3, - SubgoalFingerprint: "subgoal", - }) - if !second.LastScore.ShouldTerminate || second.LastScore.TerminateReason != StopReasonNoProgress { - t.Fatalf("second score = %+v, want no-progress hard terminate", second.LastScore) - } -} - func TestEvaluateProgressRepeatCycleRequiresSameResultAndSubgoal(t *testing.T) { t.Parallel() @@ -131,11 +13,9 @@ func TestEvaluateProgressRepeatCycleRequiresSameResultAndSubgoal(t *testing.T) { } got := EvaluateProgress(state, ProgressInput{ - RunState: RunStateExecute, CurrentToolSignature: "sig", ResultFingerprint: "result", SubgoalFingerprint: "subgoal", - NoProgressLimit: 3, RepeatCycleLimit: 3, }) @@ -168,11 +48,9 @@ func TestEvaluateProgressRepeatCycleTerminatesAfterReminder(t *testing.T) { } got := EvaluateProgress(state, ProgressInput{ - RunState: RunStateExecute, CurrentToolSignature: "sig", ResultFingerprint: "result", SubgoalFingerprint: "subgoal", - NoProgressLimit: 10, RepeatCycleLimit: 3, }) @@ -181,51 +59,77 @@ func TestEvaluateProgressRepeatCycleTerminatesAfterReminder(t *testing.T) { } } -func TestEvaluateProgressUnknownSubgoalDoesNotAdvanceRepeat(t *testing.T) { +func TestEvaluateProgressDifferentToolResultOrSubgoalResetsRepeat(t *testing.T) { t.Parallel() + tests := []struct { + name string + input ProgressInput + same SubgoalRelation + }{ + { + name: "different tool", + input: ProgressInput{ + CurrentToolSignature: "other", + ResultFingerprint: "result", + SubgoalFingerprint: "subgoal", + RepeatCycleLimit: 3, + }, + same: SubgoalRelationSame, + }, + { + name: "different result", + input: ProgressInput{ + CurrentToolSignature: "sig", + ResultFingerprint: "other", + SubgoalFingerprint: "subgoal", + RepeatCycleLimit: 3, + }, + same: SubgoalRelationSame, + }, + { + name: "different subgoal", + input: ProgressInput{ + CurrentToolSignature: "sig", + ResultFingerprint: "result", + SubgoalFingerprint: "other", + RepeatCycleLimit: 3, + }, + same: SubgoalRelationDifferent, + }, + { + name: "unknown subgoal", + input: ProgressInput{ + CurrentToolSignature: "sig", + ResultFingerprint: "result", + SubgoalFingerprint: "", + RepeatCycleLimit: 3, + }, + same: SubgoalRelationUnknown, + }, + } + state := ProgressState{ - LastScore: ProgressScore{RepeatCycleStreak: 1}, + LastScore: ProgressScore{RepeatCycleStreak: 2}, LastToolSignature: "sig", LastResultFingerprint: "result", LastSubgoalFingerprint: "subgoal", } - - got := EvaluateProgress(state, ProgressInput{ - RunState: RunStateExecute, - CurrentToolSignature: "sig", - ResultFingerprint: "result", - SubgoalFingerprint: "", - NoProgressLimit: 3, - RepeatCycleLimit: 3, - }) - - if got.LastScore.SameSubgoal != SubgoalRelationUnknown { - t.Fatalf("same subgoal = %q, want %q", got.LastScore.SameSubgoal, SubgoalRelationUnknown) - } - if got.LastScore.RepeatCycleStreak != 0 { - t.Fatalf("repeat streak = %d, want 0", got.LastScore.RepeatCycleStreak) - } -} - -func TestEvaluateProgressVerifyPassedAloneIsNotBusinessProgress(t *testing.T) { - t.Parallel() - - got := EvaluateProgress(ProgressState{}, ProgressInput{ - RunState: RunStateVerify, - Evidence: []ProgressEvidenceRecord{ - {Kind: EvidenceVerifyPassed}, - }, - NoProgressLimit: 3, - RepeatCycleLimit: 3, - }) - if got.LastScore.HasBusinessProgress { - t.Fatalf("expected verify-passed alone to not count as business progress") - } - if got.LastScore.StrongEvidenceCount != 0 { - t.Fatalf("strong evidence = %d, want 0", got.LastScore.StrongEvidenceCount) - } - if got.LastScore.MediumEvidenceCount != 1 { - t.Fatalf("medium evidence = %d, want 1", got.LastScore.MediumEvidenceCount) + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got := EvaluateProgress(state, tt.input) + if got.LastScore.SameSubgoal != tt.same { + t.Fatalf("same subgoal = %q, want %q", got.LastScore.SameSubgoal, tt.same) + } + if got.LastScore.RepeatCycleStreak != 0 { + t.Fatalf("repeat streak = %d, want 0", got.LastScore.RepeatCycleStreak) + } + if got.LastScore.StalledProgressState != StalledProgressHealthy { + t.Fatalf("stalled state = %q, want healthy", got.LastScore.StalledProgressState) + } + }) } } diff --git a/internal/runtime/controlplane/stop_reason.go b/internal/runtime/controlplane/stop_reason.go index f3dc5842..c7ff736e 100644 --- a/internal/runtime/controlplane/stop_reason.go +++ b/internal/runtime/controlplane/stop_reason.go @@ -24,10 +24,6 @@ const ( StopReasonTodoNotConverged StopReason = "todo_not_converged" // StopReasonTodoWaitingExternal 表示 required todo 仍在等待外部条件。 StopReasonTodoWaitingExternal StopReason = "todo_waiting_external" - // StopReasonNoProgressAfterFinalIntercept 表示 final 连续被拦截且没有新进展。 - StopReasonNoProgressAfterFinalIntercept StopReason = "no_progress_after_final_intercept" - // StopReasonNoProgress 表示运行连续缺少实质进展并触发硬终止。 - StopReasonNoProgress StopReason = "no_progress" // StopReasonRepeatCycle 表示运行重复相同动作/结果并触发硬终止。 StopReasonRepeatCycle StopReason = "repeat_cycle" // StopReasonMaxTurnExceededWithUnconvergedTodos 表示达到最大轮次时 todo 仍未收敛。 diff --git a/internal/runtime/permission.go b/internal/runtime/permission.go index e3645f04..79e8e3dd 100644 --- a/internal/runtime/permission.go +++ b/internal/runtime/permission.go @@ -57,7 +57,7 @@ const ( minInlineSubAgentToolTimeout = 30 * time.Second defaultDiagnoseToolTimeout = 60 * time.Second defaultPermissionToolTimeout = 20 * time.Second - defaultCodebaseSearchToolTimeout = 60 * time.Second + defaultWorkspaceScanToolTimeout = 60 * time.Second defaultAskUserToolTimeout = 5 * time.Minute maxAskUserToolTimeout = time.Hour maxAdaptiveToolTimeout = 160 * time.Second @@ -321,9 +321,9 @@ func resolveToolExecutionTimeout(call providertypes.ToolCall, fallback time.Dura } return base } - if isCodebaseSearchTool(name) { - if base < defaultCodebaseSearchToolTimeout { - return defaultCodebaseSearchToolTimeout + if isWorkspaceScanTool(name) { + if base < defaultWorkspaceScanToolTimeout { + return defaultWorkspaceScanToolTimeout } return base } @@ -356,10 +356,12 @@ func resolveToolExecutionTimeout(call providertypes.ToolCall, fallback time.Dura return base } -// isCodebaseSearchTool 识别会做代码库遍历的搜索工具,用于给首轮执行预留更合理的时间。 -func isCodebaseSearchTool(name string) bool { +// isWorkspaceScanTool 识别会遍历工作区的搜索工具,用于给首轮执行预留更合理的时间。 +func isWorkspaceScanTool(name string) bool { return strings.EqualFold(name, tools.ToolNameCodebaseSearchText) || - strings.EqualFold(name, tools.ToolNameCodebaseSearchSymbol) + strings.EqualFold(name, tools.ToolNameCodebaseSearchSymbol) || + strings.EqualFold(name, tools.ToolNameFilesystemGrep) || + strings.EqualFold(name, tools.ToolNameFilesystemGlob) } // resolveAdaptiveToolExecutionTimeout 根据同一 Run 内同签名工具的 timeout 次数指数放大超时。 @@ -424,6 +426,9 @@ func supportsAdaptiveToolTimeout(name string) bool { // toolTimeoutBackoffKey 将工具名和规范化参数组合为本轮 timeout 倍增键。 func toolTimeoutBackoffKey(call providertypes.ToolCall) string { + if isWorkspaceScanTool(call.Name) { + return workspaceScanToolTimeoutBackoffKey(call) + } signature := computeToolSignature([]providertypes.ToolCall{call}) if strings.TrimSpace(signature) == "" { return "" @@ -431,6 +436,42 @@ func toolTimeoutBackoffKey(call providertypes.ToolCall) string { return strings.ToLower(strings.TrimSpace(call.Name)) + "\x00" + signature } +// workspaceScanToolTimeoutBackoffKey 仅按扫描工具和范围聚合 timeout,避免换关键词后丢失退避状态。 +func workspaceScanToolTimeoutBackoffKey(call providertypes.ToolCall) string { + name := strings.ToLower(strings.TrimSpace(call.Name)) + if name == "" { + return "" + } + return name + "\x00" + workspaceScanScopeFromArguments(call.Arguments) +} + +// workspaceScanScopeFromArguments 从搜索工具参数中抽取扫描范围,解析失败时回落到全工作区范围。 +func workspaceScanScopeFromArguments(raw string) string { + if strings.TrimSpace(raw) == "" { + return "." + } + var payload struct { + Dir string `json:"dir"` + ScopeDir string `json:"scope_dir"` + Workdir string `json:"workdir"` + } + if err := json.Unmarshal([]byte(raw), &payload); err != nil { + return "." + } + scope := strings.TrimSpace(payload.ScopeDir) + if scope == "" { + scope = strings.TrimSpace(payload.Dir) + } + if scope == "" { + scope = "." + } + workdir := strings.TrimSpace(payload.Workdir) + if workdir == "" { + return scope + } + return workdir + "/" + scope +} + // toolExecutionTimedOut 判断工具结果是否代表执行超时。 func toolExecutionTimedOut(result tools.ToolResult, err error) bool { if errors.Is(err, context.DeadlineExceeded) { diff --git a/internal/runtime/permission_test.go b/internal/runtime/permission_test.go index 090e576c..269a373b 100644 --- a/internal/runtime/permission_test.go +++ b/internal/runtime/permission_test.go @@ -1333,17 +1333,22 @@ func TestResolveToolExecutionTimeoutForAskUser(t *testing.T) { } } -func TestResolveToolExecutionTimeoutForCodebaseSearch(t *testing.T) { +func TestResolveToolExecutionTimeoutForWorkspaceScanTools(t *testing.T) { t.Parallel() base := 20 * time.Second - for _, name := range []string{tools.ToolNameCodebaseSearchText, tools.ToolNameCodebaseSearchSymbol} { + for _, name := range []string{ + tools.ToolNameCodebaseSearchText, + tools.ToolNameCodebaseSearchSymbol, + tools.ToolNameFilesystemGrep, + tools.ToolNameFilesystemGlob, + } { got := resolveToolExecutionTimeout(providertypes.ToolCall{ Name: name, Arguments: `{"query":"plan mode"}`, }, base) - if got != defaultCodebaseSearchToolTimeout { - t.Fatalf("%s timeout = %v, want %v", name, got, defaultCodebaseSearchToolTimeout) + if got != defaultWorkspaceScanToolTimeout { + t.Fatalf("%s timeout = %v, want %v", name, got, defaultWorkspaceScanToolTimeout) } } @@ -1393,22 +1398,33 @@ func TestResolveAdaptiveToolExecutionTimeoutBackoff(t *testing.T) { } } -func TestResolveAdaptiveToolExecutionTimeoutForCodebaseSearch(t *testing.T) { +func TestResolveAdaptiveToolExecutionTimeoutForWorkspaceScanTools(t *testing.T) { t.Parallel() state := newRunState("run-codebase-timeout-backoff", agentsession.New("codebase-timeout-backoff")) call := providertypes.ToolCall{ Name: tools.ToolNameCodebaseSearchText, - Arguments: `{"query":"plan mode"}`, + Arguments: `{"query":"plan mode","scope_dir":"internal/runtime"}`, + } + sameScopeDifferentQuery := providertypes.ToolCall{ + Name: tools.ToolNameCodebaseSearchText, + Arguments: `{"query":"todo","scope_dir":"internal/runtime"}`, + } + differentScope := providertypes.ToolCall{ + Name: tools.ToolNameCodebaseSearchText, + Arguments: `{"query":"todo","scope_dir":"internal/session"}`, } - base := defaultCodebaseSearchToolTimeout + base := defaultWorkspaceScanToolTimeout if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != 60*time.Second { t.Fatalf("first codebase timeout = %v, want 60s", got) } recordAdaptiveToolTimeoutResult(&state, call, tools.ToolResult{}, context.DeadlineExceeded) - if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != 120*time.Second { - t.Fatalf("second codebase timeout = %v, want 120s", got) + if got := resolveAdaptiveToolExecutionTimeout(&state, sameScopeDifferentQuery, base); got != 120*time.Second { + t.Fatalf("same-scope codebase timeout = %v, want 120s", got) + } + if got := resolveAdaptiveToolExecutionTimeout(&state, differentScope, base); got != base { + t.Fatalf("different-scope codebase timeout = %v, want %v", got, base) } recordAdaptiveToolTimeoutResult(&state, call, tools.ToolResult{ErrorClass: "timeout"}, nil) if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != 160*time.Second { @@ -1418,6 +1434,20 @@ func TestResolveAdaptiveToolExecutionTimeoutForCodebaseSearch(t *testing.T) { if got := resolveAdaptiveToolExecutionTimeout(&state, call, base); got != base { t.Fatalf("reset codebase timeout = %v, want %v", got, base) } + + grepState := newRunState("run-grep-timeout-backoff", agentsession.New("grep-timeout-backoff")) + grepCall := providertypes.ToolCall{ + Name: tools.ToolNameFilesystemGrep, + Arguments: `{"pattern":"CurrentPlan","dir":"internal/runtime"}`, + } + grepSameDir := providertypes.ToolCall{ + Name: tools.ToolNameFilesystemGrep, + Arguments: `{"pattern":"PlanArtifact","dir":"internal/runtime"}`, + } + recordAdaptiveToolTimeoutResult(&grepState, grepCall, tools.ToolResult{}, context.DeadlineExceeded) + if got := resolveAdaptiveToolExecutionTimeout(&grepState, grepSameDir, base); got != 120*time.Second { + t.Fatalf("same-dir grep timeout = %v, want 120s", got) + } } func TestResolveAdaptiveToolExecutionTimeoutSkipsInteractiveTools(t *testing.T) { diff --git a/internal/runtime/run.go b/internal/runtime/run.go index e11ee091..4784f4ca 100644 --- a/internal/runtime/run.go +++ b/internal/runtime/run.go @@ -25,8 +25,6 @@ import ( "neo-code/internal/tools" ) -var selfHealingReminder = promptasset.NoProgressReminder() - var selfHealingRepeatReminder = promptasset.RepeatCycleReminder() const ( @@ -63,20 +61,6 @@ func computeToolSignature(calls []providertypes.ToolCall) string { return hex.EncodeToString(hash[:]) } -// computeTodoStateSignature 计算当前 Todo 列表的状态签名,用于识别 dispatch 是否产生了真实状态变化。 -func computeTodoStateSignature(items []agentsession.TodoItem) string { - normalized := cloneTodosForPersistence(items) - if len(normalized) == 0 { - return "" - } - encoded, err := json.Marshal(normalized) - if err != nil { - return "" - } - hash := sha256.Sum256(encoded) - return hex.EncodeToString(hash[:]) -} - // Run 执行一次完整的 ReAct 闭环:保存用户输入、驱动模型、执行工具并发出事件。 // 已有会话会先加锁再加载/更新,确保同一会话并发 Run 不会出现状态覆盖; // 新会话在创建后再绑定会话锁,不同会话可并行执行。 @@ -221,6 +205,9 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { if err := s.maybeAppendTodoBootstrapReminder(ctx, &state); err != nil { return s.handleRunError(err) } + if err := s.maybeAppendPlanBootstrapReminder(ctx, &state); err != nil { + return s.handleRunError(err) + } s.emitRuntimeSnapshotUpdated(ctx, &state, "session_start") s.updateResumeCheckpoint(ctx, &state, "plan", "") @@ -471,9 +458,6 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { return nil } - beforeTask := state.session.TaskState.Clone() - beforeTodos := cloneTodosForPersistence(state.session.Todos) - if err := s.setBaseRunState(ctx, &state, controlplane.RunStateExecute); err != nil { return s.handleRunError(err) } @@ -493,18 +477,10 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { state.completion = applyToolExecutionCompletion(state.completion, summary) afterTask := state.session.TaskState.Clone() afterTodos := cloneTodosForPersistence(state.session.Todos) - progressRunState := controlplane.RunStateExecute - if resolvePlanningStageForState(&state) == planStagePlan { - progressRunState = controlplane.RunStatePlan - } progressInput := collectProgressInput( - progressRunState, - beforeTask, afterTask, - beforeTodos, afterTodos, summary, - snapshot.NoProgressStreakLimit, snapshot.RepeatCycleStreakLimit, ) state.progress = controlplane.EvaluateProgress(state.progress, progressInput) @@ -515,7 +491,7 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { if currentScore.ShouldTerminate { reason := currentScore.TerminateReason if reason == "" { - reason = controlplane.StopReasonNoProgress + reason = controlplane.StopReasonRepeatCycle } state.markTerminalDecision(controlplane.TerminalStatusIncomplete, reason, "progress hard stop") s.emitRunScoped(ctx, EventAgentDone, &state, providertypes.Message{Role: providertypes.RoleAssistant}) @@ -608,7 +584,6 @@ func (s *Service) prepareTurnBudgetSnapshot(ctx context.Context, state *runState score := state.progress.LastScore state.mu.Unlock() - limit := resolveNoProgressStreakLimit(cfg.Runtime) repeatLimit := resolveRepeatCycleStreakLimit(cfg.Runtime) systemPrompt := withProgressReminder(builtContext.SystemPrompt, score) if notificationHint := strings.TrimSpace(s.drainHookNotificationsForTurn(state)); notificationHint != "" { @@ -650,7 +625,6 @@ func (s *Service) prepareTurnBudgetSnapshot(ctx context.Context, state *runState promptBudget, budgetSource, state.compactCount, - limit, repeatLimit, injectFullPlan, contextWindow, @@ -658,14 +632,6 @@ func (s *Service) prepareTurnBudgetSnapshot(ctx context.Context, state *runState ), false, nil } -// resolveNoProgressStreakLimit 统一解析熔断阈值,避免运行期出现无效值导致分支行为不一致。 -func resolveNoProgressStreakLimit(rc config.RuntimeConfig) int { - if rc.MaxNoProgressStreak <= 0 { - return config.DefaultMaxNoProgressStreak - } - return rc.MaxNoProgressStreak -} - // resolveRepeatCycleStreakLimit 统一解析重复调用循环阈值。 func resolveRepeatCycleStreakLimit(rc config.RuntimeConfig) int { if rc.MaxRepeatCycleStreak <= 0 { @@ -1129,8 +1095,6 @@ func withProgressReminder(systemPrompt string, score controlplane.ProgressScore) switch score.ReminderKind { case controlplane.ReminderKindRepeatCycle: reminder = selfHealingRepeatReminder - case controlplane.ReminderKindNoProgress, controlplane.ReminderKindGenericStalled: - reminder = selfHealingReminder default: return systemPrompt } diff --git a/internal/runtime/runtime_progress_test.go b/internal/runtime/runtime_progress_test.go index 267c664f..0acae70f 100644 --- a/internal/runtime/runtime_progress_test.go +++ b/internal/runtime/runtime_progress_test.go @@ -2,7 +2,6 @@ package runtime import ( "context" - "strconv" "strings" "sync/atomic" "testing" @@ -16,230 +15,6 @@ import ( todotool "neo-code/internal/tools/todo" ) -func TestProgressStreakWarnsAndAllowsRecovery(t *testing.T) { - t.Setenv("TEST_KEY", "dummy") - - cfg := config.Config{ - Providers: []config.ProviderConfig{{Name: "test-progress", Driver: "test", BaseURL: "http://localhost", Model: "test", APIKeyEnv: "TEST_KEY"}}, - SelectedProvider: "test-progress", - Workdir: t.TempDir(), - Runtime: config.RuntimeConfig{ - MaxNoProgressStreak: 3, - MaxRepeatCycleStreak: 6, - }, - } - - toolManager := &stubToolManager{ - specs: []providertypes.ToolSpec{ - {Name: "tool_error"}, - }, - executeFn: func(ctx context.Context, input tools.ToolCallInput) (tools.ToolResult, error) { - // Always return error to avoid generating progress - return tools.ToolResult{Content: "error occurred", IsError: true}, nil - }, - } - - var promptInjected bool - var providerCalls int32 - var signatureSeq int32 - providerFactory := &scriptedProviderFactory{ - provider: &scriptedProvider{ - chatFn: func(ctx context.Context, req providertypes.GenerateRequest, events chan<- providertypes.StreamEvent) error { - atomic.AddInt32(&providerCalls, 1) - if strings.Contains(req.SystemPrompt, selfHealingReminder) { - promptInjected = true - events <- providertypes.NewTextDeltaStreamEvent("{\"task_completion\":{\"completed\":true}}\ndone") - events <- providertypes.NewMessageDoneStreamEvent("stop", nil) - return nil - } - seq := atomic.AddInt32(&signatureSeq, 1) - // the model always decides to call the tool - events <- providertypes.NewToolCallStartStreamEvent(0, "call_err", "tool_error") - events <- providertypes.NewToolCallDeltaStreamEvent( - 0, - "call_err", - `{"seq":`+strconv.FormatInt(int64(seq), 10)+`}`, - ) - events <- providertypes.NewMessageDoneStreamEvent("tool_calls", nil) - return nil - }, - }, - } - - manager := config.NewManager(config.NewLoader(t.TempDir(), &cfg)) - - service := NewWithFactory( - manager, - toolManager, - newMemoryStore(), - providerFactory, - nil, - ) - - input := UserInput{ - RunID: "run-progress", - Parts: []providertypes.ContentPart{providertypes.NewTextPart("trigger error loop")}, - } - - if err := service.Run(context.Background(), input); err != nil { - t.Fatalf("expected run to recover after no-progress reminder, got %v", err) - } - - events := collectRuntimeEvents(service.Events()) - assertStopReasonDecided(t, events, controlplane.StopReasonAccepted, "") - - if !promptInjected { - t.Error("expected self-healing prompt injection before recovery") - } - if providerCalls != 4 { - t.Fatalf("expected 4 provider turns including reminder recovery, got %d", providerCalls) - } -} - -func TestProgressStreakTerminatesAfterReminderIfStillStalled(t *testing.T) { - t.Setenv("TEST_KEY", "dummy") - - cfg := config.Config{ - Providers: []config.ProviderConfig{{Name: "test-progress-hard-stop", Driver: "test", BaseURL: "http://localhost", Model: "test", APIKeyEnv: "TEST_KEY"}}, - SelectedProvider: "test-progress-hard-stop", - Workdir: t.TempDir(), - Runtime: config.RuntimeConfig{ - MaxNoProgressStreak: 2, - MaxRepeatCycleStreak: 6, - }, - } - - var executeCalls int32 - toolManager := &stubToolManager{ - specs: []providertypes.ToolSpec{{Name: "tool_error"}}, - executeFn: func(ctx context.Context, input tools.ToolCallInput) (tools.ToolResult, error) { - atomic.AddInt32(&executeCalls, 1) - return tools.ToolResult{Name: input.Name, Content: "error occurred", IsError: true}, nil - }, - } - - var promptInjected bool - var providerCalls int32 - var signatureSeq int32 - providerFactory := &scriptedProviderFactory{ - provider: &scriptedProvider{ - chatFn: func(ctx context.Context, req providertypes.GenerateRequest, events chan<- providertypes.StreamEvent) error { - atomic.AddInt32(&providerCalls, 1) - if strings.Contains(req.SystemPrompt, selfHealingReminder) { - promptInjected = true - } - seq := atomic.AddInt32(&signatureSeq, 1) - events <- providertypes.NewToolCallStartStreamEvent(0, "call_err", "tool_error") - events <- providertypes.NewToolCallDeltaStreamEvent( - 0, - "call_err", - `{"seq":`+strconv.FormatInt(int64(seq), 10)+`}`, - ) - events <- providertypes.NewMessageDoneStreamEvent("tool_calls", nil) - return nil - }, - }, - } - - manager := config.NewManager(config.NewLoader(t.TempDir(), &cfg)) - service := NewWithFactory(manager, toolManager, newMemoryStore(), providerFactory, nil) - - if err := service.Run(context.Background(), UserInput{ - RunID: "run-progress-hard-stop", - Parts: []providertypes.ContentPart{providertypes.NewTextPart("trigger unrecovered error loop")}, - }); err != nil { - t.Fatalf("expected run to stop cleanly on no-progress, got %v", err) - } - - events := collectRuntimeEvents(service.Events()) - assertStopReasonDecided(t, events, controlplane.StopReasonNoProgress, "") - - if !promptInjected { - t.Fatal("expected self-healing prompt injection before hard no-progress termination") - } - if executeCalls != 3 { - t.Fatalf("expected 3 tool executions before no-progress termination, got %d", executeCalls) - } - if providerCalls != 3 { - t.Fatalf("expected 3 provider turns before no-progress termination, got %d", providerCalls) - } -} - -func TestProgressEvidenceResetsNoProgressStreak(t *testing.T) { - t.Setenv("TEST_KEY", "dummy") - - cfg := config.Config{ - Providers: []config.ProviderConfig{{Name: "test-progress", Driver: "test", BaseURL: "http://localhost", Model: "test", APIKeyEnv: "TEST_KEY"}}, - SelectedProvider: "test-progress", - Workdir: t.TempDir(), - } - - var executeCalls int32 - toolManager := &stubToolManager{ - specs: []providertypes.ToolSpec{ - {Name: "tool_mixed"}, - }, - executeFn: func(ctx context.Context, input tools.ToolCallInput) (tools.ToolResult, error) { - call := int(atomic.AddInt32(&executeCalls, 1)) - if call == 3 { - return tools.ToolResult{Name: input.Name, Content: "ok", IsError: false}, nil - } - return tools.ToolResult{Name: input.Name, Content: "error occurred", IsError: true}, nil - }, - } - - var providerCalls int32 - var signatureSeq int32 - providerFactory := &scriptedProviderFactory{ - provider: &scriptedProvider{ - chatFn: func(ctx context.Context, req providertypes.GenerateRequest, events chan<- providertypes.StreamEvent) error { - call := int(atomic.AddInt32(&providerCalls, 1)) - if call <= 4 { - seq := atomic.AddInt32(&signatureSeq, 1) - events <- providertypes.NewToolCallStartStreamEvent(0, "call_mixed", "tool_mixed") - events <- providertypes.NewToolCallDeltaStreamEvent( - 0, - "call_mixed", - `{"seq":`+strconv.FormatInt(int64(seq), 10)+`}`, - ) - events <- providertypes.NewMessageDoneStreamEvent("tool_calls", nil) - return nil - } - events <- providertypes.NewTextDeltaStreamEvent("{\"task_completion\":{\"completed\":true}}\ndone") - events <- providertypes.NewMessageDoneStreamEvent("stop", nil) - return nil - }, - }, - } - - manager := config.NewManager(config.NewLoader(t.TempDir(), &cfg)) - service := NewWithFactory( - manager, - toolManager, - newMemoryStore(), - providerFactory, - nil, - ) - - err := service.Run(context.Background(), UserInput{ - RunID: "run-progress-reset", - Parts: []providertypes.ContentPart{providertypes.NewTextPart("trigger mixed progress loop")}, - }) - if err != nil { - t.Fatalf("expected run to finish successfully, got %v", err) - } - - if executeCalls != 4 { - t.Fatalf("expected 4 tool executions, got %d", executeCalls) - } - if providerCalls != 5 { - t.Fatalf("expected 5 provider calls (4 tool turns + 1 done), got %d", providerCalls) - } - - events := collectRuntimeEvents(service.Events()) - assertStopReasonDecided(t, events, controlplane.StopReasonAccepted, "") -} - func TestRepeatCycleStreakNoLongerStopsRunAndInjectsReminder(t *testing.T) { t.Setenv("TEST_KEY", "dummy") @@ -248,7 +23,6 @@ func TestRepeatCycleStreakNoLongerStopsRunAndInjectsReminder(t *testing.T) { SelectedProvider: "test-repeat", Workdir: t.TempDir(), Runtime: config.RuntimeConfig{ - MaxNoProgressStreak: 10, MaxRepeatCycleStreak: 3, }, } @@ -325,7 +99,6 @@ func TestRepeatCycleTerminatesAfterReminderIfStillStalled(t *testing.T) { SelectedProvider: "test-repeat-hard-stop", Workdir: t.TempDir(), Runtime: config.RuntimeConfig{ - MaxNoProgressStreak: 10, MaxRepeatCycleStreak: 3, }, } @@ -388,7 +161,6 @@ func TestRepeatCycleFailedCallsNoLongerHardStop(t *testing.T) { SelectedProvider: "test-repeat-fail", Workdir: t.TempDir(), Runtime: config.RuntimeConfig{ - MaxNoProgressStreak: 10, MaxRepeatCycleStreak: 3, }, } @@ -576,7 +348,6 @@ func TestPrepareTurnSnapshotInjectRepeatReminderWithEmptyPrompt(t *testing.T) { func TestPrepareTurnBudgetSnapshotRepeatReminderTakesPriority(t *testing.T) { manager := newRuntimeConfigManager(t) if err := manager.Update(context.Background(), func(cfg *config.Config) error { - cfg.Runtime.MaxNoProgressStreak = 3 cfg.Runtime.MaxRepeatCycleStreak = 3 return nil }); err != nil { @@ -593,7 +364,6 @@ func TestPrepareTurnBudgetSnapshotRepeatReminderTakesPriority(t *testing.T) { toolManager: &stubToolManager{}, } state := newRunState("run-reminder-priority", newRuntimeSession("session-reminder-priority")) - state.progress.LastScore.NoProgressStreak = 2 state.progress.LastScore.RepeatCycleStreak = 2 state.progress.LastScore.StalledProgressState = controlplane.StalledProgressStalled state.progress.LastScore.ReminderKind = controlplane.ReminderKindRepeatCycle @@ -608,19 +378,9 @@ func TestPrepareTurnBudgetSnapshotRepeatReminderTakesPriority(t *testing.T) { if !strings.Contains(snapshot.Request.SystemPrompt, selfHealingRepeatReminder) { t.Fatalf("expected prompt to contain repeat reminder, got %q", snapshot.Request.SystemPrompt) } - if strings.Contains(snapshot.Request.SystemPrompt, selfHealingReminder) { - t.Fatalf("expected no-progress reminder to be skipped when repeat reminder is injected, got %q", snapshot.Request.SystemPrompt) - } } func TestResolveStreakLimitDefaults(t *testing.T) { - if got := resolveNoProgressStreakLimit(config.RuntimeConfig{MaxNoProgressStreak: 0}); got != config.DefaultMaxNoProgressStreak { - t.Fatalf("expected default no-progress limit %d, got %d", config.DefaultMaxNoProgressStreak, got) - } - if got := resolveNoProgressStreakLimit(config.RuntimeConfig{MaxNoProgressStreak: 8}); got != 8 { - t.Fatalf("expected explicit no-progress limit 8, got %d", got) - } - if got := resolveRepeatCycleStreakLimit(config.RuntimeConfig{MaxRepeatCycleStreak: -1}); got != config.DefaultMaxRepeatCycleStreak { t.Fatalf("expected default repeat limit %d, got %d", config.DefaultMaxRepeatCycleStreak, got) } @@ -636,59 +396,11 @@ func TestResolveStreakLimitDefaults(t *testing.T) { } } -func TestComputeTodoStateSignature(t *testing.T) { - t.Parallel() - - if got := computeTodoStateSignature(nil); got != "" { - t.Fatalf("computeTodoStateSignature(nil) = %q", got) - } - - base := []agentsession.TodoItem{ - { - ID: "t1", - Content: "task", - Status: agentsession.TodoStatusPending, - Executor: agentsession.TodoExecutorAgent, - }, - } - sig1 := computeTodoStateSignature(base) - if strings.TrimSpace(sig1) == "" { - t.Fatal("expected non-empty signature") - } - - same := []agentsession.TodoItem{ - { - ID: "t1", - Content: "task", - Status: agentsession.TodoStatusPending, - Executor: agentsession.TodoExecutorAgent, - }, - } - sig2 := computeTodoStateSignature(same) - if sig1 != sig2 { - t.Fatalf("expected stable signature, got %q vs %q", sig1, sig2) - } - - changed := []agentsession.TodoItem{ - { - ID: "t1", - Content: "task", - Status: agentsession.TodoStatusCompleted, - Executor: agentsession.TodoExecutorAgent, - }, - } - sig3 := computeTodoStateSignature(changed) - if sig3 == sig1 { - t.Fatalf("expected changed signature when todo state changes") - } -} - func TestNoToolIncompleteTurnStillEvaluatesProgressAndInjectsReminder(t *testing.T) { t.Parallel() manager := newRuntimeConfigManager(t) if err := manager.Update(context.Background(), func(cfg *config.Config) error { - cfg.Runtime.MaxNoProgressStreak = 5 return nil }); err != nil { t.Fatalf("update config: %v", err) @@ -799,7 +511,6 @@ func TestAcceptanceContinueWithoutToolCallStopsAsIncomplete(t *testing.T) { manager := newRuntimeConfigManager(t) if err := manager.Update(context.Background(), func(cfg *config.Config) error { - cfg.Runtime.MaxNoProgressStreak = 2 return nil }); err != nil { t.Fatalf("update config: %v", err) diff --git a/internal/runtime/runtime_test.go b/internal/runtime/runtime_test.go index a16d69dc..504a45a9 100644 --- a/internal/runtime/runtime_test.go +++ b/internal/runtime/runtime_test.go @@ -3968,10 +3968,10 @@ func TestServiceRunPlanModePersistsDraftPlan(t *testing.T) { if builder.lastInput.CurrentPlan != nil { t.Fatalf("expected initial plan-mode build input to have nil CurrentPlan") } - if len(saved.Messages) != 2 { - t.Fatalf("message count = %d, want 2", len(saved.Messages)) + if len(saved.Messages) != 3 { + t.Fatalf("message count = %d, want 3", len(saved.Messages)) } - if got := renderPartsForTest(saved.Messages[1].Parts); !strings.Contains(got, "目标") { + if got := renderPartsForTest(saved.Messages[2].Parts); !strings.Contains(got, "目标") { t.Fatalf("expected rendered plan content, got %q", got) } } @@ -4018,10 +4018,10 @@ func TestServiceRunPlanModeShowsExplanationTextOutsidePlanningJSON(t *testing.T) if saved.CurrentPlan == nil || saved.CurrentPlan.Spec.Goal != "Preserve prose around planning JSON" { t.Fatalf("expected current plan to be updated, got %+v", saved.CurrentPlan) } - if len(saved.Messages) != 2 { - t.Fatalf("message count = %d, want 2", len(saved.Messages)) + if len(saved.Messages) != 3 { + t.Fatalf("message count = %d, want 3", len(saved.Messages)) } - got := renderPartsForTest(saved.Messages[1].Parts) + got := renderPartsForTest(saved.Messages[2].Parts) if strings.Contains(got, "\"plan_spec\"") { t.Fatalf("expected persisted assistant text to strip planning JSON, got %q", got) } diff --git a/internal/runtime/todo_bootstrap.go b/internal/runtime/todo_bootstrap.go index 309c97b9..9e1d0cd3 100644 --- a/internal/runtime/todo_bootstrap.go +++ b/internal/runtime/todo_bootstrap.go @@ -54,3 +54,39 @@ func hasActivePlanForTodoBootstrap(plan *agentsession.PlanArtifact) bool { return false } } + +const planBootstrapRequiredReason = "plan_bootstrap_required" + +const planBootstrapRequiredReminder = `[Runtime Control] + +plan_bootstrap_required: You are in plan mode but no current plan exists. + +Before research, analysis, or conversational response, you MUST complete the following: + +1. Research the codebase as needed using read-only tools. +2. Output a JSON object containing "plan_spec" and "summary_candidate" that defines the current plan. +3. plan_spec.todos must be non-empty — include major actionable items with unique IDs and status "pending". + +Do not end this turn without producing a plan.` + +// maybeAppendPlanBootstrapReminder 在 plan 模式缺少 CurrentPlan 时注入一次结构化提醒。 +func (s *Service) maybeAppendPlanBootstrapReminder(ctx context.Context, state *runState) error { + if !shouldInjectPlanBootstrapReminder(state) { + return nil + } + return s.appendSystemMessageAndSave(ctx, state, planBootstrapRequiredReminder) +} + +// shouldInjectPlanBootstrapReminder 判断本轮 plan 模式是否需要先创建 plan。 +func shouldInjectPlanBootstrapReminder(state *runState) bool { + if state == nil || state.disableTools || !state.planningEnabled { + return false + } + if resolvePlanningStageForState(state) != planStagePlan { + return false + } + state.mu.Lock() + plan := state.session.CurrentPlan + state.mu.Unlock() + return plan == nil +} diff --git a/internal/runtime/toolexec.go b/internal/runtime/toolexec.go index 3b28a76d..c6cc6ee0 100644 --- a/internal/runtime/toolexec.go +++ b/internal/runtime/toolexec.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "errors" + "fmt" "os" "path/filepath" "strings" @@ -471,10 +472,27 @@ func hasSuccessfulWorkspaceWriteFact(result tools.ToolResult, execErr error) boo if execErr != nil || result.IsError { return false } + return hasConfirmedWorkspaceWriteResult(result) +} + +// hasConfirmedWorkspaceWriteResult 判断工具结果是否带有 runtime 可确认的真实写入。 +func hasConfirmedWorkspaceWriteResult(result tools.ToolResult) bool { if toolResultNoopWrite(result.Metadata) { return false } - return result.Facts.WorkspaceWrite + if !result.Facts.WorkspaceWrite { + return false + } + name := strings.TrimSpace(result.Name) + switch { + case isFileWriteTool(name): + _, ok := buildToolDiffPayload(result) + return ok + case strings.EqualFold(name, tools.ToolNameBash): + return len(toolResultWorkspaceWritePaths(result.Metadata)) > 0 + default: + return false + } } // toolResultNoopWrite 判断工具结果是否声明了 no-op 写入(内容未变化)。 @@ -505,6 +523,43 @@ func toolResultFilePath(metadata map[string]any) string { return strings.TrimSpace(p) } +// toolResultWorkspaceWritePaths 从工具结果中提取 runtime 确认的写入路径。 +func toolResultWorkspaceWritePaths(metadata map[string]any) []string { + if metadata == nil { + return nil + } + raw, ok := metadata["workspace_write_paths"] + if !ok || raw == nil { + return nil + } + seen := map[string]struct{}{} + var out []string + add := func(value any) { + text := strings.TrimSpace(fmt.Sprint(value)) + if text == "" { + return + } + if _, exists := seen[text]; exists { + return + } + seen[text] = struct{}{} + out = append(out, text) + } + switch typed := raw.(type) { + case []string: + for _, value := range typed { + add(value) + } + case []any: + for _, value := range typed { + add(value) + } + case string: + add(typed) + } + return out +} + // isFileWriteTool 判断工具调用是否为文件写入类工具,需在执行前后做 diff。 func isFileWriteTool(name string) bool { switch strings.TrimSpace(name) { diff --git a/internal/runtime/turn_control.go b/internal/runtime/turn_control.go index 6047b447..a2f0b844 100644 --- a/internal/runtime/turn_control.go +++ b/internal/runtime/turn_control.go @@ -75,7 +75,7 @@ func applyToolExecutionCompletion(current controlplane.CompletionState, summary } } } - if result.Facts.WorkspaceWrite && !toolResultNoopWrite(result.Metadata) { + if hasConfirmedWorkspaceWriteResult(result) { current.HasUnverifiedWrites = true } if result.Facts.VerificationPerformed && result.Facts.VerificationPassed { @@ -87,65 +87,19 @@ func applyToolExecutionCompletion(current controlplane.CompletionState, summary // collectProgressInput 基于执行前后事实组装 progress 评估输入。 func collectProgressInput( - runState controlplane.RunState, - beforeTask agentsession.TaskState, afterTask agentsession.TaskState, - beforeTodos []agentsession.TodoItem, afterTodos []agentsession.TodoItem, summary toolExecutionSummary, - noProgressLimit int, repeatLimit int, ) controlplane.ProgressInput { - evidence := deriveProgressEvidence(beforeTask, afterTask, beforeTodos, afterTodos, summary) return controlplane.ProgressInput{ - RunState: runState, - Evidence: evidence, CurrentToolSignature: computeToolSignature(summary.Calls), ResultFingerprint: computeToolResultFingerprint(summary.Results), SubgoalFingerprint: computeSubgoalFingerprint(afterTask, afterTodos, summary.Calls), - NoProgressLimit: noProgressLimit, RepeatCycleLimit: repeatLimit, } } -// deriveProgressEvidence 从本轮前后快照和工具摘要中提取结构化 evidence。 -func deriveProgressEvidence( - beforeTask agentsession.TaskState, - afterTask agentsession.TaskState, - beforeTodos []agentsession.TodoItem, - afterTodos []agentsession.TodoItem, - summary toolExecutionSummary, -) []controlplane.ProgressEvidenceRecord { - var evidence []controlplane.ProgressEvidenceRecord - - if computeTaskStateSignature(beforeTask) != computeTaskStateSignature(afterTask) { - evidence = append(evidence, controlplane.ProgressEvidenceRecord{Kind: controlplane.EvidenceTaskStateChanged}) - } - if computeTodoStateSignature(beforeTodos) != computeTodoStateSignature(afterTodos) { - evidence = append(evidence, controlplane.ProgressEvidenceRecord{Kind: controlplane.EvidenceTodoStateChanged}) - } - if summary.HasSuccessfulWorkspaceWrite { - evidence = append(evidence, controlplane.ProgressEvidenceRecord{Kind: controlplane.EvidenceWriteApplied}) - } - if summary.HasSuccessfulVerification { - evidence = append(evidence, controlplane.ProgressEvidenceRecord{Kind: controlplane.EvidenceVerifyPassed}) - } - if hasSuccessfulInformationalResult(summary.Results) { - evidence = append(evidence, controlplane.ProgressEvidenceRecord{Kind: controlplane.EvidenceNewInfoNonDup}) - } - return evidence -} - -// computeTaskStateSignature 计算 task_state 的结构化签名。 -func computeTaskStateSignature(task agentsession.TaskState) string { - encoded, err := json.Marshal(task.Clone()) - if err != nil { - return "" - } - hash := sha256.Sum256(encoded) - return hex.EncodeToString(hash[:]) -} - // computeToolResultFingerprint 计算本轮工具结果的聚合指纹。 func computeToolResultFingerprint(results []tools.ToolResult) string { if len(results) == 0 { @@ -245,64 +199,6 @@ func hasCompletedRequiredTodos(items []agentsession.TodoItem) bool { return hasRequired } -// hasSuccessfulInformationalResult 判断本轮是否至少获得一个成功的非写入工具结果。 -func hasSuccessfulInformationalResult(results []tools.ToolResult) bool { - for _, result := range results { - if result.IsError { - continue - } - switch strings.TrimSpace(result.Name) { - case tools.ToolNameFilesystemWriteFile, tools.ToolNameFilesystemEdit: - continue - default: - return true - } - } - return false -} - -// shouldPromotePendingFinalProgress 判断本轮执行结果是否可以作为 final 拦截后的有效推进信号。 -func shouldPromotePendingFinalProgress( - score controlplane.ProgressScore, - summary toolExecutionSummary, - completion controlplane.CompletionState, - lastBlockedReason string, -) bool { - if score.HasBusinessProgress { - return true - } - if !score.HasExplorationProgress { - return false - } - - // 只读 read/glob 首次探索可算推进;同签名/同结果/同子目标且阻塞原因未变化时,不再重置 final 拦截计数。 - if hasSuccessfulReadOrGlobResult(summary.Results) && - score.SameToolSignature && - score.SameResultFingerprint && - score.SameSubgoal == controlplane.SubgoalRelationSame && - strings.EqualFold( - strings.TrimSpace(lastBlockedReason), - strings.TrimSpace(string(completion.CompletionBlockedReason)), - ) { - return false - } - return true -} - -// hasSuccessfulReadOrGlobResult 判断本轮是否存在成功的 filesystem_read_file / filesystem_glob 结果。 -func hasSuccessfulReadOrGlobResult(results []tools.ToolResult) bool { - for _, result := range results { - if result.IsError { - continue - } - switch strings.TrimSpace(result.Name) { - case tools.ToolNameFilesystemReadFile, tools.ToolNameFilesystemGlob: - return true - } - } - return false -} - // hasSuccessfulVerificationResult 判断本轮是否存在显式验证成功的结构化事实。 func hasSuccessfulVerificationResult(results []tools.ToolResult) bool { if len(results) == 0 { diff --git a/internal/runtime/turn_control_test.go b/internal/runtime/turn_control_test.go index 5e7172a3..39fa9130 100644 --- a/internal/runtime/turn_control_test.go +++ b/internal/runtime/turn_control_test.go @@ -29,7 +29,7 @@ func TestApplyToolExecutionCompletionTracksWriteAndVerification(t *testing.T) { written := applyToolExecutionCompletion(controlplane.CompletionState{}, toolExecutionSummary{ Results: []tools.ToolResult{ - {Facts: tools.ToolExecutionFacts{WorkspaceWrite: true}}, + confirmedFilesystemWriteResult("a.txt"), }, }) if !written.HasUnverifiedWrites { @@ -52,7 +52,7 @@ func TestApplyToolExecutionCompletionKeepsUnverifiedWhenVerifyBeforeWrite(t *tes got := applyToolExecutionCompletion(controlplane.CompletionState{}, toolExecutionSummary{ Results: []tools.ToolResult{ {Facts: tools.ToolExecutionFacts{VerificationPerformed: true, VerificationPassed: true}}, - {Facts: tools.ToolExecutionFacts{WorkspaceWrite: true}}, + confirmedFilesystemWriteResult("a.txt"), }, }) if !got.HasUnverifiedWrites { @@ -65,7 +65,7 @@ func TestApplyToolExecutionCompletionClearsWhenVerifyAfterWrite(t *testing.T) { got := applyToolExecutionCompletion(controlplane.CompletionState{}, toolExecutionSummary{ Results: []tools.ToolResult{ - {Facts: tools.ToolExecutionFacts{WorkspaceWrite: true}}, + confirmedFilesystemWriteResult("a.txt"), {Facts: tools.ToolExecutionFacts{VerificationPerformed: true, VerificationPassed: true}}, }, }) @@ -109,6 +109,100 @@ func TestToolResultNoopWrite(t *testing.T) { } } +func TestHasConfirmedWorkspaceWriteResultRequiresToolDiffEvidence(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + result tools.ToolResult + want bool + }{ + { + name: "filesystem write with tool diff payload", + result: confirmedFilesystemWriteResult("a.txt"), + want: true, + }, + { + name: "filesystem write without tool diff payload", + result: tools.ToolResult{ + Name: tools.ToolNameFilesystemEdit, + Facts: tools.ToolExecutionFacts{WorkspaceWrite: true}, + }, + want: false, + }, + { + name: "noop write", + result: tools.ToolResult{ + Name: tools.ToolNameFilesystemWriteFile, + Facts: tools.ToolExecutionFacts{WorkspaceWrite: true}, + Metadata: map[string]any{ + "path": "a.txt", + "noop_write": true, + }, + }, + want: false, + }, + { + name: "tool error", + result: tools.ToolResult{ + Name: tools.ToolNameFilesystemEdit, + IsError: true, + Facts: tools.ToolExecutionFacts{WorkspaceWrite: true}, + Metadata: map[string]any{ + "path": "a.txt", + }, + }, + want: false, + }, + { + name: "bash write paths", + result: tools.ToolResult{ + Name: tools.ToolNameBash, + Facts: tools.ToolExecutionFacts{WorkspaceWrite: true}, + Metadata: map[string]any{ + "workspace_write_paths": []string{"a.txt"}, + }, + }, + want: true, + }, + { + name: "bash without write paths", + result: tools.ToolResult{ + Name: tools.ToolNameBash, + Facts: tools.ToolExecutionFacts{WorkspaceWrite: true}, + }, + want: false, + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + if got := hasSuccessfulWorkspaceWriteFact(tc.result, nil); got != tc.want { + t.Fatalf("hasSuccessfulWorkspaceWriteFact() = %v, want %v", got, tc.want) + } + }) + } +} + +func confirmedFilesystemWriteResult(path string) tools.ToolResult { + return tools.ToolResult{ + Name: tools.ToolNameFilesystemEdit, + Facts: tools.ToolExecutionFacts{WorkspaceWrite: true}, + Metadata: map[string]any{ + "path": path, + "tool_diffs": []map[string]any{ + { + "path": path, + "diff": "--- a\n+++ b\n@@ -1 +1 @@\n-a\n+b", + "kind": FileChangeKindModified, + }, + }, + }, + } +} + func TestHasPendingAgentTodosBlocksOnAnyNonTerminalTodo(t *testing.T) { t.Parallel() @@ -186,70 +280,6 @@ func TestClassifyToolErrorPrefersExplicitErrorClass(t *testing.T) { } } -func TestShouldPromotePendingFinalProgress(t *testing.T) { - t.Parallel() - - t.Run("business progress always promotes", func(t *testing.T) { - t.Parallel() - score := controlplane.ProgressScore{HasBusinessProgress: true} - if !shouldPromotePendingFinalProgress(score, toolExecutionSummary{}, controlplane.CompletionState{}, "") { - t.Fatal("expected business progress to promote pending final progress") - } - }) - - t.Run("duplicate read result with same blocked reason does not promote", func(t *testing.T) { - t.Parallel() - score := controlplane.ProgressScore{ - HasExplorationProgress: true, - SameToolSignature: true, - SameResultFingerprint: true, - SameSubgoal: controlplane.SubgoalRelationSame, - } - summary := toolExecutionSummary{ - Results: []tools.ToolResult{ - {Name: tools.ToolNameFilesystemReadFile, Content: "same result"}, - }, - } - completion := controlplane.CompletionState{ - CompletionBlockedReason: controlplane.CompletionBlockedReasonPendingTodo, - } - if shouldPromotePendingFinalProgress( - score, - summary, - completion, - string(controlplane.CompletionBlockedReasonPendingTodo), - ) { - t.Fatal("expected duplicate informational read to not promote progress") - } - }) - - t.Run("read result still promotes when blocked reason changed", func(t *testing.T) { - t.Parallel() - score := controlplane.ProgressScore{ - HasExplorationProgress: true, - SameToolSignature: true, - SameResultFingerprint: true, - SameSubgoal: controlplane.SubgoalRelationSame, - } - summary := toolExecutionSummary{ - Results: []tools.ToolResult{ - {Name: tools.ToolNameFilesystemGlob, Content: "same result"}, - }, - } - completion := controlplane.CompletionState{ - CompletionBlockedReason: controlplane.CompletionBlockedReasonPendingTodo, - } - if !shouldPromotePendingFinalProgress( - score, - summary, - completion, - string(controlplane.CompletionBlockedReasonUnverifiedWrite), - ) { - t.Fatal("expected changed blocked reason to allow one more exploration promotion") - } - }) -} - func TestApplyToolExecutionCompletionTracksTodoStateFacts(t *testing.T) { t.Parallel() diff --git a/internal/tools/filesystem/glob_test.go b/internal/tools/filesystem/glob_test.go index 439699ed..aae0be3e 100644 --- a/internal/tools/filesystem/glob_test.go +++ b/internal/tools/filesystem/glob_test.go @@ -242,6 +242,36 @@ func TestGlobToolFiltersSensitiveAndSymlinkEscapes(t *testing.T) { } } +func TestGlobToolSkipsNoisyDirectories(t *testing.T) { + t.Parallel() + + workspace := t.TempDir() + mustWriteFile(t, filepath.Join(workspace, "src", "main.go"), "package main\n") + for _, dir := range []string{".cache", ".tmp", "tmp", "build", "dist", "out", "target", "coverage", ".next", ".nuxt", ".turbo", ".parcel-cache", ".vite", "vendor", "bin", "obj"} { + mustWriteFile(t, filepath.Join(workspace, dir, "skip.go"), "package skip\n") + } + + tool := NewGlob(workspace) + result, err := tool.Execute(context.Background(), tools.ToolCallInput{ + Name: tool.Name(), + Arguments: mustMarshalFSArgs(t, map[string]string{"pattern": "**/*.go"}), + Workdir: workspace, + }) + if err != nil { + t.Fatalf("Execute() error = %v", err) + } + + content := normalizeSlashPath(result.Content) + if !strings.Contains(content, normalizeSlashPath(filepath.Join("src", "main.go"))) { + t.Fatalf("expected src match, got %q", result.Content) + } + for _, dir := range []string{".cache", ".tmp", "tmp", "build", "dist", "out", "target", "coverage", ".next", ".nuxt", ".turbo", ".parcel-cache", ".vite", "vendor", "bin", "obj"} { + if strings.Contains(content, dir) { + t.Fatalf("expected noisy dir %q to be skipped, got %q", dir, result.Content) + } + } +} + func TestGlobToolVerificationFacts(t *testing.T) { t.Parallel() diff --git a/internal/tools/filesystem/grep_test.go b/internal/tools/filesystem/grep_test.go index f89cdab8..80d5f277 100644 --- a/internal/tools/filesystem/grep_test.go +++ b/internal/tools/filesystem/grep_test.go @@ -239,3 +239,33 @@ func TestGrepToolFiltersSensitiveAndSymlinkEscapes(t *testing.T) { t.Fatalf("expected symlink_escape reason count, got %#v", reasons) } } + +func TestGrepToolSkipsNoisyDirectories(t *testing.T) { + t.Parallel() + + workspace := t.TempDir() + mustWriteFile(t, filepath.Join(workspace, "src", "main.go"), "needle\n") + for _, dir := range []string{".cache", ".tmp", "tmp", "build", "dist", "out", "target", "coverage", ".next", ".nuxt", ".turbo", ".parcel-cache", ".vite", "vendor", "bin", "obj"} { + mustWriteFile(t, filepath.Join(workspace, dir, "skip.txt"), "needle from "+dir+"\n") + } + + tool := NewGrep(workspace) + result, err := tool.Execute(context.Background(), tools.ToolCallInput{ + Name: tool.Name(), + Arguments: mustMarshalFSArgs(t, map[string]any{"pattern": "needle"}), + Workdir: workspace, + }) + if err != nil { + t.Fatalf("Execute() error = %v", err) + } + + content := normalizeSlashPath(result.Content) + if !strings.Contains(content, normalizeSlashPath(filepath.Join("src", "main.go"))+":1: needle") { + t.Fatalf("expected src match, got %q", result.Content) + } + for _, dir := range []string{".cache", ".tmp", "tmp", "build", "dist", "out", "target", "coverage", ".next", ".nuxt", ".turbo", ".parcel-cache", ".vite", "vendor", "bin", "obj"} { + if strings.Contains(content, dir) { + t.Fatalf("expected noisy dir %q to be skipped, got %q", dir, result.Content) + } + } +} diff --git a/internal/tools/filesystem/helpers.go b/internal/tools/filesystem/helpers.go index 5296914f..c7b6366b 100644 --- a/internal/tools/filesystem/helpers.go +++ b/internal/tools/filesystem/helpers.go @@ -47,7 +47,9 @@ func skipDirEntry(path string, entry os.DirEntry) bool { name := strings.ToLower(strings.TrimSpace(entry.Name())) switch name { - case ".git", ".idea", ".vscode", "node_modules": + case ".git", ".idea", ".vscode", "node_modules", + ".cache", ".tmp", "tmp", "build", "dist", "out", "target", "coverage", + ".next", ".nuxt", ".turbo", ".parcel-cache", ".vite", "vendor", "bin", "obj": return true } diff --git a/internal/tools/filesystem/helpers_test.go b/internal/tools/filesystem/helpers_test.go index 3fb75c34..e3c0fdc2 100644 --- a/internal/tools/filesystem/helpers_test.go +++ b/internal/tools/filesystem/helpers_test.go @@ -26,6 +26,10 @@ func TestSkipDirEntry(t *testing.T) { root := t.TempDir() mustCreateDir(t, filepath.Join(root, ".git")) mustCreateDir(t, filepath.Join(root, "node_modules")) + mustCreateDir(t, filepath.Join(root, ".cache")) + mustCreateDir(t, filepath.Join(root, "build")) + mustCreateDir(t, filepath.Join(root, "dist")) + mustCreateDir(t, filepath.Join(root, "vendor")) mustCreateDir(t, filepath.Join(root, "keep")) mustWriteTestFile(t, filepath.Join(root, ".vscode"), "not-a-dir") @@ -45,6 +49,11 @@ func TestSkipDirEntry(t *testing.T) { if !got["node_modules"] { t.Fatalf("node_modules skip = false, want true") } + for _, name := range []string{".cache", "build", "dist", "vendor"} { + if !got[name] { + t.Fatalf("%s skip = false, want true", name) + } + } if got["keep"] { t.Fatalf("keep skip = true, want false") } diff --git a/internal/tui/core/app/update.go b/internal/tui/core/app/update.go index e16df225..03a1adf8 100644 --- a/internal/tui/core/app/update.go +++ b/internal/tui/core/app/update.go @@ -3695,9 +3695,7 @@ func runtimeEventStopReasonDecidedHandler(a *App, event tuiservices.RuntimeEvent case strings.ToLower(string(tuiservices.StopReasonTodoNotConverged)), strings.ToLower(string(tuiservices.StopReasonTodoWaitingExternal)), strings.ToLower(string(tuiservices.StopReasonMissingCompletionSignal)), - strings.ToLower(string(tuiservices.StopReasonNoProgress)), strings.ToLower(string(tuiservices.StopReasonRepeatCycle)), - strings.ToLower(string(tuiservices.StopReasonNoProgressAfterFinalIntercept)), strings.ToLower(string(tuiservices.StopReasonMaxTurnExceededWithUnconvergedTodos)), strings.ToLower(string(tuiservices.StopReasonMaxTurnExceededWithFailedVerification)): detail := strings.TrimSpace(payload.Detail) @@ -4814,8 +4812,6 @@ func humanizeDecisionReason(reason string, missingFacts []map[string]any) string } } return "任务仍缺少关键事实。" - case "no_progress_after_final_intercept": - return "连续多轮未产生新事实。" case "required_todo_failed": return "存在 required todo 失败。" default: diff --git a/internal/tui/services/runtime_contract.go b/internal/tui/services/runtime_contract.go index 2d23886e..23894e02 100644 --- a/internal/tui/services/runtime_contract.go +++ b/internal/tui/services/runtime_contract.go @@ -313,10 +313,6 @@ const ( StopReasonTodoNotConverged StopReason = "todo_not_converged" // StopReasonTodoWaitingExternal 表示 todo 等待外部输入。 StopReasonTodoWaitingExternal StopReason = "todo_waiting_external" - // StopReasonNoProgressAfterFinalIntercept 表示 final 被拦截后长期无进展。 - StopReasonNoProgressAfterFinalIntercept StopReason = "no_progress_after_final_intercept" - // StopReasonNoProgress 表示运行连续缺少实质进展。 - StopReasonNoProgress StopReason = "no_progress" // StopReasonRepeatCycle 表示运行重复相同动作或结果。 StopReasonRepeatCycle StopReason = "repeat_cycle" // StopReasonMaxTurnExceededWithUnconvergedTodos 表示 max turn + todo 未收敛。 From b4ed86dfcdc88ee64c31f4d73214c001dc491c8d Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Sun, 10 May 2026 01:32:04 +0800 Subject: [PATCH 09/15] =?UTF-8?q?fix(runtime):=E5=88=A0=E9=99=A4=E5=86=97?= =?UTF-8?q?=E4=BD=99=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/config/context_budget_migration.go | 29 +++++++++++++++++++ .../config/context_budget_migration_test.go | 29 +++++++++++++++++++ internal/runtime/acceptgate_runtime.go | 5 ++-- internal/runtime/ask.go | 3 +- internal/runtime/message_render.go | 13 --------- internal/runtime/run.go | 5 ++-- 6 files changed, 66 insertions(+), 18 deletions(-) delete mode 100644 internal/runtime/message_render.go diff --git a/internal/config/context_budget_migration.go b/internal/config/context_budget_migration.go index acf2e4bc..4c0f0027 100644 --- a/internal/config/context_budget_migration.go +++ b/internal/config/context_budget_migration.go @@ -134,6 +134,10 @@ func MigrateContextBudgetConfigContent(raw []byte) ([]byte, bool, []string, erro if verificationChanged { changed = true } + legacyRuntimeChanged := migrateLegacyRuntimeConfigFields(doc) + if legacyRuntimeChanged { + changed = true + } if !changed { return raw, false, nil, nil @@ -146,6 +150,31 @@ func MigrateContextBudgetConfigContent(raw []byte) ([]byte, bool, []string, erro return out, true, notes, nil } +// migrateLegacyRuntimeConfigFields 清理 runtime 下已废弃且会导致严格解析失败的历史字段。 +func migrateLegacyRuntimeConfigFields(doc map[string]any) bool { + runtimeValue, ok := doc["runtime"] + if !ok { + return false + } + runtimeMap, ok := migrationStringMap(runtimeValue) + if !ok { + return false + } + + changed := false + for _, key := range []string{"max_no_progress_streak"} { + if _, exists := runtimeMap[key]; exists { + delete(runtimeMap, key) + changed = true + } + } + if !changed { + return false + } + doc["runtime"] = runtimeMap + return true +} + // migrateVerificationConfig 清理已废弃的 verification 字段,并将安全的旧 command string 收敛成 argv。 func migrateVerificationConfig(doc map[string]any) (bool, error) { runtimeValue, ok := doc["runtime"] diff --git a/internal/config/context_budget_migration_test.go b/internal/config/context_budget_migration_test.go index a31deb68..ff0960c8 100644 --- a/internal/config/context_budget_migration_test.go +++ b/internal/config/context_budget_migration_test.go @@ -206,6 +206,35 @@ runtime: } } +func TestMigrateContextBudgetConfigContentRemovesLegacyRuntimeNoProgressField(t *testing.T) { + t.Parallel() + + input := []byte(strings.TrimSpace(` +runtime: + max_no_progress_streak: 5 + max_repeat_cycle_streak: 3 +`) + "\n") + + out, changed, notes, err := MigrateContextBudgetConfigContent(input) + if err != nil { + t.Fatalf("MigrateContextBudgetConfigContent() error = %v", err) + } + if !changed { + t.Fatal("expected migration change") + } + if len(notes) != 0 { + t.Fatalf("expected no migration notes, got %v", notes) + } + + text := string(out) + if strings.Contains(text, "max_no_progress_streak") { + t.Fatalf("expected max_no_progress_streak removed, got:\n%s", text) + } + if !strings.Contains(text, "max_repeat_cycle_streak: 3") { + t.Fatalf("expected max_repeat_cycle_streak preserved, got:\n%s", text) + } +} + func TestMigrateContextBudgetConfigFileCreatesBackup(t *testing.T) { t.Parallel() diff --git a/internal/runtime/acceptgate_runtime.go b/internal/runtime/acceptgate_runtime.go index 9195c6f7..a8c890f4 100644 --- a/internal/runtime/acceptgate_runtime.go +++ b/internal/runtime/acceptgate_runtime.go @@ -4,6 +4,7 @@ import ( "context" "strings" + "neo-code/internal/partsrender" "neo-code/internal/promptasset" providertypes "neo-code/internal/provider/types" "neo-code/internal/runtime/acceptgate" @@ -108,7 +109,7 @@ func (s *Service) emitAcceptGateReport(state *runState, report acceptgate.Report } func renderAssistantTextWithoutCompletion(message providertypes.Message) string { - text := strings.TrimSpace(renderPartsForVerification(message.Parts)) + text := strings.TrimSpace(partsrender.RenderDisplayParts(message.Parts)) if text == "" { return "" } @@ -122,7 +123,7 @@ func renderAssistantTextWithoutCompletion(message providertypes.Message) string // stripCompletionSignalFromAssistantMessage 移除仅供 runtime 控制使用的 task_completion JSON,保留用户可见回复。 func stripCompletionSignalFromAssistantMessage(message providertypes.Message) providertypes.Message { text := renderAssistantTextWithoutCompletion(message) - if strings.TrimSpace(text) == strings.TrimSpace(renderPartsForVerification(message.Parts)) { + if strings.TrimSpace(text) == strings.TrimSpace(partsrender.RenderDisplayParts(message.Parts)) { return message } message.Parts = []providertypes.ContentPart{providertypes.NewTextPart(text)} diff --git a/internal/runtime/ask.go b/internal/runtime/ask.go index 76041989..b1ee47a5 100644 --- a/internal/runtime/ask.go +++ b/internal/runtime/ask.go @@ -11,6 +11,7 @@ import ( "neo-code/internal/config" agentcontext "neo-code/internal/context" + "neo-code/internal/partsrender" "neo-code/internal/provider" providertypes "neo-code/internal/provider/types" "neo-code/internal/runtime/streaming" @@ -145,7 +146,7 @@ func (s *Service) Ask(ctx context.Context, input AskInput) error { return failAsk(streamOutcome.err) } - reply := strings.TrimSpace(renderPartsForVerification(streamOutcome.message.Parts)) + reply := strings.TrimSpace(partsrender.RenderDisplayParts(streamOutcome.message.Parts)) session = appendAskMessage(session, "assistant", reply) if err := s.askStore.Save(ctx, session); err != nil { return failAsk(err) diff --git a/internal/runtime/message_render.go b/internal/runtime/message_render.go deleted file mode 100644 index fc2e4344..00000000 --- a/internal/runtime/message_render.go +++ /dev/null @@ -1,13 +0,0 @@ -package runtime - -import ( - "strings" - - "neo-code/internal/partsrender" - providertypes "neo-code/internal/provider/types" -) - -// renderPartsForVerification 将多模态消息压平成验收与完成信号解析使用的稳定文本。 -func renderPartsForVerification(parts []providertypes.ContentPart) string { - return strings.TrimSpace(partsrender.RenderDisplayParts(parts)) -} diff --git a/internal/runtime/run.go b/internal/runtime/run.go index 4784f4ca..028972d4 100644 --- a/internal/runtime/run.go +++ b/internal/runtime/run.go @@ -14,6 +14,7 @@ import ( "neo-code/internal/config" agentcontext "neo-code/internal/context" contextcompact "neo-code/internal/context/compact" + "neo-code/internal/partsrender" "neo-code/internal/promptasset" "neo-code/internal/provider" providertypes "neo-code/internal/provider/types" @@ -162,7 +163,7 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { agentsession.NormalizeAgentMode(session.AgentMode) == agentsession.AgentModePlan state.taskID = strings.TrimSpace(input.TaskID) state.agentID = strings.TrimSpace(input.AgentID) - state.userGoal = strings.TrimSpace(renderPartsForVerification(input.Parts)) + state.userGoal = strings.TrimSpace(partsrender.RenderDisplayParts(input.Parts)) if input.CapabilityToken != nil { token := input.CapabilityToken.Normalize() state.capabilityToken = &token @@ -379,7 +380,7 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { s.emitRunScoped(ctx, EventAgentDone, &state, planMessage) return nil } - if strings.TrimSpace(renderPartsForVerification(turnOutput.assistant.Parts)) != "" { + if strings.TrimSpace(partsrender.RenderDisplayParts(turnOutput.assistant.Parts)) != "" { if err := s.appendAssistantMessageOnlyAndSave(ctx, &state, turnOutput.assistant); err != nil { return s.handleRunError(err) } From 6ae3b5f1e330a443e17d1f15b9e1429cbc87ac63 Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Sun, 10 May 2026 01:35:18 +0800 Subject: [PATCH 10/15] =?UTF-8?q?fix(runtime):=E8=A1=A5=E5=85=85=E6=BC=8F?= =?UTF-8?q?=E4=BA=A4=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/runtime/acceptgate/checks_test.go | 56 ++++++++++++++ internal/runtime/acceptgate_runtime_test.go | 81 +++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 internal/runtime/acceptgate/checks_test.go create mode 100644 internal/runtime/acceptgate_runtime_test.go diff --git a/internal/runtime/acceptgate/checks_test.go b/internal/runtime/acceptgate/checks_test.go new file mode 100644 index 00000000..32333756 --- /dev/null +++ b/internal/runtime/acceptgate/checks_test.go @@ -0,0 +1,56 @@ +package acceptgate + +import ( + "context" + "testing" + + runtimefacts "neo-code/internal/runtime/facts" + agentsession "neo-code/internal/session" +) + +func TestNormalizeCommandKeepsCLIFlags(t *testing.T) { + t.Parallel() + + got := normalizeCommand("go test ./... -run=TestFoo --filter=a=b -count=1") + want := "go test ./... -run=testfoo --filter=a=b -count=1" + if got != want { + t.Fatalf("normalizeCommand() = %q, want %q", got, want) + } +} + +func TestNormalizeCommandStripsEnvVars(t *testing.T) { + t.Parallel() + + got := normalizeCommand("CGO_ENABLED=0 GOFLAGS=-count=1 go test ./...") + if got != "go test ./..." { + t.Fatalf("normalizeCommand() = %q, want %q", got, "go test ./...") + } +} + +func TestNormalizeCommandKeepsPathAssignments(t *testing.T) { + t.Parallel() + + got := normalizeCommand("PKG=./cmd/... go test") + if got != "pkg=./cmd/... go test" { + t.Fatalf("normalizeCommand() = %q, want %q", got, "pkg=./cmd/... go test") + } +} + +func TestEvaluateCommandSuccessKeepsFlagSpecificity(t *testing.T) { + t.Parallel() + + report := Evaluate(context.Background(), Input{ + PlanVerify: agentsession.AcceptChecks{ + {Kind: agentsession.AcceptCheckCommandSuccess, Target: "go test ./... -run=TestFoo"}, + }, + Facts: runtimefacts.RuntimeFacts{ + Commands: runtimefacts.CommandFacts{Executed: []runtimefacts.CommandFact{ + {Tool: "bash", Command: "go test ./...", Succeeded: true}, + }}, + }, + LastAssistantText: "done", + }) + if report.Outcome != OutcomeFailed { + t.Fatalf("report = %+v, want failed because broad command must not satisfy -run-specific check", report) + } +} diff --git a/internal/runtime/acceptgate_runtime_test.go b/internal/runtime/acceptgate_runtime_test.go new file mode 100644 index 00000000..32bab233 --- /dev/null +++ b/internal/runtime/acceptgate_runtime_test.go @@ -0,0 +1,81 @@ +package runtime + +import ( + "testing" + "time" + + agentsession "neo-code/internal/session" +) + +func TestSelectPlanOwnedTodosIncludesPostPlanRequired(t *testing.T) { + t.Parallel() + + required := true + optional := false + createdAt := time.Date(2026, 5, 9, 10, 0, 0, 0, time.UTC) + plan := &agentsession.PlanArtifact{ + Status: agentsession.PlanStatusApproved, + CreatedAt: createdAt, + Spec: agentsession.PlanSpec{Todos: []agentsession.TodoItem{ + {ID: "plan-owned", Status: agentsession.TodoStatusPending}, + }}, + } + todos := []agentsession.TodoItem{ + { + ID: "plan-owned", + Status: agentsession.TodoStatusPending, + Required: &required, + CreatedAt: createdAt.Add(-time.Hour), + }, + { + ID: "post-required", + Status: agentsession.TodoStatusPending, + Required: &required, + CreatedAt: createdAt.Add(time.Minute), + }, + { + ID: "old-required", + Status: agentsession.TodoStatusPending, + Required: &required, + CreatedAt: createdAt.Add(-time.Minute), + }, + { + ID: "post-optional", + Status: agentsession.TodoStatusPending, + Required: &optional, + CreatedAt: createdAt.Add(time.Minute), + }, + { + ID: "post-completed", + Status: agentsession.TodoStatusCompleted, + Required: &required, + CreatedAt: createdAt.Add(time.Minute), + }, + } + + selected := selectPlanOwnedTodos(plan, todos) + if len(selected) != 2 { + t.Fatalf("selected length = %d, want 2: %+v", len(selected), selected) + } + if selected[0].ID != "plan-owned" || selected[1].ID != "post-required" { + t.Fatalf("selected IDs = [%s %s], want [plan-owned post-required]", selected[0].ID, selected[1].ID) + } +} + +func TestSelectPlanOwnedTodosRequiresPlanForPostPlanRequired(t *testing.T) { + t.Parallel() + + required := true + todos := []agentsession.TodoItem{ + { + ID: "post-required", + Status: agentsession.TodoStatusPending, + Required: &required, + CreatedAt: time.Now(), + }, + } + + if selected := selectPlanOwnedTodos(nil, todos); selected != nil { + t.Fatalf("selectPlanOwnedTodos(nil) = %+v, want nil", selected) + } +} From f0f997cfb76c0ce449380ff442c6a878cf79ece5 Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Sun, 10 May 2026 01:38:25 +0800 Subject: [PATCH 11/15] Update provider.go --- internal/config/provider.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/config/provider.go b/internal/config/provider.go index fc968a1d..494c8994 100644 --- a/internal/config/provider.go +++ b/internal/config/provider.go @@ -734,7 +734,7 @@ const ( GLMDefaultAPIKeyEnv = "GLM_API_KEY" MiMoName = "mimo" - MiMoDefaultBaseURL = "https://api.xiaomimimo.com/v1" + MiMoDefaultBaseURL = "https://token-plan-cn.xiaomimimo.com/v1" MiMoDefaultModel = "mimo-v2.5-pro" MiMoDefaultAPIKeyEnv = "MIMO_API_KEY" From 3c8e0b404b2490eb431fe4870c47baf2208074b5 Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Sun, 10 May 2026 02:17:38 +0800 Subject: [PATCH 12/15] =?UTF-8?q?fix(runtime):=E5=88=A0=E9=99=A4=E5=A4=9A?= =?UTF-8?q?=E4=BD=99=E5=AD=97=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/runtime/events.go | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/internal/runtime/events.go b/internal/runtime/events.go index 0809b092..d4b3e1ee 100644 --- a/internal/runtime/events.go +++ b/internal/runtime/events.go @@ -95,15 +95,10 @@ type VerificationFailedPayload struct { // AcceptanceDecidedPayload 描述 acceptance engine 决议结果。 type AcceptanceDecidedPayload struct { - Status string `json:"status"` - StopReason controlplane.StopReason `json:"stop_reason,omitempty"` - ErrorClass verify.ErrorClass `json:"error_class,omitempty"` - CompletionBlockedReason string `json:"completion_blocked_reason,omitempty"` - UserVisibleSummary string `json:"user_visible_summary,omitempty"` - InternalSummary string `json:"internal_summary,omitempty"` - ContinueHint string `json:"continue_hint,omitempty"` - Summary string `json:"summary,omitempty"` - Results []acceptgate.CheckResult `json:"results,omitempty"` + Status string `json:"status"` + StopReason controlplane.StopReason `json:"stop_reason,omitempty"` + Summary string `json:"summary,omitempty"` + Results []acceptgate.CheckResult `json:"results,omitempty"` } // LedgerReconciledPayload 为账本对账预留负载。 From 1464dfe4cb62f3be385eb9297aaa163057946f13 Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Sun, 10 May 2026 11:13:24 +0800 Subject: [PATCH 13/15] =?UTF-8?q?fix(runtime):=E4=BF=AE=E5=A4=8D=E9=94=99?= =?UTF-8?q?=E8=AF=AF=E4=BF=A1=E6=81=AF=E6=98=BE=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../promptasset/templates/core/tool_usage.md | 4 ++ internal/runtime/run.go | 33 +++++++++++-- internal/runtime/runtime_progress_test.go | 47 +++++++++++-------- internal/runtime/state.go | 1 + 4 files changed, 62 insertions(+), 23 deletions(-) diff --git a/internal/promptasset/templates/core/tool_usage.md b/internal/promptasset/templates/core/tool_usage.md index c720174d..67598517 100644 --- a/internal/promptasset/templates/core/tool_usage.md +++ b/internal/promptasset/templates/core/tool_usage.md @@ -8,6 +8,10 @@ - Verify file content with `filesystem_read_file` + `expect_contains` and `verification_scope`; avoid `bash Get-Content` for routine checks. - Use `filesystem_grep` to locate symbols, strings, and relevant code paths efficiently. - Read tool results carefully before acting. Treat `status`, `ok`, `tool_call_id`, `truncated`, `meta.*`, exit codes, and `content` as the authoritative model-visible outcome of that call. +- For explanation, Q&A, or concept-clarification requests, use tools only until you have enough evidence to answer. +- After reading or searching the relevant files for an informational request, stop exploring and answer directly. +- Do not restart the same interpretation loop after you already have enough context. +- If two search/read passes do not change your conclusion, provide the answer and briefly state any remaining uncertainty. ## Repository exploration When exploring the codebase, Git state, or current changes: diff --git a/internal/runtime/run.go b/internal/runtime/run.go index 028972d4..364d44d0 100644 --- a/internal/runtime/run.go +++ b/internal/runtime/run.go @@ -403,9 +403,7 @@ func (s *Service) Run(ctx context.Context, input UserInput) (err error) { state.mu.Unlock() if missingCompletionSignalStreak < missingCompletionSignalLimit { reminder := completionProtocolReminderForStreak(missingCompletionSignalStreak) - if err := s.appendSystemMessageAndSave(ctx, &state, reminder); err != nil { - return s.handleRunError(err) - } + setPendingSystemReminder(&state, reminder) break turnAttempt } state.markTerminalDecision( @@ -587,6 +585,9 @@ func (s *Service) prepareTurnBudgetSnapshot(ctx context.Context, state *runState repeatLimit := resolveRepeatCycleStreakLimit(cfg.Runtime) systemPrompt := withProgressReminder(builtContext.SystemPrompt, score) + if pendingReminder := drainPendingSystemReminder(state); pendingReminder != "" { + systemPrompt = mergeEphemeralHookNotificationIntoSystemPrompt(systemPrompt, pendingReminder) + } if notificationHint := strings.TrimSpace(s.drainHookNotificationsForTurn(state)); notificationHint != "" { systemPrompt = mergeEphemeralHookNotificationIntoSystemPrompt(systemPrompt, notificationHint) } @@ -649,6 +650,32 @@ func resolveRuntimeMaxTurns(rc config.RuntimeConfig) int { return rc.MaxTurns } +// setPendingSystemReminder 暂存只用于下一轮 provider 请求的系统提醒,避免写入会话历史。 +func setPendingSystemReminder(state *runState, reminder string) { + if state == nil { + return + } + reminder = strings.TrimSpace(reminder) + if reminder == "" { + return + } + state.mu.Lock() + state.pendingSystemReminder = reminder + state.mu.Unlock() +} + +// drainPendingSystemReminder 读取并清空本轮待注入的系统提醒,保证提醒只进入一次 provider 请求。 +func drainPendingSystemReminder(state *runState) string { + if state == nil { + return "" + } + state.mu.Lock() + defer state.mu.Unlock() + reminder := strings.TrimSpace(state.pendingSystemReminder) + state.pendingSystemReminder = "" + return reminder +} + // callProvider 使用冻结后的 TurnBudgetSnapshot 执行单次 provider 调用。 func (s *Service) callProvider( ctx context.Context, diff --git a/internal/runtime/runtime_progress_test.go b/internal/runtime/runtime_progress_test.go index 0acae70f..16b1e4c8 100644 --- a/internal/runtime/runtime_progress_test.go +++ b/internal/runtime/runtime_progress_test.go @@ -487,19 +487,28 @@ func TestNoToolIncompleteTurnStillEvaluatesProgressAndInjectsReminder(t *testing if len(providerImpl.requests) < 2 { t.Fatalf("expected at least 2 provider requests, got %d", len(providerImpl.requests)) } - foundReminder := false - for _, message := range providerImpl.requests[1].Messages { + secondSystemPrompt := providerImpl.requests[1].SystemPrompt + if !strings.Contains(secondSystemPrompt, "[Runtime Control]") || + !strings.Contains(secondSystemPrompt, "task_completion") { + t.Fatalf("expected runtime protocol note in second provider request system prompt, got %q", secondSystemPrompt) + } + if len(providerImpl.requests) > 2 { + thirdSystemPrompt := providerImpl.requests[2].SystemPrompt + if strings.Contains(thirdSystemPrompt, "[Runtime Control]") && + strings.Contains(thirdSystemPrompt, "task_completion") { + t.Fatalf("expected runtime protocol note to be injected once, got third system prompt %q", thirdSystemPrompt) + } + } + + savedSession := store.sessions[session.ID] + for _, message := range savedSession.Messages { content := renderPartsForTest(message.Parts) if message.Role == providertypes.RoleSystem && strings.Contains(content, "[Runtime Control]") && strings.Contains(content, "task_completion") { - foundReminder = true - break + t.Fatalf("expected completion reminder to stay out of session transcript, found %q", content) } } - if !foundReminder { - t.Fatalf("expected runtime protocol note in second provider request messages, got %+v", providerImpl.requests[1].Messages) - } events := collectRuntimeEvents(service.Events()) assertEventContains(t, events, EventProgressEvaluated) @@ -562,23 +571,21 @@ func TestAcceptanceContinueWithoutToolCallStopsAsIncomplete(t *testing.T) { t.Fatalf("expected runtime to stop after six missing completion signals, got %d requests", len(providerImpl.requests)) } // 第 6 个请求(streak=5 时注入最终提醒后)应包含最终协议提醒 - fifthRequestMessages := providerImpl.requests[5].Messages - foundFinalHint := false - for _, message := range fifthRequestMessages { - if message.Role != providertypes.RoleSystem { - continue - } + fifthSystemPrompt := providerImpl.requests[5].SystemPrompt + if !strings.Contains(fifthSystemPrompt, "[Runtime Control]") || + !strings.Contains(fifthSystemPrompt, "final protocol reminder") || + !strings.Contains(fifthSystemPrompt, "task_completion") { + t.Fatalf("expected final runtime protocol note in request 5 system prompt, got %q", fifthSystemPrompt) + } + savedSession := store.sessions[session.ID] + for _, message := range savedSession.Messages { content := renderPartsForTest(message.Parts) - if strings.Contains(content, "[Runtime Control]") && - strings.Contains(content, "final protocol reminder") && + if message.Role == providertypes.RoleSystem && + strings.Contains(content, "[Runtime Control]") && strings.Contains(content, "task_completion") { - foundFinalHint = true - break + t.Fatalf("expected completion reminder to stay out of session transcript, found %q", content) } } - if !foundFinalHint { - t.Fatalf("expected final runtime protocol note in request 5, got messages: %+v", fifthRequestMessages) - } events := collectRuntimeEvents(service.Events()) assertStopReasonDecided(t, events, controlplane.StopReasonMissingCompletionSignal, "") diff --git a/internal/runtime/state.go b/internal/runtime/state.go index d924e594..d7accc61 100644 --- a/internal/runtime/state.go +++ b/internal/runtime/state.go @@ -35,6 +35,7 @@ type runState struct { maxTurnsLimit int userGoal string missingCompletionSignalStreak int + pendingSystemReminder string factsCollector *runtimefacts.Collector terminalStatus controlplane.TerminalStatus terminalStopReason controlplane.StopReason From 241241ea6b5df828fd781743269b834d73e6ec7b Mon Sep 17 00:00:00 2001 From: xgopilot Date: Sun, 10 May 2026 11:13:16 +0000 Subject: [PATCH 14/15] fix(runtime): restore tool timeout backoff state Generated with [codeagent](https://github.com/qbox/codeagent) Co-authored-by: phantom5099 <245659304+phantom5099@users.noreply.github.com> --- internal/runtime/state.go | 85 ++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/internal/runtime/state.go b/internal/runtime/state.go index 85d2ef0f..cd956648 100644 --- a/internal/runtime/state.go +++ b/internal/runtime/state.go @@ -12,50 +12,51 @@ import ( // runState 汇总单次 Run 生命周期内会变化的会话与计量状态。 type runState struct { - mu sync.Mutex - runID string - runToken uint64 - session agentsession.Session - effectiveWorkdir string - compactCount int - reactiveCompactAttempts int - rememberedThisRun bool - planningEnabled bool - taskID string - agentID string - capabilityToken *security.CapabilityToken - nextAttemptSeq int - turn int - baseLifecycle controlplane.RunState - lifecycle controlplane.RunState - waitingPermissionCount int - compactingCount int - stopEmitted bool - budgetExceeded bool - maxTurnsReached bool - maxTurnsLimit int - userGoal string + mu sync.Mutex + runID string + runToken uint64 + session agentsession.Session + effectiveWorkdir string + compactCount int + reactiveCompactAttempts int + rememberedThisRun bool + planningEnabled bool + taskID string + agentID string + capabilityToken *security.CapabilityToken + nextAttemptSeq int + turn int + baseLifecycle controlplane.RunState + lifecycle controlplane.RunState + waitingPermissionCount int + compactingCount int + stopEmitted bool + budgetExceeded bool + maxTurnsReached bool + maxTurnsLimit int + userGoal string missingCompletionSignalStreak int pendingSystemReminder string - factsCollector *runtimefacts.Collector - terminalStatus controlplane.TerminalStatus - terminalStopReason controlplane.StopReason - terminalStopDetail string - terminalSet bool - hasUnknownUsage bool - completion controlplane.CompletionState - progress controlplane.ProgressState - lastEndOfTurnCheckpointID string - runCheckpointID string - hasRunWorkspaceWrite bool - hookAnnotations []string - hookNotifications []queuedHookNotification - hookNotificationSeen map[string]time.Time - hookNotificationOmitted int - reportedMissingSkills map[string]struct{} - thinkingOverride *ThinkingOverride - pendingUserQuestion *UserQuestionRequestedPayload - disableTools bool + toolTimeoutBackoff map[string]int + factsCollector *runtimefacts.Collector + terminalStatus controlplane.TerminalStatus + terminalStopReason controlplane.StopReason + terminalStopDetail string + terminalSet bool + hasUnknownUsage bool + completion controlplane.CompletionState + progress controlplane.ProgressState + lastEndOfTurnCheckpointID string + runCheckpointID string + hasRunWorkspaceWrite bool + hookAnnotations []string + hookNotifications []queuedHookNotification + hookNotificationSeen map[string]time.Time + hookNotificationOmitted int + reportedMissingSkills map[string]struct{} + thinkingOverride *ThinkingOverride + pendingUserQuestion *UserQuestionRequestedPayload + disableTools bool } // newRunState 基于持久化会话创建一次运行的内存状态镜像。 From 2e98c14af058d46f4a9f3c9538366ad0a9f8d272 Mon Sep 17 00:00:00 2001 From: phantom5099 <1011668688@qq.com> Date: Sun, 10 May 2026 21:24:20 +0800 Subject: [PATCH 15/15] =?UTF-8?q?pref=EF=BC=88prompt=EF=BC=89:=E5=8A=A0?= =?UTF-8?q?=E5=BC=BA=E6=8F=90=E7=A4=BA=E8=AF=8D=E6=8F=90=E7=A4=BA=E5=8A=9B?= =?UTF-8?q?=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/config/context_budget_migration.go | 35 ++++++++++- .../config/context_budget_migration_test.go | 62 +++++++++++++++++++ internal/promptasset/assets_test.go | 14 +++++ .../completion_protocol_final_reminder.md | 16 ++++- .../runtime/completion_protocol_reminder.md | 14 ++++- internal/runtime/runtime_test.go | 4 +- internal/runtime/state.go | 3 +- 7 files changed, 139 insertions(+), 9 deletions(-) diff --git a/internal/config/context_budget_migration.go b/internal/config/context_budget_migration.go index 4c0f0027..36fcbfed 100644 --- a/internal/config/context_budget_migration.go +++ b/internal/config/context_budget_migration.go @@ -138,6 +138,10 @@ func MigrateContextBudgetConfigContent(raw []byte) ([]byte, bool, []string, erro if legacyRuntimeChanged { changed = true } + legacyMemoChanged := migrateLegacyMemoConfigFields(doc) + if legacyMemoChanged { + changed = true + } if !changed { return raw, false, nil, nil @@ -175,6 +179,35 @@ func migrateLegacyRuntimeConfigFields(doc map[string]any) bool { return true } +// migrateLegacyMemoConfigFields 清理 memo 下已移除且可安全丢弃的历史字段。 +func migrateLegacyMemoConfigFields(doc map[string]any) bool { + memoValue, ok := doc["memo"] + if !ok { + return false + } + memoMap, ok := migrationStringMap(memoValue) + if !ok { + return false + } + + changed := false + for _, key := range []string{"extract_recent_messages"} { + if _, exists := memoMap[key]; exists { + delete(memoMap, key) + changed = true + } + } + if !changed { + return false + } + if len(memoMap) == 0 { + delete(doc, "memo") + } else { + doc["memo"] = memoMap + } + return true +} + // migrateVerificationConfig 清理已废弃的 verification 字段,并将安全的旧 command string 收敛成 argv。 func migrateVerificationConfig(doc map[string]any) (bool, error) { runtimeValue, ok := doc["runtime"] @@ -195,7 +228,7 @@ func migrateVerificationConfig(doc map[string]any) (bool, error) { } changed := false - for _, key := range []string{"enabled", "default_task_policy", "final_intercept", "max_retries", "hooks"} { + for _, key := range []string{"enabled", "default_task_policy", "final_intercept", "max_retries", "hooks", "max_no_progress"} { if _, exists := verificationMap[key]; exists { delete(verificationMap, key) changed = true diff --git a/internal/config/context_budget_migration_test.go b/internal/config/context_budget_migration_test.go index ff0960c8..51e648ce 100644 --- a/internal/config/context_budget_migration_test.go +++ b/internal/config/context_budget_migration_test.go @@ -235,6 +235,68 @@ runtime: } } +func TestMigrateContextBudgetConfigContentRemovesLegacyVerificationNoProgressField(t *testing.T) { + t.Parallel() + + input := []byte(strings.TrimSpace(` +runtime: + verification: + max_no_progress: 3 + verifiers: + test: + timeout_sec: 30 +`) + "\n") + + out, changed, notes, err := MigrateContextBudgetConfigContent(input) + if err != nil { + t.Fatalf("MigrateContextBudgetConfigContent() error = %v", err) + } + if !changed { + t.Fatal("expected migration change") + } + if len(notes) != 0 { + t.Fatalf("expected no migration notes, got %v", notes) + } + + text := string(out) + if strings.Contains(text, "max_no_progress") { + t.Fatalf("expected max_no_progress removed, got:\n%s", text) + } + if !strings.Contains(text, "timeout_sec: 30") { + t.Fatalf("expected verifier config preserved, got:\n%s", text) + } +} + +func TestMigrateContextBudgetConfigContentRemovesLegacyMemoExtractRecentMessagesField(t *testing.T) { + t.Parallel() + + input := []byte(strings.TrimSpace(` +memo: + auto_extract: true + extract_recent_messages: 4 + extract_timeout_sec: 9 +`) + "\n") + + out, changed, notes, err := MigrateContextBudgetConfigContent(input) + if err != nil { + t.Fatalf("MigrateContextBudgetConfigContent() error = %v", err) + } + if !changed { + t.Fatal("expected migration change") + } + if len(notes) != 0 { + t.Fatalf("expected no migration notes, got %v", notes) + } + + text := string(out) + if strings.Contains(text, "extract_recent_messages") { + t.Fatalf("expected extract_recent_messages removed, got:\n%s", text) + } + if !strings.Contains(text, "auto_extract: true") || !strings.Contains(text, "extract_timeout_sec: 9") { + t.Fatalf("expected supported memo fields preserved, got:\n%s", text) + } +} + func TestMigrateContextBudgetConfigFileCreatesBackup(t *testing.T) { t.Parallel() diff --git a/internal/promptasset/assets_test.go b/internal/promptasset/assets_test.go index 03c4b13a..169a09c8 100644 --- a/internal/promptasset/assets_test.go +++ b/internal/promptasset/assets_test.go @@ -61,6 +61,20 @@ func TestRuntimeReminderTemplates(t *testing.T) { if !strings.Contains(RepeatCycleReminder(), "exact same arguments") { t.Fatalf("expected repeat-cycle reminder guidance, got %q", RepeatCycleReminder()) } + for name, prompt := range map[string]string{ + "completion": CompletionProtocolReminder(), + "final_completion": CompletionProtocolFinalReminder(), + } { + if !strings.Contains(prompt, "Completion retry rule") { + t.Fatalf("%s reminder should contain retry rule, got %q", name, prompt) + } + if !strings.Contains(prompt, "Do not repeat file lists") { + t.Fatalf("%s reminder should prevent repeated summaries, got %q", name, prompt) + } + if !strings.Contains(prompt, "at most one brief final sentence") { + t.Fatalf("%s reminder should keep final prose concise, got %q", name, prompt) + } + } } func TestPlanModePromptTemplates(t *testing.T) { diff --git a/internal/promptasset/templates/runtime/completion_protocol_final_reminder.md b/internal/promptasset/templates/runtime/completion_protocol_final_reminder.md index 8b1005a3..bd9f374a 100644 --- a/internal/promptasset/templates/runtime/completion_protocol_final_reminder.md +++ b/internal/promptasset/templates/runtime/completion_protocol_final_reminder.md @@ -1,5 +1,17 @@ -[Runtime Control] +[Runtime Control] You again stopped calling tools without outputting `task_completion`. -This is the final protocol reminder: if the task is done, output the structured completion signal. Otherwise, continue calling tools. Missing it again will terminate this run. +This is the final protocol reminder. Missing it again will terminate this run. + +Completion retry rule: +Your previous prose may already be visible to the user. Do not duplicate, restate, expand, or re-list prior summaries. + +If the task is done: +- Emit the required `task_completion` JSON exactly once. +- After the JSON, write at most one brief final sentence. +- Do not repeat file lists, completed steps, tool results, or previous summaries. + +If the task is not done: +- Continue with the next necessary tool call. +- Do not write another prose summary until the work is actually complete. diff --git a/internal/promptasset/templates/runtime/completion_protocol_reminder.md b/internal/promptasset/templates/runtime/completion_protocol_reminder.md index 44b40cd9..f9deeada 100644 --- a/internal/promptasset/templates/runtime/completion_protocol_reminder.md +++ b/internal/promptasset/templates/runtime/completion_protocol_reminder.md @@ -1,5 +1,15 @@ -[Runtime Control] +[Runtime Control] You stopped calling tools without outputting `task_completion`. -If the task is done, end with the structured completion signal. Otherwise, continue calling tools to make progress. +Completion retry rule: +Your previous prose may already be visible to the user. Do not duplicate, restate, expand, or re-list prior summaries. + +If the task is done: +- Emit the required `task_completion` JSON exactly once. +- After the JSON, write at most one brief final sentence. +- Do not repeat file lists, completed steps, tool results, or previous summaries. + +If the task is not done: +- Continue with the next necessary tool call. +- Do not write another prose summary until the work is actually complete. diff --git a/internal/runtime/runtime_test.go b/internal/runtime/runtime_test.go index 2f799065..f87bede5 100644 --- a/internal/runtime/runtime_test.go +++ b/internal/runtime/runtime_test.go @@ -1306,8 +1306,8 @@ func TestServiceRunSchedulesMemoExtractionFromCurrentRunBoundary(t *testing.T) { if len(messages) != 2 { t.Fatalf("scheduled messages = %#v, want current run user+assistant only", messages) } - if renderPartsForVerification(messages[0].Parts) != "new user" || - renderPartsForVerification(messages[1].Parts) != "new final" { + if renderPartsForTest(messages[0].Parts) != "new user" || + renderPartsForTest(messages[1].Parts) != "new final" { t.Fatalf("scheduled messages crossed run boundary: %#v", messages) } } diff --git a/internal/runtime/state.go b/internal/runtime/state.go index a1db31e6..2b6f6609 100644 --- a/internal/runtime/state.go +++ b/internal/runtime/state.go @@ -25,7 +25,7 @@ type runState struct { taskID string agentID string capabilityToken *security.CapabilityToken - memoRunMessages []providertypes.Message + memoRunMessages []providertypes.Message nextAttemptSeq int turn int baseLifecycle controlplane.RunState @@ -59,7 +59,6 @@ type runState struct { thinkingOverride *ThinkingOverride pendingUserQuestion *UserQuestionRequestedPayload disableTools bool - } // newRunState 基于持久化会话创建一次运行的内存状态镜像。