diff --git a/docs/context-compact.md b/docs/context-compact.md index 0da33c6c..c93d7c57 100644 --- a/docs/context-compact.md +++ b/docs/context-compact.md @@ -69,8 +69,8 @@ BuildRequest -> FreezeSnapshot -> EstimateInput -> DecideBudget -> (allow | comp - `context.Builder` 只构建 provider-facing request,不再返回旧的 builder 压缩建议布尔值。 - provider 发送前一定先做输入 token estimate。 - estimate 首次超预算时,runtime 执行一次 `proactive` compact,然后重建 request 并重新估算。 -- compact 后仍超预算且估算高置信(`accurate=true`)时,runtime 停止本次 run,并返回 `STOP_BUDGET_EXCEEDED`。 -- compact 后仍超预算但估算低置信(`accurate=false`)时,runtime 继续发送请求,不因低置信估算直接硬停。 +- compact 后仍超预算且 `gate_policy=gateable` 时,runtime 停止本次 run,并返回 `STOP_BUDGET_EXCEEDED`。 +- compact 后仍超预算但 `gate_policy=advisory` 时,runtime 继续发送请求,不直接硬停。 - provider 返回 `context_too_long` 时,runtime 触发 `reactive` compact,并重新进入同一预算闭环。 ## compact 如何压缩 diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index cb9ed7f0..6a4241f3 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -125,8 +125,8 @@ BuildRequest -> FreezeSnapshot -> EstimateInput -> DecideBudget -> (allow | comp - provider 发送前一定先做输入 token estimate。 - 如果 estimate 没超过 `prompt_budget`,本轮允许发送。 - 如果 estimate 首次超预算,先执行一次 `proactive` compact,然后重建请求并重新估算。 -- 如果 compact 后仍超预算且估算为高置信(`accurate=true`),停止当前 run,并产出 `STOP_BUDGET_EXCEEDED`。 -- 如果 compact 后仍超预算但估算为低置信(`accurate=false`),不直接硬停,继续发送请求。 +- 如果 compact 后仍超预算且 `gate_policy=gateable`,停止当前 run,并产出 `STOP_BUDGET_EXCEEDED`。 +- 如果 compact 后仍超预算但 `gate_policy=advisory`,不直接硬停,继续发送请求。 - 如果 provider 返回 `context_too_long`,runtime 会进入 `reactive` compact 恢复链路,并重新进入同一预算闭环。 ## provider 策略 diff --git a/docs/runtime-provider-event-flow.md b/docs/runtime-provider-event-flow.md index bcea98b4..d0966bd1 100644 --- a/docs/runtime-provider-event-flow.md +++ b/docs/runtime-provider-event-flow.md @@ -27,7 +27,7 @@ - `compact_applied` - `compact_error` -当前事件 envelope 的唯一有效 `payload_version` 为 `3`。 +当前事件 envelope 的唯一有效 `payload_version` 为 `4`。 ## ReAct 主循环 @@ -63,14 +63,14 @@ runtime 不再消费旧的 builder 压缩建议,而是使用冻结快照上的 - `estimated_input_tokens` - `prompt_budget` - `estimate_source` -- `estimate_accurate` +- `estimate_gate_policy` 语义: - `allow`:本轮请求在预算内 - `compact`:首次超预算,需要先压缩 -- `stop`:压缩后仍超预算且估算高置信,停止当前 run -- `allow` + `reason=exceeds_budget_inaccurate_after_compact_allow`:压缩后仍超预算但估算低置信,继续放行 +- `stop` + `reason=exceeds_budget_after_compact_stop`:压缩后仍超预算且估算可门禁(`gateable`),停止当前 run +- `allow` + `reason=exceeds_budget_after_compact_allow_advisory`:压缩后仍超预算但估算仅 advisory,继续放行 ## Context Builder 职责 diff --git a/internal/app/bootstrap_test.go b/internal/app/bootstrap_test.go index 878f88aa..adc9a182 100644 --- a/internal/app/bootstrap_test.go +++ b/internal/app/bootstrap_test.go @@ -1913,7 +1913,7 @@ func (s *stubMemoProvider) EstimateInputTokens( return providertypes.BudgetEstimate{ EstimatedInputTokens: provider.EstimateTextTokens(req.SystemPrompt), EstimateSource: provider.EstimateSourceLocal, - Accurate: false, + GatePolicy: provider.EstimateGateGateable, }, nil } diff --git a/internal/config/atomic_write.go b/internal/config/atomic_write.go new file mode 100644 index 00000000..5a1af5b6 --- /dev/null +++ b/internal/config/atomic_write.go @@ -0,0 +1,78 @@ +package config + +import ( + "bytes" + "errors" + "fmt" + "os" + "path/filepath" + "syscall" +) + +var ( + atomicCreateTemp = os.CreateTemp + atomicReadFile = os.ReadFile + atomicRename = os.Rename +) + +// writeFileAtomically 通过同目录临时文件与原子替换写入目标文件,并在写后做回读校验。 +func writeFileAtomically(path string, data []byte, perm os.FileMode) error { + dir := filepath.Dir(path) + pattern := "." + filepath.Base(path) + ".tmp-*" + tempFile, err := atomicCreateTemp(dir, pattern) + if err != nil { + return fmt.Errorf("create temp file: %w", err) + } + + tempPath := tempFile.Name() + cleanupTemp := true + defer func() { + if cleanupTemp { + _ = os.Remove(tempPath) + } + }() + + if _, err := tempFile.Write(data); err != nil { + _ = tempFile.Close() + return fmt.Errorf("write temp file: %w", err) + } + if err := tempFile.Sync(); err != nil { + _ = tempFile.Close() + return fmt.Errorf("sync temp file: %w", err) + } + if err := tempFile.Close(); err != nil { + return fmt.Errorf("close temp file: %w", err) + } + if err := os.Chmod(tempPath, perm); err != nil { + return fmt.Errorf("chmod temp file: %w", err) + } + if err := atomicRename(tempPath, path); err != nil { + return fmt.Errorf("rename temp file: %w", err) + } + cleanupTemp = false + + written, err := atomicReadFile(path) + if err != nil { + return fmt.Errorf("read back written file: %w", err) + } + if !bytes.Equal(written, data) { + return errors.New("read back mismatch") + } + if err := fsyncDirectory(dir); err != nil { + return fmt.Errorf("sync target directory: %w", err) + } + return nil +} + +// fsyncDirectory 尝试同步目录元数据,确保 rename 后的目录项在支持的平台尽快落盘。 +func fsyncDirectory(dir string) error { + handle, err := os.Open(dir) + if err != nil { + return err + } + defer handle.Close() + if err := handle.Sync(); err != nil && !errors.Is(err, syscall.EINVAL) && !errors.Is(err, os.ErrInvalid) { + return err + } + return nil +} diff --git a/internal/config/context_budget_migration.go b/internal/config/context_budget_migration.go index e053c252..2e5139c5 100644 --- a/internal/config/context_budget_migration.go +++ b/internal/config/context_budget_migration.go @@ -66,10 +66,10 @@ func MigrateContextBudgetConfigFile(path string, dryRun bool) (ContextBudgetMigr } backup := path + ".bak" - if err := os.WriteFile(backup, raw, 0o644); err != nil { + if err := writeFileAtomically(backup, raw, 0o644); err != nil { return result, fmt.Errorf("config: write migration backup %s: %w", backup, err) } - if err := os.WriteFile(path, migrated, 0o644); err != nil { + if err := writeFileAtomically(path, migrated, 0o644); err != nil { return result, fmt.Errorf("config: write migrated config %s: %w", path, err) } result.Backup = backup diff --git a/internal/config/context_budget_migration_test.go b/internal/config/context_budget_migration_test.go index 763463a2..2bbd38ef 100644 --- a/internal/config/context_budget_migration_test.go +++ b/internal/config/context_budget_migration_test.go @@ -1,6 +1,7 @@ package config import ( + "errors" "os" "path/filepath" "strings" @@ -179,3 +180,130 @@ context: t.Fatalf("expected note %q, got %v", ContextBudgetMigrationNoteEnabledDeprecated, result.Notes) } } + +func TestMigrateContextBudgetConfigFileKeepsOriginalWhenBackupWriteFails(t *testing.T) { + dir := t.TempDir() + target := filepath.Join(dir, configName) + original := strings.TrimSpace(` +context: + auto_compact: + input_token_threshold: 120000 +`) + "\n" + if err := os.WriteFile(target, []byte(original), 0o644); err != nil { + t.Fatalf("write target: %v", err) + } + + restore := stubAtomicWriteOps(t) + defer restore() + atomicCreateTemp = func(dir string, pattern string) (*os.File, error) { + return nil, errors.New("create temp failed") + } + + _, err := MigrateContextBudgetConfigFile(target, false) + if err == nil || !strings.Contains(err.Error(), "write migration backup") { + t.Fatalf("expected backup write error, got %v", err) + } + raw, readErr := os.ReadFile(target) + if readErr != nil { + t.Fatalf("read target: %v", readErr) + } + if string(raw) != original { + t.Fatalf("expected original config to stay unchanged, got:\n%s", raw) + } +} + +func TestMigrateContextBudgetConfigFileKeepsOriginalWhenTargetReplaceFails(t *testing.T) { + dir := t.TempDir() + target := filepath.Join(dir, configName) + original := strings.TrimSpace(` +context: + auto_compact: + input_token_threshold: 120000 +`) + "\n" + if err := os.WriteFile(target, []byte(original), 0o644); err != nil { + t.Fatalf("write target: %v", err) + } + + restore := stubAtomicWriteOps(t) + defer restore() + renameCount := 0 + atomicRename = func(oldpath string, newpath string) error { + renameCount++ + if renameCount == 2 { + return errors.New("rename target failed") + } + return os.Rename(oldpath, newpath) + } + + _, err := MigrateContextBudgetConfigFile(target, false) + if err == nil || !strings.Contains(err.Error(), "write migrated config") { + t.Fatalf("expected migrated config write error, got %v", err) + } + if renameCount < 2 { + t.Fatalf("expected second rename to fail, got renameCount=%d", renameCount) + } + + raw, readErr := os.ReadFile(target) + if readErr != nil { + t.Fatalf("read target: %v", readErr) + } + if string(raw) != original { + t.Fatalf("expected original config to stay unchanged, got:\n%s", raw) + } + + backupRaw, backupErr := os.ReadFile(target + ".bak") + if backupErr != nil { + t.Fatalf("read backup: %v", backupErr) + } + if string(backupRaw) != original { + t.Fatalf("expected backup to keep original content, got:\n%s", backupRaw) + } +} + +func TestMigrateContextBudgetConfigFileKeepsOriginalWhenBackupVerificationFails(t *testing.T) { + dir := t.TempDir() + target := filepath.Join(dir, configName) + original := strings.TrimSpace(` +context: + auto_compact: + input_token_threshold: 120000 +`) + "\n" + if err := os.WriteFile(target, []byte(original), 0o644); err != nil { + t.Fatalf("write target: %v", err) + } + + restore := stubAtomicWriteOps(t) + defer restore() + readCount := 0 + atomicReadFile = func(path string) ([]byte, error) { + readCount++ + if readCount == 1 { + return []byte("corrupted"), nil + } + return os.ReadFile(path) + } + + _, err := MigrateContextBudgetConfigFile(target, false) + if err == nil || !strings.Contains(err.Error(), "read back mismatch") { + t.Fatalf("expected read back mismatch error, got %v", err) + } + raw, readErr := os.ReadFile(target) + if readErr != nil { + t.Fatalf("read target: %v", readErr) + } + if string(raw) != original { + t.Fatalf("expected original config to stay unchanged, got:\n%s", raw) + } +} + +func stubAtomicWriteOps(t *testing.T) func() { + t.Helper() + prevCreateTemp := atomicCreateTemp + prevReadFile := atomicReadFile + prevRename := atomicRename + return func() { + atomicCreateTemp = prevCreateTemp + atomicReadFile = prevReadFile + atomicRename = prevRename + } +} diff --git a/internal/config/loader.go b/internal/config/loader.go index f559d9fb..ead5f0f0 100644 --- a/internal/config/loader.go +++ b/internal/config/loader.go @@ -165,7 +165,7 @@ func (l *Loader) Save(ctx context.Context, cfg *Config) error { return err } - if err := os.WriteFile(l.ConfigPath(), data, 0o644); err != nil { + if err := writeFileAtomically(l.ConfigPath(), data, 0o644); err != nil { return fmt.Errorf("config: write config file: %w", err) } diff --git a/internal/provider/anthropic/provider.go b/internal/provider/anthropic/provider.go index ffbb548a..f7db2c17 100644 --- a/internal/provider/anthropic/provider.go +++ b/internal/provider/anthropic/provider.go @@ -43,7 +43,7 @@ func (p *Provider) EstimateInputTokens( return providertypes.BudgetEstimate{ EstimatedInputTokens: tokens, EstimateSource: provider.EstimateSourceLocal, - Accurate: false, + GatePolicy: provider.EstimateGateGateable, }, nil } diff --git a/internal/provider/anthropic/provider_test.go b/internal/provider/anthropic/provider_test.go index 49c36ac9..32b3e43f 100644 --- a/internal/provider/anthropic/provider_test.go +++ b/internal/provider/anthropic/provider_test.go @@ -199,6 +199,40 @@ func TestBuildRequestRejectsSessionAssetWithoutReader(t *testing.T) { } } +func TestEstimateInputTokensReturnsGateableLocalEstimate(t *testing.T) { + t.Parallel() + + p, err := New(provider.RuntimeConfig{ + Driver: provider.DriverAnthropic, + BaseURL: "https://api.anthropic.com/v1", + DefaultModel: "claude-3-7-sonnet", + APIKeyEnv: "ANTHROPIC_TEST_KEY", + APIKeyResolver: provider.StaticAPIKeyResolver("test-key"), + }) + if err != nil { + t.Fatalf("New() error = %v", err) + } + + estimate, err := p.EstimateInputTokens(context.Background(), providertypes.GenerateRequest{ + Messages: []providertypes.Message{{ + Role: providertypes.RoleUser, + Parts: []providertypes.ContentPart{providertypes.NewTextPart("hi")}, + }}, + }) + if err != nil { + t.Fatalf("EstimateInputTokens() error = %v", err) + } + if estimate.EstimateSource != provider.EstimateSourceLocal { + t.Fatalf("estimate source = %q, want %q", estimate.EstimateSource, provider.EstimateSourceLocal) + } + if estimate.GatePolicy != provider.EstimateGateGateable { + t.Fatalf("gate policy = %q, want %q", estimate.GatePolicy, provider.EstimateGateGateable) + } + if estimate.EstimatedInputTokens <= 0 { + t.Fatalf("expected positive estimate tokens, got %d", estimate.EstimatedInputTokens) + } +} + func drainEvents(events <-chan providertypes.StreamEvent) []providertypes.StreamEvent { var drained []providertypes.StreamEvent for { diff --git a/internal/provider/estimate.go b/internal/provider/estimate.go index 07e0c9d8..8467a62b 100644 --- a/internal/provider/estimate.go +++ b/internal/provider/estimate.go @@ -8,6 +8,8 @@ import ( const ( EstimateSourceNative = "native" EstimateSourceLocal = "local" + EstimateGateAdvisory = "advisory" + EstimateGateGateable = "gateable" localEstimateSlack = 1.15 ) diff --git a/internal/provider/gemini/provider.go b/internal/provider/gemini/provider.go index 789fd8ca..af1a9b5d 100644 --- a/internal/provider/gemini/provider.go +++ b/internal/provider/gemini/provider.go @@ -46,7 +46,7 @@ func (p *Provider) EstimateInputTokens( return providertypes.BudgetEstimate{ EstimatedInputTokens: tokens, EstimateSource: provider.EstimateSourceLocal, - Accurate: false, + GatePolicy: provider.EstimateGateGateable, }, nil } diff --git a/internal/provider/gemini/provider_test.go b/internal/provider/gemini/provider_test.go index 37eaedd9..e9e866fc 100644 --- a/internal/provider/gemini/provider_test.go +++ b/internal/provider/gemini/provider_test.go @@ -186,6 +186,40 @@ func TestBuildRequestRejectsSessionAssetWithoutReader(t *testing.T) { } } +func TestEstimateInputTokensReturnsGateableLocalEstimate(t *testing.T) { + t.Parallel() + + p, err := New(provider.RuntimeConfig{ + Driver: provider.DriverGemini, + BaseURL: "https://generativelanguage.googleapis.com/v1beta", + DefaultModel: "gemini-2.5-flash", + APIKeyEnv: "GEMINI_TEST_KEY", + APIKeyResolver: provider.StaticAPIKeyResolver("test-key"), + }) + if err != nil { + t.Fatalf("New() error = %v", err) + } + + estimate, err := p.EstimateInputTokens(context.Background(), providertypes.GenerateRequest{ + Messages: []providertypes.Message{{ + Role: providertypes.RoleUser, + Parts: []providertypes.ContentPart{providertypes.NewTextPart("hi")}, + }}, + }) + if err != nil { + t.Fatalf("EstimateInputTokens() error = %v", err) + } + if estimate.EstimateSource != provider.EstimateSourceLocal { + t.Fatalf("estimate source = %q, want %q", estimate.EstimateSource, provider.EstimateSourceLocal) + } + if estimate.GatePolicy != provider.EstimateGateGateable { + t.Fatalf("gate policy = %q, want %q", estimate.GatePolicy, provider.EstimateGateGateable) + } + if estimate.EstimatedInputTokens <= 0 { + t.Fatalf("expected positive estimate tokens, got %d", estimate.EstimatedInputTokens) + } +} + func drainEvents(events <-chan providertypes.StreamEvent) []providertypes.StreamEvent { var drained []providertypes.StreamEvent for { diff --git a/internal/provider/generate_test.go b/internal/provider/generate_test.go index 0658e187..b35e2717 100644 --- a/internal/provider/generate_test.go +++ b/internal/provider/generate_test.go @@ -23,7 +23,7 @@ func (s *stubTextGenProvider) EstimateInputTokens( return providertypes.BudgetEstimate{ EstimatedInputTokens: provider.EstimateTextTokens(req.SystemPrompt + renderEstimateMessages(req.Messages)), EstimateSource: provider.EstimateSourceLocal, - Accurate: false, + GatePolicy: provider.EstimateGateGateable, }, nil } diff --git a/internal/provider/openaicompat/openaicompat_test.go b/internal/provider/openaicompat/openaicompat_test.go index 34a00001..3cb176be 100644 --- a/internal/provider/openaicompat/openaicompat_test.go +++ b/internal/provider/openaicompat/openaicompat_test.go @@ -245,6 +245,33 @@ func TestDiscoverModelsParsesNestedContainerAndAliasFields(t *testing.T) { } } +func TestEstimateInputTokensReturnsGateableLocalEstimate(t *testing.T) { + t.Parallel() + + p, err := New(resolvedConfig("", "")) + if err != nil { + t.Fatalf("New() error = %v", err) + } + estimate, err := p.EstimateInputTokens(context.Background(), providertypes.GenerateRequest{ + Messages: []providertypes.Message{{ + Role: providertypes.RoleUser, + Parts: []providertypes.ContentPart{providertypes.NewTextPart("hi")}, + }}, + }) + if err != nil { + t.Fatalf("EstimateInputTokens() error = %v", err) + } + if estimate.EstimateSource != provider.EstimateSourceLocal { + t.Fatalf("estimate source = %q, want %q", estimate.EstimateSource, provider.EstimateSourceLocal) + } + if estimate.GatePolicy != provider.EstimateGateGateable { + t.Fatalf("gate policy = %q, want %q", estimate.GatePolicy, provider.EstimateGateGateable) + } + if estimate.EstimatedInputTokens <= 0 { + t.Fatalf("expected positive estimate tokens, got %d", estimate.EstimatedInputTokens) + } +} + func TestDiscoverModelsOpenAIProfileFallsBackToGenericListKeys(t *testing.T) { t.Parallel() diff --git a/internal/provider/openaicompat/provider.go b/internal/provider/openaicompat/provider.go index 2dea2883..6227f9a2 100644 --- a/internal/provider/openaicompat/provider.go +++ b/internal/provider/openaicompat/provider.go @@ -75,7 +75,7 @@ func (p *Provider) EstimateInputTokens( return providertypes.BudgetEstimate{ EstimatedInputTokens: tokens, EstimateSource: provider.EstimateSourceLocal, - Accurate: false, + GatePolicy: provider.EstimateGateGateable, }, nil } diff --git a/internal/provider/types/usage.go b/internal/provider/types/usage.go index 4250c6d1..a605919c 100644 --- a/internal/provider/types/usage.go +++ b/internal/provider/types/usage.go @@ -11,5 +11,5 @@ type Usage struct { type BudgetEstimate struct { EstimatedInputTokens int `json:"estimated_input_tokens"` EstimateSource string `json:"estimate_source"` - Accurate bool `json:"accurate"` + GatePolicy string `json:"gate_policy"` } diff --git a/internal/runtime/budget_models.go b/internal/runtime/budget_models.go index 58fd03ac..6283931e 100644 --- a/internal/runtime/budget_models.go +++ b/internal/runtime/budget_models.go @@ -91,11 +91,15 @@ func newTurnBudgetEstimate( id controlplane.TurnBudgetID, estimate providertypes.BudgetEstimate, ) controlplane.TurnBudgetEstimate { + gatePolicy := controlplane.TurnBudgetGatePolicyAdvisory + if estimate.GatePolicy == provider.EstimateGateGateable { + gatePolicy = controlplane.TurnBudgetGatePolicyGateable + } return controlplane.TurnBudgetEstimate{ ID: id, EstimatedInputTokens: estimate.EstimatedInputTokens, EstimateSource: estimate.EstimateSource, - Accurate: estimate.Accurate, + GatePolicy: gatePolicy, } } diff --git a/internal/runtime/controlplane/budget.go b/internal/runtime/controlplane/budget.go index 16a1915a..872496e5 100644 --- a/internal/runtime/controlplane/budget.go +++ b/internal/runtime/controlplane/budget.go @@ -9,17 +9,22 @@ const ( TurnBudgetActionStop TurnBudgetAction = "stop" ) +const ( + // TurnBudgetGatePolicyGateable 表示估算可作为预算硬停门禁依据。 + TurnBudgetGatePolicyGateable = "gateable" + // TurnBudgetGatePolicyAdvisory 表示估算仅用于提示或触发 compact,不能硬停。 + TurnBudgetGatePolicyAdvisory = "advisory" +) + const ( // BudgetDecisionReasonWithinBudget 表示估算在预算范围内。 BudgetDecisionReasonWithinBudget = "within_budget" // BudgetDecisionReasonExceedsBudgetFirstTime 表示首次超预算,需要先 compact。 BudgetDecisionReasonExceedsBudgetFirstTime = "exceeds_budget_first_time" - // BudgetDecisionReasonExceedsBudgetAfterCompact 表示高置信估算在 compact 后仍超预算,需要停止。 - BudgetDecisionReasonExceedsBudgetAfterCompact = "exceeds_budget_after_compact" - // BudgetDecisionReasonExceedsBudgetInaccurateFirstTime 表示低置信估算首次超预算,先 compact 再验证。 - BudgetDecisionReasonExceedsBudgetInaccurateFirstTime = "exceeds_budget_inaccurate_first_time" - // BudgetDecisionReasonExceedsBudgetInaccurateAfterCompactAllow 表示低置信估算 compact 后仍超预算但允许放行。 - BudgetDecisionReasonExceedsBudgetInaccurateAfterCompactAllow = "exceeds_budget_inaccurate_after_compact_allow" + // BudgetDecisionReasonExceedsBudgetAfterCompactStop 表示 compact 后仍超预算且可门禁,必须停止。 + BudgetDecisionReasonExceedsBudgetAfterCompactStop = "exceeds_budget_after_compact_stop" + // BudgetDecisionReasonExceedsBudgetAfterCompactAllowAdvisory 表示 compact 后仍超预算但仅 advisory,允许放行。 + BudgetDecisionReasonExceedsBudgetAfterCompactAllowAdvisory = "exceeds_budget_after_compact_allow_advisory" ) // TurnBudgetID 标识一次冻结预算尝试,避免 estimate、decision 与 usage observation 串用。 @@ -33,7 +38,7 @@ type TurnBudgetEstimate struct { ID TurnBudgetID `json:"id"` EstimatedInputTokens int `json:"estimated_input_tokens"` EstimateSource string `json:"estimate_source,omitempty"` - Accurate bool `json:"accurate"` + GatePolicy string `json:"gate_policy,omitempty"` } // TurnBudgetDecision 描述冻结请求在当前预算事实下的决策结果。 @@ -44,7 +49,7 @@ type TurnBudgetDecision struct { EstimatedInputTokens int `json:"estimated_input_tokens"` PromptBudget int `json:"prompt_budget"` EstimateSource string `json:"estimate_source,omitempty"` - EstimateAccurate bool `json:"estimate_accurate"` + EstimateGatePolicy string `json:"estimate_gate_policy,omitempty"` } // DecideTurnBudget 根据输入预算事实输出 allow、compact 或 stop 三种动作。 @@ -58,7 +63,7 @@ func DecideTurnBudget( EstimatedInputTokens: estimate.EstimatedInputTokens, PromptBudget: promptBudget, EstimateSource: estimate.EstimateSource, - EstimateAccurate: estimate.Accurate, + EstimateGatePolicy: estimate.GatePolicy, } if estimate.EstimatedInputTokens <= promptBudget { decision.Action = TurnBudgetActionAllow @@ -67,19 +72,15 @@ func DecideTurnBudget( } if compactCount == 0 { decision.Action = TurnBudgetActionCompact - if estimate.Accurate { - decision.Reason = BudgetDecisionReasonExceedsBudgetFirstTime - } else { - decision.Reason = BudgetDecisionReasonExceedsBudgetInaccurateFirstTime - } + decision.Reason = BudgetDecisionReasonExceedsBudgetFirstTime return decision } - if estimate.Accurate { + if estimate.GatePolicy == TurnBudgetGatePolicyGateable { decision.Action = TurnBudgetActionStop - decision.Reason = BudgetDecisionReasonExceedsBudgetAfterCompact + decision.Reason = BudgetDecisionReasonExceedsBudgetAfterCompactStop return decision } decision.Action = TurnBudgetActionAllow - decision.Reason = BudgetDecisionReasonExceedsBudgetInaccurateAfterCompactAllow + decision.Reason = BudgetDecisionReasonExceedsBudgetAfterCompactAllowAdvisory return decision } diff --git a/internal/runtime/controlplane/budget_test.go b/internal/runtime/controlplane/budget_test.go index 64b5380f..680f8856 100644 --- a/internal/runtime/controlplane/budget_test.go +++ b/internal/runtime/controlplane/budget_test.go @@ -12,7 +12,7 @@ func TestDecideTurnBudgetAccurateBranches(t *testing.T) { }, EstimatedInputTokens: 120, EstimateSource: "provider", - Accurate: true, + GatePolicy: TurnBudgetGatePolicyGateable, } within := DecideTurnBudget(baseEstimate, 120, 0) @@ -22,8 +22,8 @@ func TestDecideTurnBudgetAccurateBranches(t *testing.T) { if within.Reason != BudgetDecisionReasonWithinBudget { t.Fatalf("within.Reason = %q", within.Reason) } - if !within.EstimateAccurate { - t.Fatalf("within.EstimateAccurate = false, want true") + if within.EstimateGatePolicy != TurnBudgetGatePolicyGateable { + t.Fatalf("within.EstimateGatePolicy = %q, want %q", within.EstimateGatePolicy, TurnBudgetGatePolicyGateable) } firstExceed := DecideTurnBudget(baseEstimate, 100, 0) @@ -33,23 +33,23 @@ func TestDecideTurnBudgetAccurateBranches(t *testing.T) { if firstExceed.Reason != BudgetDecisionReasonExceedsBudgetFirstTime { t.Fatalf("firstExceed.Reason = %q", firstExceed.Reason) } - if !firstExceed.EstimateAccurate { - t.Fatalf("firstExceed.EstimateAccurate = false, want true") + if firstExceed.EstimateGatePolicy != TurnBudgetGatePolicyGateable { + t.Fatalf("firstExceed.EstimateGatePolicy = %q, want %q", firstExceed.EstimateGatePolicy, TurnBudgetGatePolicyGateable) } afterCompact := DecideTurnBudget(baseEstimate, 100, 1) if afterCompact.Action != TurnBudgetActionStop { t.Fatalf("afterCompact.Action = %q", afterCompact.Action) } - if afterCompact.Reason != BudgetDecisionReasonExceedsBudgetAfterCompact { + if afterCompact.Reason != BudgetDecisionReasonExceedsBudgetAfterCompactStop { t.Fatalf("afterCompact.Reason = %q", afterCompact.Reason) } - if !afterCompact.EstimateAccurate { - t.Fatalf("afterCompact.EstimateAccurate = false, want true") + if afterCompact.EstimateGatePolicy != TurnBudgetGatePolicyGateable { + t.Fatalf("afterCompact.EstimateGatePolicy = %q, want %q", afterCompact.EstimateGatePolicy, TurnBudgetGatePolicyGateable) } } -func TestDecideTurnBudgetInaccurateBranches(t *testing.T) { +func TestDecideTurnBudgetAdvisoryBranches(t *testing.T) { t.Parallel() estimate := TurnBudgetEstimate{ @@ -59,28 +59,28 @@ func TestDecideTurnBudgetInaccurateBranches(t *testing.T) { }, EstimatedInputTokens: 200, EstimateSource: "local", - Accurate: false, + GatePolicy: TurnBudgetGatePolicyAdvisory, } firstExceed := DecideTurnBudget(estimate, 100, 0) if firstExceed.Action != TurnBudgetActionCompact { t.Fatalf("firstExceed.Action = %q", firstExceed.Action) } - if firstExceed.Reason != BudgetDecisionReasonExceedsBudgetInaccurateFirstTime { + if firstExceed.Reason != BudgetDecisionReasonExceedsBudgetFirstTime { t.Fatalf("firstExceed.Reason = %q", firstExceed.Reason) } - if firstExceed.EstimateAccurate { - t.Fatalf("firstExceed.EstimateAccurate = true, want false") + if firstExceed.EstimateGatePolicy != TurnBudgetGatePolicyAdvisory { + t.Fatalf("firstExceed.EstimateGatePolicy = %q, want %q", firstExceed.EstimateGatePolicy, TurnBudgetGatePolicyAdvisory) } afterCompact := DecideTurnBudget(estimate, 100, 1) if afterCompact.Action != TurnBudgetActionAllow { t.Fatalf("afterCompact.Action = %q", afterCompact.Action) } - if afterCompact.Reason != BudgetDecisionReasonExceedsBudgetInaccurateAfterCompactAllow { + if afterCompact.Reason != BudgetDecisionReasonExceedsBudgetAfterCompactAllowAdvisory { t.Fatalf("afterCompact.Reason = %q", afterCompact.Reason) } - if afterCompact.EstimateAccurate { - t.Fatalf("afterCompact.EstimateAccurate = true, want false") + if afterCompact.EstimateGatePolicy != TurnBudgetGatePolicyAdvisory { + t.Fatalf("afterCompact.EstimateGatePolicy = %q, want %q", afterCompact.EstimateGatePolicy, TurnBudgetGatePolicyAdvisory) } } diff --git a/internal/runtime/controlplane/envelope.go b/internal/runtime/controlplane/envelope.go index a1c65626..f0d08da4 100644 --- a/internal/runtime/controlplane/envelope.go +++ b/internal/runtime/controlplane/envelope.go @@ -1,4 +1,4 @@ package controlplane // PayloadVersion 为 runtime 事件 envelope 的当前协议版本号。 -const PayloadVersion = 3 +const PayloadVersion = 4 diff --git a/internal/runtime/events.go b/internal/runtime/events.go index 0ff7adf8..03416ba2 100644 --- a/internal/runtime/events.go +++ b/internal/runtime/events.go @@ -36,7 +36,7 @@ type BudgetCheckedPayload struct { EstimatedInputTokens int `json:"estimated_input_tokens"` PromptBudget int `json:"prompt_budget"` EstimateSource string `json:"estimate_source,omitempty"` - EstimateAccurate bool `json:"estimate_accurate"` + EstimateGatePolicy string `json:"estimate_gate_policy,omitempty"` } // ProgressEvaluatedPayload 汇总 progress 控制面的评估结果。 @@ -71,7 +71,7 @@ func newBudgetCheckedPayload(decision controlplane.TurnBudgetDecision) BudgetChe EstimatedInputTokens: decision.EstimatedInputTokens, PromptBudget: decision.PromptBudget, EstimateSource: decision.EstimateSource, - EstimateAccurate: decision.EstimateAccurate, + EstimateGatePolicy: decision.EstimateGatePolicy, } } diff --git a/internal/runtime/runtime_test.go b/internal/runtime/runtime_test.go index bbf48ac9..c06059b1 100644 --- a/internal/runtime/runtime_test.go +++ b/internal/runtime/runtime_test.go @@ -541,7 +541,7 @@ func (p *scriptedProvider) EstimateInputTokens( return providertypes.BudgetEstimate{ EstimatedInputTokens: provider.EstimateTextTokens(req.SystemPrompt + renderMessagesForEstimate(req.Messages)), EstimateSource: provider.EstimateSourceLocal, - Accurate: false, + GatePolicy: provider.EstimateGateGateable, }, nil } @@ -4476,7 +4476,7 @@ func TestResolvePromptBudgetFallsBackWhenResolverErrors(t *testing.T) { } } -func TestServiceRunAllowsAfterProactiveCompactWhenEstimateInaccurate(t *testing.T) { +func TestServiceRunStopsAfterProactiveCompactWhenEstimateGateable(t *testing.T) { t.Parallel() manager := newRuntimeConfigManager(t) @@ -4497,7 +4497,7 @@ func TestServiceRunAllowsAfterProactiveCompactWhenEstimateInaccurate(t *testing. return providertypes.BudgetEstimate{ EstimatedInputTokens: 99, EstimateSource: provider.EstimateSourceLocal, - Accurate: false, + GatePolicy: provider.EstimateGateGateable, }, nil }, responses: []scriptedResponse{ @@ -4532,7 +4532,122 @@ func TestServiceRunAllowsAfterProactiveCompactWhenEstimateInaccurate(t *testing. } if err := service.Run(context.Background(), UserInput{ - RunID: "run-budget-inaccurate-allow", + RunID: "run-budget-gateable-stop", + Parts: []providertypes.ContentPart{providertypes.NewTextPart("continue")}, + }); err != nil { + t.Fatalf("Run() error = %v", err) + } + + compactRunner := service.compactRunner.(*stubCompactRunner) + if len(compactRunner.calls) != 1 { + t.Fatalf("expected one proactive compact, got %d", len(compactRunner.calls)) + } + if compactRunner.calls[0].Mode != contextcompact.ModeProactive { + t.Fatalf("expected compact mode %q, got %q", contextcompact.ModeProactive, compactRunner.calls[0].Mode) + } + if scripted.callCount != 0 { + t.Fatalf("expected provider Generate to be skipped after budget stop, got %d calls", scripted.callCount) + } + + events := collectRuntimeEvents(service.Events()) + var budgetActions []string + var budgetReasons []string + var budgetGatePolicies []string + var stopPayload StopReasonDecidedPayload + for _, event := range events { + switch event.Type { + case EventBudgetChecked: + payload, ok := event.Payload.(BudgetCheckedPayload) + if !ok { + t.Fatalf("expected BudgetCheckedPayload, got %T", event.Payload) + } + budgetActions = append(budgetActions, payload.Action) + budgetReasons = append(budgetReasons, payload.Reason) + budgetGatePolicies = append(budgetGatePolicies, payload.EstimateGatePolicy) + case EventStopReasonDecided: + payload, ok := event.Payload.(StopReasonDecidedPayload) + if !ok { + t.Fatalf("expected StopReasonDecidedPayload, got %T", event.Payload) + } + stopPayload = payload + } + } + + if len(budgetActions) != 2 || budgetActions[0] != "compact" || budgetActions[1] != "stop" { + t.Fatalf("expected budget actions [compact stop], got %v", budgetActions) + } + if len(budgetReasons) != 2 || + budgetReasons[0] != controlplane.BudgetDecisionReasonExceedsBudgetFirstTime || + budgetReasons[1] != controlplane.BudgetDecisionReasonExceedsBudgetAfterCompactStop { + t.Fatalf("unexpected budget reasons %v", budgetReasons) + } + if len(budgetGatePolicies) != 2 || + budgetGatePolicies[0] != provider.EstimateGateGateable || + budgetGatePolicies[1] != provider.EstimateGateGateable { + t.Fatalf("expected gateable estimates, got %v", budgetGatePolicies) + } + if stopPayload.Reason != controlplane.StopReasonBudgetExceeded { + t.Fatalf("expected stop reason %q, got %q", controlplane.StopReasonBudgetExceeded, stopPayload.Reason) + } +} + +func TestServiceRunAllowsAfterProactiveCompactWhenEstimateAdvisory(t *testing.T) { + t.Parallel() + + manager := newRuntimeConfigManager(t) + if err := manager.Update(context.Background(), func(cfg *config.Config) error { + cfg.Context.Budget.PromptBudget = 10 + cfg.Context.Budget.FallbackPromptBudget = 10 + return nil + }); err != nil { + t.Fatalf("update config: %v", err) + } + + store := newMemoryStore() + registry := tools.NewRegistry() + scripted := &scriptedProvider{ + estimateFn: func(ctx context.Context, req providertypes.GenerateRequest) (providertypes.BudgetEstimate, error) { + _ = ctx + _ = req + return providertypes.BudgetEstimate{ + EstimatedInputTokens: 99, + EstimateSource: provider.EstimateSourceLocal, + GatePolicy: provider.EstimateGateAdvisory, + }, nil + }, + responses: []scriptedResponse{ + { + Message: providertypes.Message{ + Role: providertypes.RoleAssistant, + Parts: []providertypes.ContentPart{providertypes.NewTextPart("继续执行")}, + }, + FinishReason: "stop", + }, + }, + } + + service := NewWithFactory(manager, registry, store, &scriptedProviderFactory{provider: scripted}, &stubContextBuilder{}) + service.compactRunner = &stubCompactRunner{ + result: contextcompact.Result{ + Applied: true, + Messages: []providertypes.Message{ + { + Role: providertypes.RoleAssistant, + Parts: []providertypes.ContentPart{providertypes.NewTextPart("[compact_summary]\ndone:\n- archived\n\nin_progress:\n- continue")}, + }, + { + Role: providertypes.RoleUser, + Parts: []providertypes.ContentPart{providertypes.NewTextPart("continue")}, + }, + }, + Metrics: contextcompact.Metrics{ + TriggerMode: string(contextcompact.ModeProactive), + }, + }, + } + + if err := service.Run(context.Background(), UserInput{ + RunID: "run-budget-advisory-allow", Parts: []providertypes.ContentPart{providertypes.NewTextPart("continue")}, }); err != nil { t.Fatalf("Run() error = %v", err) @@ -4552,7 +4667,7 @@ func TestServiceRunAllowsAfterProactiveCompactWhenEstimateInaccurate(t *testing. events := collectRuntimeEvents(service.Events()) var budgetActions []string var budgetReasons []string - var budgetAccuracies []bool + var budgetGatePolicies []string var stopPayload StopReasonDecidedPayload for _, event := range events { switch event.Type { @@ -4563,7 +4678,7 @@ func TestServiceRunAllowsAfterProactiveCompactWhenEstimateInaccurate(t *testing. } budgetActions = append(budgetActions, payload.Action) budgetReasons = append(budgetReasons, payload.Reason) - budgetAccuracies = append(budgetAccuracies, payload.EstimateAccurate) + budgetGatePolicies = append(budgetGatePolicies, payload.EstimateGatePolicy) case EventStopReasonDecided: payload, ok := event.Payload.(StopReasonDecidedPayload) if !ok { @@ -4577,12 +4692,14 @@ func TestServiceRunAllowsAfterProactiveCompactWhenEstimateInaccurate(t *testing. t.Fatalf("expected budget actions [compact allow], got %v", budgetActions) } if len(budgetReasons) != 2 || - budgetReasons[0] != controlplane.BudgetDecisionReasonExceedsBudgetInaccurateFirstTime || - budgetReasons[1] != controlplane.BudgetDecisionReasonExceedsBudgetInaccurateAfterCompactAllow { + budgetReasons[0] != controlplane.BudgetDecisionReasonExceedsBudgetFirstTime || + budgetReasons[1] != controlplane.BudgetDecisionReasonExceedsBudgetAfterCompactAllowAdvisory { t.Fatalf("unexpected budget reasons %v", budgetReasons) } - if len(budgetAccuracies) != 2 || budgetAccuracies[0] || budgetAccuracies[1] { - t.Fatalf("expected inaccurate estimates, got %v", budgetAccuracies) + if len(budgetGatePolicies) != 2 || + budgetGatePolicies[0] != provider.EstimateGateAdvisory || + budgetGatePolicies[1] != provider.EstimateGateAdvisory { + t.Fatalf("expected advisory estimates, got %v", budgetGatePolicies) } if stopPayload.Reason != controlplane.StopReasonCompleted { t.Fatalf("expected stop reason %q, got %q", controlplane.StopReasonCompleted, stopPayload.Reason) @@ -4602,7 +4719,7 @@ func TestServiceRunReconcilesUnknownOutputUsage(t *testing.T) { return providertypes.BudgetEstimate{ EstimatedInputTokens: 17, EstimateSource: provider.EstimateSourceLocal, - Accurate: false, + GatePolicy: provider.EstimateGateGateable, }, nil }, responses: []scriptedResponse{ diff --git a/internal/tui/services/gateway_stream_client.go b/internal/tui/services/gateway_stream_client.go index afdf7c57..34219c1d 100644 --- a/internal/tui/services/gateway_stream_client.go +++ b/internal/tui/services/gateway_stream_client.go @@ -14,7 +14,7 @@ import ( "neo-code/internal/tools" ) -const runtimeEventPayloadVersion = 3 +const runtimeEventPayloadVersion = 4 // GatewayStreamClient 负责消费 gateway.event 并恢复为 TUI 事件。 type GatewayStreamClient struct {