diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3511f7fe0..73697c579 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2,10 +2,73 @@ name: build on: [push, pull_request] jobs: + govulncheck: + runs-on: ubuntu-latest + env: + GH_PAT: "${{ secrets.PERSONAL_ACCESS_TOKEN }}" + GOTOOLCHAIN: local + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + # The reusable charmbracelet/meta govulncheck job always reads go.mod. + # Keep the module baseline at Go 1.25.0, but run the scanner with Go 1.26.4 + # so it uses a fixed standard library. + - uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 + with: + go-version: "1.26.4" + cache: true + check-latest: true + - run: | + git config --global url."https://${{ secrets.PERSONAL_ACCESS_TOKEN }}@github.com/charmbracelet".insteadOf "https://github.com/charmbracelet" + git config --global url."https://${{ secrets.PERSONAL_ACCESS_TOKEN }}@github.com/charmcli".insteadOf "https://github.com/charmcli" + if: env.GH_PAT != '' + - run: go install golang.org/x/vuln/cmd/govulncheck@latest + - run: go mod tidy + - run: govulncheck ./... + build: - uses: charmbracelet/meta/.github/workflows/build.yml@main - with: - go-version: "" - go-version-file: ./go.mod - secrets: - gh_pat: "${{ secrets.PERSONAL_ACCESS_TOKEN }}" + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + runs-on: ${{ matrix.os }} + env: + GO111MODULE: "on" + GH_PAT: "${{ secrets.PERSONAL_ACCESS_TOKEN }}" + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - run: | + git config --global url."https://${{ secrets.PERSONAL_ACCESS_TOKEN }}@github.com/charmbracelet".insteadOf "https://github.com/charmbracelet" + git config --global url."https://${{ secrets.PERSONAL_ACCESS_TOKEN }}@github.com/charmcli".insteadOf "https://github.com/charmcli" + if: env.GH_PAT != '' + - name: Install Go + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 + with: + go-version-file: go.mod + cache: true + - name: Tidy Go modules + run: go mod tidy + - name: Check for changes + run: git diff --exit-code + - name: Build + run: go build ./... + - name: Test + run: go test ./... + + dependabot: + needs: [build, govulncheck] + runs-on: ubuntu-latest + permissions: + pull-requests: write + contents: write + if: ${{ github.actor == 'dependabot[bot]' && github.event_name == 'pull_request' }} + steps: + - id: metadata + uses: dependabot/fetch-metadata@25dd0e34f4fe68f24cc83900b1fe3fe149efef98 # v3.1.0 + with: + github-token: "${{ secrets.GITHUB_TOKEN }}" + - run: | + gh pr review --approve "$PR_URL" + gh pr merge --squash --auto "$PR_URL" + env: + PR_URL: ${{ github.event.pull_request.html_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/go.mod b/go.mod index e8abe95e9..2c6f4f359 100644 --- a/go.mod +++ b/go.mod @@ -66,11 +66,11 @@ require ( go.opentelemetry.io/otel/metric v1.39.0 // indirect go.opentelemetry.io/otel/trace v1.39.0 // indirect go.yaml.in/yaml/v4 v4.0.0-rc.3 // indirect - golang.org/x/crypto v0.47.0 // indirect - golang.org/x/net v0.49.0 // indirect - golang.org/x/sync v0.19.0 // indirect - golang.org/x/sys v0.40.0 // indirect - golang.org/x/text v0.33.0 // indirect + golang.org/x/crypto v0.51.0 // indirect + golang.org/x/net v0.55.0 // indirect + golang.org/x/sync v0.20.0 // indirect + golang.org/x/sys v0.45.0 // indirect + golang.org/x/text v0.37.0 // indirect golang.org/x/time v0.14.0 // indirect google.golang.org/api v0.264.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 // indirect diff --git a/go.sum b/go.sum index 8920215f4..ed978e6e9 100644 --- a/go.sum +++ b/go.sum @@ -152,18 +152,18 @@ go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6 go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= go.yaml.in/yaml/v4 v4.0.0-rc.3 h1:3h1fjsh1CTAPjW7q/EMe+C8shx5d8ctzZTrLcs/j8Go= go.yaml.in/yaml/v4 v4.0.0-rc.3/go.mod h1:aZqd9kCMsGL7AuUv/m/PvWLdg5sjJsZ4oHDEnfPPfY0= -golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8= -golang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A= -golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= -golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= +golang.org/x/crypto v0.51.0 h1:IBPXwPfKxY7cWQZ38ZCIRPI50YLeevDLlLnyC5wRGTI= +golang.org/x/crypto v0.51.0/go.mod h1:8AdwkbraGNABw2kOX6YFPs3WM22XqI4EXEd8g+x7Oc8= +golang.org/x/net v0.55.0 h1:bcvxaJn3e1U6InsFWt1JUq1aSjnRxLzT2rtD2KfkDF8= +golang.org/x/net v0.55.0/go.mod h1:L5U2KuzuOe1lY7Z+aWVIKK6qEeJXnXV9yzGA+WCHJww= golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs= golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q= -golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= -golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= -golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= -golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= -golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sys v0.45.0 h1:dO4czNzziLiiXplLQgBCEpCvXQ3dnkn0SdaZSYdQ+FY= +golang.org/x/sys v0.45.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc= +golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38= golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= diff --git a/providers/anthropic/anthropic.go b/providers/anthropic/anthropic.go index 6c2bc38f2..66b38a414 100644 --- a/providers/anthropic/anthropic.go +++ b/providers/anthropic/anthropic.go @@ -11,6 +11,7 @@ import ( "io" "maps" "math" + "strconv" "strings" "charm.land/fantasy" @@ -38,6 +39,51 @@ func betaRequestOptions(flags []string) []option.RequestOption { return opts } +func thinkingDisplay(providerOptions *ProviderOptions, modelID string) (ThinkingDisplay, bool) { + if providerOptions != nil && providerOptions.ThinkingDisplay != nil && *providerOptions.ThinkingDisplay != "" { + return *providerOptions.ThinkingDisplay, true + } + if defaultsToOmittedThinkingDisplay(modelID) { + return ThinkingDisplaySummarized, true + } + return "", false +} + +func defaultsToAdaptiveThinking(model string) bool { + model = strings.ToLower(strings.TrimSpace(model)) + return strings.Contains(model, "claude-mythos-preview") +} + +func requiresAdaptiveThinking(model string) bool { + return defaultsToAdaptiveThinking(model) || defaultsToOmittedOpusThinkingDisplay(model) +} + +func setThinkingDisplay(param interface{ SetExtraFields(map[string]any) }, display ThinkingDisplay) { + param.SetExtraFields(map[string]any{"display": string(display)}) +} + +func defaultsToOmittedThinkingDisplay(model string) bool { + model = strings.ToLower(strings.TrimSpace(model)) + return defaultsToAdaptiveThinking(model) || defaultsToOmittedOpusThinkingDisplay(model) +} + +func defaultsToOmittedOpusThinkingDisplay(model string) bool { + _, suffix, ok := strings.Cut(model, "claude-opus-4-") + if !ok { + return false + } + + versionEnd := 0 + for versionEnd < len(suffix) && suffix[versionEnd] >= '0' && suffix[versionEnd] <= '9' { + versionEnd++ + } + if versionEnd == 0 || versionEnd > 2 { + return false + } + minor, err := strconv.Atoi(suffix[:versionEnd]) + return err == nil && minor >= 7 +} + // buildRequestOptions constructs the common request options shared // by Generate and Stream: user-agent, raw tool injection, and any // beta API flags. @@ -335,12 +381,26 @@ func (a languageModel) prepareParams(call fantasy.Call) ( Effort: anthropic.OutputConfigEffort(effort), } adaptive := anthropic.NewThinkingConfigAdaptiveParam() + if display, ok := thinkingDisplay(providerOptions, a.modelID); ok { + setThinkingDisplay(&adaptive, display) + } params.Thinking.OfAdaptive = &adaptive case providerOptions.Thinking != nil: if providerOptions.Thinking.BudgetTokens == 0 { return nil, nil, nil, nil, &fantasy.Error{Title: "no budget", Message: "thinking requires budget"} } - params.Thinking = anthropic.ThinkingConfigParamOfEnabled(providerOptions.Thinking.BudgetTokens) + if requiresAdaptiveThinking(a.modelID) { + adaptive := anthropic.NewThinkingConfigAdaptiveParam() + if display, ok := thinkingDisplay(providerOptions, a.modelID); ok { + setThinkingDisplay(&adaptive, display) + } + params.Thinking.OfAdaptive = &adaptive + } else { + params.Thinking = anthropic.ThinkingConfigParamOfEnabled(providerOptions.Thinking.BudgetTokens) + if display, ok := thinkingDisplay(providerOptions, a.modelID); ok { + setThinkingDisplay(params.Thinking.OfEnabled, display) + } + } if call.Temperature != nil { params.Temperature = param.Opt[float64]{} warnings = append(warnings, fantasy.CallWarning{ @@ -365,6 +425,12 @@ func (a languageModel) prepareParams(call fantasy.Call) ( Details: "TopK is not supported when thinking is enabled", }) } + case defaultsToAdaptiveThinking(a.modelID): + adaptive := anthropic.NewThinkingConfigAdaptiveParam() + if display, ok := thinkingDisplay(providerOptions, a.modelID); ok { + setThinkingDisplay(&adaptive, display) + } + params.Thinking.OfAdaptive = &adaptive } if len(call.Tools) > 0 { diff --git a/providers/anthropic/anthropic_test.go b/providers/anthropic/anthropic_test.go index d2a8a83d7..6ec1b6e40 100644 --- a/providers/anthropic/anthropic_test.go +++ b/providers/anthropic/anthropic_test.go @@ -561,6 +561,7 @@ func TestParseOptions_Effort(t *testing.T) { "send_reasoning": true, "thinking": map[string]any{"budget_tokens": int64(2048)}, "effort": "medium", + "thinking_display": "summarized", "disable_parallel_tool_use": true, }) require.NoError(t, err) @@ -570,6 +571,8 @@ func TestParseOptions_Effort(t *testing.T) { require.Equal(t, int64(2048), options.Thinking.BudgetTokens) require.NotNil(t, options.Effort) require.Equal(t, EffortMedium, *options.Effort) + require.NotNil(t, options.ThinkingDisplay) + require.Equal(t, ThinkingDisplaySummarized, *options.ThinkingDisplay) require.NotNil(t, options.DisableParallelToolUse) require.True(t, *options.DisableParallelToolUse) } @@ -604,6 +607,194 @@ func TestGenerate_SendsOutputConfigEffort(t *testing.T) { requireAnthropicEffort(t, call.body, EffortMedium) } +func TestGenerate_SendsThinkingDisplay(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + model string + options func() *ProviderOptions + wantType string + wantDisplay string + wantBudget int64 + }{ + { + name: "explicit display with adaptive thinking", + model: "claude-sonnet-4-20250514", + options: func() *ProviderOptions { + effort := EffortMedium + display := ThinkingDisplayOmitted + return &ProviderOptions{Effort: &effort, ThinkingDisplay: &display} + }, + wantType: "adaptive", + wantDisplay: "omitted", + }, + { + name: "explicit display with budget thinking", + model: "claude-sonnet-4-20250514", + options: func() *ProviderOptions { + display := ThinkingDisplaySummarized + return &ProviderOptions{ + Thinking: &ThinkingProviderOption{BudgetTokens: 2048}, + ThinkingDisplay: &display, + } + }, + wantType: "enabled", + wantDisplay: "summarized", + wantBudget: 2048, + }, + { + name: "opus models default to summarized display", + model: "claude-opus-4-7-20260101", + options: func() *ProviderOptions { + effort := EffortHigh + return &ProviderOptions{Effort: &effort} + }, + wantType: "adaptive", + wantDisplay: "summarized", + }, + { + name: "bedrock opus models default to summarized display", + model: "us.anthropic.claude-opus-4-8-20260101-v1:0", + options: func() *ProviderOptions { + effort := EffortHigh + return &ProviderOptions{Effort: &effort} + }, + wantType: "adaptive", + wantDisplay: "summarized", + }, + { + name: "explicit display overrides opus default", + model: "claude-opus-4-8-20260101", + options: func() *ProviderOptions { + effort := EffortHigh + display := ThinkingDisplayOmitted + return &ProviderOptions{Effort: &effort, ThinkingDisplay: &display} + }, + wantType: "adaptive", + wantDisplay: "omitted", + }, + { + name: "mythos models default to adaptive thinking", + model: "claude-mythos-preview", + options: func() *ProviderOptions { + return &ProviderOptions{} + }, + wantType: "adaptive", + wantDisplay: "summarized", + }, + { + name: "opus models use adaptive thinking when budget thinking configured", + model: "claude-opus-4-7", + options: func() *ProviderOptions { + return &ProviderOptions{Thinking: &ThinkingProviderOption{BudgetTokens: 2048}} + }, + wantType: "adaptive", + wantDisplay: "summarized", + }, + { + name: "older opus models keep provider default", + model: "claude-opus-4-6-20260101", + options: func() *ProviderOptions { + effort := EffortHigh + return &ProviderOptions{Effort: &effort} + }, + wantType: "adaptive", + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + server, calls := newAnthropicJSONServer(mockAnthropicGenerateResponse()) + defer server.Close() + + provider, err := New( + WithAPIKey("test-api-key"), + WithBaseURL(server.URL), + ) + require.NoError(t, err) + + model, err := provider.LanguageModel(context.Background(), tt.model) + require.NoError(t, err) + + _, err = model.Generate(context.Background(), fantasy.Call{ + Prompt: testPrompt(), + ProviderOptions: NewProviderOptions(tt.options()), + }) + require.NoError(t, err) + + call := awaitAnthropicCall(t, calls) + thinking, ok := call.body["thinking"].(map[string]any) + require.True(t, ok) + require.Equal(t, tt.wantType, thinking["type"]) + if tt.wantDisplay == "" { + require.NotContains(t, thinking, "display") + } else { + require.Equal(t, tt.wantDisplay, thinking["display"]) + } + if tt.wantBudget != 0 { + require.InDelta(t, tt.wantBudget, thinking["budget_tokens"], 0) + } + }) + } +} + +func TestGenerate_DoesNotEnableThinkingForPlainOpus(t *testing.T) { + t.Parallel() + + server, calls := newAnthropicJSONServer(mockAnthropicGenerateResponse()) + defer server.Close() + + provider, err := New( + WithAPIKey("test-api-key"), + WithBaseURL(server.URL), + ) + require.NoError(t, err) + + model, err := provider.LanguageModel(context.Background(), "claude-opus-4-7") + require.NoError(t, err) + + _, err = model.Generate(context.Background(), fantasy.Call{Prompt: testPrompt()}) + require.NoError(t, err) + + call := awaitAnthropicCall(t, calls) + require.NotContains(t, call.body, "thinking") +} + +func TestDefaultsToOmittedThinkingDisplay(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + model string + want bool + }{ + {name: "opus 4.7 alias", model: "claude-opus-4-7", want: true}, + {name: "opus 4.7", model: "claude-opus-4-7-20260101", want: true}, + {name: "opus 4.10", model: "claude-opus-4-10-20260101", want: true}, + {name: "bedrock opus 4.8 alias", model: "us.anthropic.claude-opus-4-8-v1", want: true}, + {name: "bedrock opus 4.8", model: "us.anthropic.claude-opus-4-8-20260101-v1:0", want: true}, + {name: "mythos preview", model: "claude-mythos-preview", want: true}, + {name: "bedrock mythos preview", model: "anthropic.claude-mythos-preview", want: true}, + {name: "opus 4.6", model: "claude-opus-4-6-20260101", want: false}, + {name: "opus 4 date only", model: "claude-opus-4-20250514", want: false}, + {name: "bedrock opus 4 date only", model: "us.anthropic.claude-opus-4-20250514-v1:0", want: false}, + {name: "sonnet", model: "claude-sonnet-4-20250514", want: false}, + {name: "no minor", model: "claude-opus-4", want: false}, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + require.Equal(t, tt.want, defaultsToOmittedThinkingDisplay(tt.model)) + }) + } +} + func TestStream_SendsOutputConfigEffort(t *testing.T) { t.Parallel() diff --git a/providers/anthropic/provider_options.go b/providers/anthropic/provider_options.go index 14ae8111b..ed93005b0 100644 --- a/providers/anthropic/provider_options.go +++ b/providers/anthropic/provider_options.go @@ -27,6 +27,16 @@ const ( EffortMax Effort = "max" ) +// ThinkingDisplay controls whether Anthropic returns visible thinking content. +type ThinkingDisplay string + +const ( + // ThinkingDisplaySummarized requests visible summarized thinking content. + ThinkingDisplaySummarized ThinkingDisplay = "summarized" + // ThinkingDisplayOmitted requests hidden thinking content. + ThinkingDisplayOmitted ThinkingDisplay = "omitted" +) + // Global type identifiers for Anthropic-specific provider data. const ( TypeProviderOptions = Name + ".options" @@ -72,6 +82,7 @@ type ProviderOptions struct { SendReasoning *bool `json:"send_reasoning"` Thinking *ThinkingProviderOption `json:"thinking"` Effort *Effort `json:"effort"` + ThinkingDisplay *ThinkingDisplay `json:"thinking_display"` DisableParallelToolUse *bool `json:"disable_parallel_tool_use"` } diff --git a/providers/openai/responses_language_model.go b/providers/openai/responses_language_model.go index 077c27325..26b4cea47 100644 --- a/providers/openai/responses_language_model.go +++ b/providers/openai/responses_language_model.go @@ -166,7 +166,8 @@ func (o responsesLanguageModel) prepareParams(call fantasy.Call) (*responses.Res params.Store = param.NewOpt(false) } - if openaiOptions != nil && openaiOptions.PreviousResponseID != nil && *openaiOptions.PreviousResponseID != "" { + hasPreviousResponseID := openaiOptions != nil && openaiOptions.PreviousResponseID != nil && *openaiOptions.PreviousResponseID != "" + if hasPreviousResponseID { if err := validatePreviousResponseIDPrompt(call.Prompt); err != nil { return nil, warnings, err } @@ -177,7 +178,7 @@ func (o responsesLanguageModel) prepareParams(call fantasy.Call) (*responses.Res } storeEnabled := openaiOptions != nil && openaiOptions.Store != nil && *openaiOptions.Store - input, inputWarnings, err := toResponsesPrompt(call.Prompt, modelConfig.systemMessageMode, storeEnabled) + input, inputWarnings, err := toResponsesPrompt(call.Prompt, modelConfig.systemMessageMode, storeEnabled, hasPreviousResponseID) warnings = append(warnings, inputWarnings...) if err != nil { return nil, warnings, err @@ -400,9 +401,10 @@ func responsesUsage(resp responses.Response) fantasy.Usage { return usage } -func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bool) (responses.ResponseInputParam, []fantasy.CallWarning, error) { +func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bool, allowOrphanFunctionOutputs ...bool) (responses.ResponseInputParam, []fantasy.CallWarning, error) { var input responses.ResponseInputParam var warnings []fantasy.CallWarning + allowOrphanOutputs := len(allowOrphanFunctionOutputs) > 0 && allowOrphanFunctionOutputs[0] // First pass: collect raw JSON for computer_call output items. // This enables faithful round-tripping via param.Override. @@ -741,7 +743,7 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo } } - if err := validateResponsesInput(input); err != nil { + if err := validateResponsesInput(input, allowOrphanOutputs); err != nil { return nil, warnings, err } @@ -753,14 +755,15 @@ func isResponsesWebSearchToolCall(toolCallPart fantasy.ToolCallPart) bool { toolCallPart.ToolName == "web_search_preview" } -func validateResponsesInput(input responses.ResponseInputParam) error { - if err := validateResponsesFunctionCallOutputs(input); err != nil { +func validateResponsesInput(input responses.ResponseInputParam, allowOrphanFunctionOutputs ...bool) error { + allowOrphanOutputs := len(allowOrphanFunctionOutputs) > 0 && allowOrphanFunctionOutputs[0] + if err := validateResponsesFunctionCallOutputs(input, allowOrphanOutputs); err != nil { return err } return validateResponsesItemReferences(input) } -func validateResponsesFunctionCallOutputs(input responses.ResponseInputParam) error { +func validateResponsesFunctionCallOutputs(input responses.ResponseInputParam, allowOrphanOutputs bool) error { type callState struct { calls int outputs int @@ -818,6 +821,9 @@ func validateResponsesFunctionCallOutputs(input responses.ResponseInputParam) er for _, callID := range outputIDs { state := states[callID] if state.calls == 0 { + if allowOrphanOutputs { + continue + } return fmt.Errorf("openai responses prompt has function_call_output without function_call for call_id %q", callID) } if state.firstOutput < state.firstCall { diff --git a/providertests/openai_computer_use_test.go b/providertests/openai_computer_use_test.go index eda625920..ff939cd29 100644 --- a/providertests/openai_computer_use_test.go +++ b/providertests/openai_computer_use_test.go @@ -95,7 +95,7 @@ func TestOpenAIComputerUse(t *testing.T) { result, err := agent.Generate(t.Context(), fantasy.AgentCall{ Prompt: "Take a screenshot of the desktop", - MaxOutputTokens: new(int64(4000)), + MaxOutputTokens: fantasy.Opt[int64](4000), ProviderOptions: providerOpts, }) require.NoError(t, err) @@ -134,7 +134,7 @@ func TestOpenAIComputerUse(t *testing.T) { result, err := agent.Stream(t.Context(), fantasy.AgentStreamCall{ Prompt: "Take a screenshot of the desktop", - MaxOutputTokens: new(int64(4000)), + MaxOutputTokens: fantasy.Opt[int64](4000), ProviderOptions: providerOpts, }) require.NoError(t, err) @@ -238,7 +238,7 @@ func TestOpenAIComputerUse_AllActions(t *testing.T) { result, err := agent.Generate(t.Context(), fantasy.AgentCall{ Prompt: prompt, - MaxOutputTokens: new(int64(16000)), + MaxOutputTokens: fantasy.Opt[int64](16000), ProviderOptions: providerOpts, }) require.NoError(t, err)