Skip to content

Commit bfd6fac

Browse files
waleedlatif1 and claude committed
fix(litellm): spread payload into deferred final call so reasoning_effort carries over
The non-streaming deferred finalPayload hand-picked its fields and therefore dropped reasoning_effort (and would drop any future payload field), diverging from the streaming path, which spreads ...payload. Spread payload here too for consistency.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 7f6c49a commit bfd6fac

2 files changed

Lines changed: 8 additions & 8 deletions

File tree

apps/sim/providers/litellm/index.test.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ describe('litellmProvider.executeRequest', () => {
165165

166166
const result = await run({
167167
tools: [tool('known')],
168+
reasoningEffort: 'high',
168169
responseFormat: { name: 'r', schema: { type: 'object', properties: {} } },
169170
})
170171

@@ -176,6 +177,7 @@ describe('litellmProvider.executeRequest', () => {
176177
expect(final.tools).toBeDefined()
177178
expect(final.tool_choice).toBe('none')
178179
expect(final.parallel_tool_calls).toBe(false)
180+
expect(final.reasoning_effort).toBe('high')
179181
expect(result.content).toBe('{"answer":1}')
180182
})
181183

apps/sim/providers/litellm/index.ts

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -687,20 +687,18 @@ export const litellmProvider: ProviderConfig = {
687687
logger.info('Applying deferred JSON schema response format after tool processing')
688688

689689
const finalFormatStartTime = Date.now()
690+
// Spread payload so all request fields carry over (model, temperature,
691+
// max_completion_tokens, reasoning_effort, tools) — matching the streaming path.
692+
// 'none' forces the structured answer instead of another tool_calls round that
693+
// would leave content stale; tools stay defined for backends like Anthropic that
694+
// reject a tool-result history without them; parallel calls off per OpenAI's rule.
690695
const finalPayload: any = {
691-
model: payload.model,
696+
...payload,
692697
messages: currentMessages,
693698
response_format: responseFormatPayload,
694-
// Force the structured answer: 'none' stops the model from returning another
695-
// tool_calls round (which would leave content stale). Keep tools defined for
696-
// backends (e.g. Anthropic) that reject a tool-result history without them, and
697-
// disable parallel calls per OpenAI's strict-outputs-with-tools rule.
698-
tools: payload.tools,
699699
tool_choice: 'none',
700700
parallel_tool_calls: false,
701701
}
702-
if (request.temperature !== undefined) finalPayload.temperature = request.temperature
703-
if (request.maxTokens != null) finalPayload.max_completion_tokens = request.maxTokens
704702

705703
currentResponse = await litellm.chat.completions.create(
706704
finalPayload,

0 commit comments

Comments
 (0)