fix(litellm): spread payload into deferred final call so reasoning_effort carries over

waleedlatif1 · claude · waleedlatif1 · commit bfd6fac6d3c5 · 2026-05-29T13:31:20.000-07:00
The non-streaming deferred finalPayload was built from hand-picked fields, so
it dropped reasoning_effort (and would drop any field added to payload later),
diverging from the streaming path, which spreads ...payload. Spread payload
here too for consistency.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
diff --git a/apps/sim/providers/litellm/index.test.ts b/apps/sim/providers/litellm/index.test.ts
@@ -165,6 +165,7 @@ describe('litellmProvider.executeRequest', () => {
 
     const result = await run({
       tools: [tool('known')],
+      reasoningEffort: 'high',
       responseFormat: { name: 'r', schema: { type: 'object', properties: {} } },
     })
 
@@ -176,6 +177,7 @@ describe('litellmProvider.executeRequest', () => {
     expect(final.tools).toBeDefined()
     expect(final.tool_choice).toBe('none')
     expect(final.parallel_tool_calls).toBe(false)
+    expect(final.reasoning_effort).toBe('high')
     expect(result.content).toBe('{"answer":1}')
   })
 
diff --git a/apps/sim/providers/litellm/index.ts b/apps/sim/providers/litellm/index.ts
@@ -687,20 +687,18 @@ export const litellmProvider: ProviderConfig = {
         logger.info('Applying deferred JSON schema response format after tool processing')
 
         const finalFormatStartTime = Date.now()
+        // Spread payload so all request fields carry over (model, temperature,
+        // max_completion_tokens, reasoning_effort, tools) — matching the streaming path.
+        // 'none' forces the structured answer instead of another tool_calls round that
+        // would leave content stale; tools stay defined for backends like Anthropic that
+        // reject a tool-result history without them; parallel calls off per OpenAI's rule.
         const finalPayload: any = {
-          model: payload.model,
+          ...payload,
           messages: currentMessages,
           response_format: responseFormatPayload,
-          // Force the structured answer: 'none' stops the model from returning another
-          // tool_calls round (which would leave content stale). Keep tools defined for
-          // backends (e.g. Anthropic) that reject a tool-result history without them, and
-          // disable parallel calls per OpenAI's strict-outputs-with-tools rule.
-          tools: payload.tools,
           tool_choice: 'none',
           parallel_tool_calls: false,
         }
-        if (request.temperature !== undefined) finalPayload.temperature = request.temperature
-        if (request.maxTokens != null) finalPayload.max_completion_tokens = request.maxTokens
 
         currentResponse = await litellm.chat.completions.create(
           finalPayload,