@@ -160,8 +160,6 @@ export const litellmProvider: ProviderConfig = {
160160 type : 'json_schema' as const ,
161161 json_schema : {
162162 name : request . responseFormat . name || 'response_schema' ,
163- // Strict mode requires additionalProperties:false and all-required keys;
164- // OpenAI-backed routes 400 without it.
165163 schema : isStrictResponseFormat
166164 ? enforceStrictSchema ( request . responseFormat . schema || request . responseFormat )
167165 : request . responseFormat . schema || request . responseFormat ,
@@ -195,8 +193,6 @@ export const litellmProvider: ProviderConfig = {
195193 }
196194 }
197195
198- // response_format + tools conflict on some backends (Anthropic rejects the pair,
199- // vLLM guided decoding suppresses tool calls), so defer the format past the tool loop.
200196 const deferResponseFormat = ! ! responseFormatPayload && hasActiveTools
201197 if ( responseFormatPayload && ! deferResponseFormat ) {
202198 payload . response_format = responseFormatPayload
@@ -499,8 +495,6 @@ export const litellmProvider: ProviderConfig = {
499495 respondedToolCallIds . add ( toolCall . id )
500496 }
501497
502- // Every tool_call needs a matching `tool` response or the next request 400s;
503- // stub any the model left unanswered (e.g. an unknown/filtered tool name).
504498 for ( const tc of toolCallsInResponse ) {
505499 if ( respondedToolCallIds . has ( tc . id ) ) continue
506500 currentMessages . push ( {
@@ -593,15 +587,11 @@ export const litellmProvider: ProviderConfig = {
593587 const streamingParams : ChatCompletionCreateParamsStreaming = {
594588 ...payload ,
595589 messages : currentMessages ,
596- // Tools are resolved; force a final answer so the model can't emit another
597- // tool_calls round the stream reader would drop. Keep tools defined for
598- // backends (e.g. Anthropic) that reject a tool-result history without them.
599590 tool_choice : 'none' ,
600591 stream : true ,
601592 stream_options : { include_usage : true } ,
602593 }
603594 if ( deferResponseFormat && responseFormatPayload ) {
604- // Disable parallel calls — OpenAI's rule for strict outputs alongside tools.
605595 streamingParams . response_format = responseFormatPayload
606596 streamingParams . parallel_tool_calls = false
607597 }
@@ -687,11 +677,6 @@ export const litellmProvider: ProviderConfig = {
687677 logger . info ( 'Applying deferred JSON schema response format after tool processing' )
688678
689679 const finalFormatStartTime = Date . now ( )
690- // Spread payload so all request fields carry over (model, temperature,
691- // max_completion_tokens, reasoning_effort, tools) — matching the streaming path.
692- // 'none' forces the structured answer instead of another tool_calls round that
693- // would leave content stale; tools stay defined for backends like Anthropic that
694- // reject a tool-result history without them; parallel calls off per OpenAI's rule.
695680 const finalPayload : any = {
696681 ...payload ,
697682 messages : currentMessages ,
0 commit comments