diff --git a/src/fast_agent/llm/provider/openai/llm_openai.py b/src/fast_agent/llm/provider/openai/llm_openai.py
index 22d6a1df4..d0b596716 100644
--- a/src/fast_agent/llm/provider/openai/llm_openai.py
+++ b/src/fast_agent/llm/provider/openai/llm_openai.py
@@ -1208,7 +1208,10 @@ def _prepare_api_request(
                 }
             )
         else:
-            base_args["max_tokens"] = request_params.maxTokens
+            if self.provider is Provider.AZURE:
+                base_args["max_completion_tokens"] = request_params.maxTokens
+            else:
+                base_args["max_tokens"] = request_params.maxTokens
 
         if tools:
             base_args["parallel_tool_calls"] = request_params.parallel_tool_calls
diff --git a/tests/unit/fast_agent/llm/test_prepare_arguments.py b/tests/unit/fast_agent/llm/test_prepare_arguments.py
index da9b3b4bd..5c4b4b00a 100644
--- a/tests/unit/fast_agent/llm/test_prepare_arguments.py
+++ b/tests/unit/fast_agent/llm/test_prepare_arguments.py
@@ -159,6 +159,18 @@ def test_openai_provider_arguments(self):
         assert "max_iterations" not in result  # Should be excluded
         assert "parallel_tool_calls" not in result  # Should be excluded
 
+    def test_openai_azure_uses_max_completion_tokens(self):
+        """Azure OpenAI should use max_completion_tokens instead of max_tokens."""
+        llm = OpenAILLM(provider=Provider.AZURE, request_params=RequestParams(model="gpt-4.1"))
+        params = RequestParams(model="gpt-4.1", maxTokens=123)
+
+        assert llm._reasoning is False
+
+        result = llm._prepare_api_request(messages=[], tools=None, request_params=params)
+
+        assert result["max_completion_tokens"] == 123
+        assert "max_tokens" not in result
+
     def test_anthropic_provider_arguments(self):
         """Test prepare_provider_arguments with Anthropic provider"""
         # Create an Anthropic LLM instance without initializing provider connections