5 changes: 4 additions & 1 deletion src/fast_agent/llm/provider/openai/llm_openai.py
@@ -1208,7 +1208,10 @@ def _prepare_api_request(
                 }
             )
         else:
-            base_args["max_tokens"] = request_params.maxTokens
+            if self.provider is Provider.AZURE:
+                base_args["max_completion_tokens"] = request_params.maxTokens
+            else:
+                base_args["max_tokens"] = request_params.maxTokens
         if tools:
             base_args["parallel_tool_calls"] = request_params.parallel_tool_calls
 
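For readers skimming the diff, here is a minimal standalone sketch of the token-limit branching introduced above. The Provider enum and token_limit_args helper below are hypothetical stand-ins written purely for illustration (only the identifiers max_tokens, max_completion_tokens, and Provider.AZURE come from the diff), and the sketch models only the non-reasoning else: branch that this hunk touches.

from enum import Enum, auto


class Provider(Enum):
    """Illustrative stand-in for the project's Provider enum."""
    OPENAI = auto()
    AZURE = auto()


def token_limit_args(provider: Provider, max_tokens: int) -> dict:
    """Sketch of the non-reasoning branch shown in the hunk above."""
    args: dict = {}
    if provider is Provider.AZURE:
        # After this change, Azure requests carry max_completion_tokens
        # instead of the older max_tokens field.
        args["max_completion_tokens"] = max_tokens
    else:
        args["max_tokens"] = max_tokens
    return args


# Mirrors the expectations encoded in the new unit test below.
assert token_limit_args(Provider.AZURE, 123) == {"max_completion_tokens": 123}
assert token_limit_args(Provider.OPENAI, 123) == {"max_tokens": 123}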
12 changes: 12 additions & 0 deletions tests/unit/fast_agent/llm/test_prepare_arguments.py
@@ -159,6 +159,18 @@ def test_openai_provider_arguments(self):
         assert "max_iterations" not in result # Should be excluded
         assert "parallel_tool_calls" not in result # Should be excluded
 
+    def test_openai_azure_uses_max_completion_tokens(self):
+        """Azure OpenAI should use max_completion_tokens instead of max_tokens."""
+        llm = OpenAILLM(provider=Provider.AZURE, request_params=RequestParams(model="gpt-4.1"))
+        params = RequestParams(model="gpt-4.1", maxTokens=123)
+
+        assert llm._reasoning is False
+
+        result = llm._prepare_api_request(messages=[], tools=None, request_params=params)
+
+        assert result["max_completion_tokens"] == 123
+        assert "max_tokens" not in result
+
     def test_anthropic_provider_arguments(self):
         """Test prepare_provider_arguments with Anthropic provider"""
         # Create an Anthropic LLM instance without initializing provider connections
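Assuming the repository's standard pytest setup, the new case can be run on its own with pytest tests/unit/fast_agent/llm/test_prepare_arguments.py -k azure_uses_max_completion_tokens, where the -k expression matches the test method added above.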