diff --git a/python/packages/autogen-ext/src/autogen_ext/models/openai/_openai_client.py b/python/packages/autogen-ext/src/autogen_ext/models/openai/_openai_client.py index a80e912534ab..3ff7016f748c 100644 --- a/python/packages/autogen-ext/src/autogen_ext/models/openai/_openai_client.py +++ b/python/packages/autogen-ext/src/autogen_ext/models/openai/_openai_client.py @@ -20,6 +20,7 @@ Optional, Sequence, Set, + Tuple, Type, Union, cast, @@ -305,6 +306,33 @@ def normalize_name(name: str) -> str: return re.sub(r"[^a-zA-Z0-9_-]", "_", name)[:64] +def _normalize_list_content(content: List[Any]) -> Tuple[str, str | None]: + """Flatten a list of content blocks into visible text and reasoning text. + + Some OpenAI-compatible reasoning endpoints (e.g. gpt-5, o1) return + ``message.content`` as a list of typed blocks instead of a string, such as + ``[{"type": "reasoning", "text": ...}, {"type": "text", "text": ...}]``. + Visible text comes from ``text``/``output_text`` blocks; reasoning comes from + ``reasoning``/``thinking`` blocks. Unknown block types are ignored. + """ + text_parts: List[str] = [] + reasoning_parts: List[str] = [] + for block in content: + if not isinstance(block, Mapping): + continue + typed_block = cast(Mapping[str, Any], block) + block_text = typed_block.get("text") + if not isinstance(block_text, str): + continue + block_type = typed_block.get("type") + if block_type in ("text", "output_text"): + text_parts.append(block_text) + elif block_type in ("reasoning", "thinking"): + reasoning_parts.append(block_text) + thought = "".join(reasoning_parts) if reasoning_parts else None + return "".join(text_parts), thought + + def count_tokens_openai( messages: Sequence[LLMMessage], model: str, @@ -775,7 +803,14 @@ async def create( else: # if not tool_calls, then it is a text response and we populate the content and thought fields. 
@pytest.mark.asyncio
async def test_create_normalizes_list_content(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that ``create`` flattens a block-list ``message.content`` into strings.

    The mocked completion carries a reasoning block, a text block and an
    unrelated ``image`` block; the result is expected to expose the text block
    as ``content`` and the reasoning block as ``thought``.
    """

    async def _mock_create(*args: Any, **kwargs: Any) -> ChatCompletion:
        # Build the message with no content first, then overwrite ``content``
        # with a list of typed blocks to imitate a block-style response.
        blocks = [
            {"type": "reasoning", "text": "let me think"},
            {"type": "text", "text": "the answer"},
            {"type": "image", "text": "ignored"},
        ]
        reply = ChatCompletionMessage(role="assistant", content=None)
        reply.content = blocks  # type: ignore[assignment]

        only_choice = Choice(index=0, finish_reason="stop", message=reply)
        token_usage = CompletionUsage(prompt_tokens=10, completion_tokens=10, total_tokens=20)
        return ChatCompletion(
            id="test_id",
            model="gpt-4o",
            object="chat.completion",
            created=0,
            choices=[only_choice],
            usage=token_usage,
        )

    monkeypatch.setattr(AsyncCompletions, "create", _mock_create)

    client = OpenAIChatCompletionClient(model="gpt-4o", api_key="")
    prompt = [UserMessage(content="Test message", source="user")]
    outcome = await client.create(prompt)

    # The ``image`` block must not leak into either field.
    assert outcome.content == "the answer"
    assert outcome.thought == "let me think"