diff --git a/examples/06_openai_codex_oauth.py b/examples/06_openai_codex_oauth.py index 03493d9..31cc09b 100644 --- a/examples/06_openai_codex_oauth.py +++ b/examples/06_openai_codex_oauth.py @@ -27,7 +27,7 @@ def parse_args() -> argparse.Namespace: ) parser.add_argument( "--model", - default=os.getenv("REPUBLIC_CODEX_MODEL", "openai:gpt-5-codex"), + default=os.getenv("REPUBLIC_CODEX_MODEL", "openai:gpt-5.3-codex"), help="Model to use after login.", ) parser.add_argument( @@ -48,13 +48,15 @@ def prompt_for_redirect(authorize_url: str) -> str: def main() -> None: args = parse_args() + resolver = openai_codex_oauth_resolver() tokens = load_openai_codex_oauth_tokens() - if tokens is None or args.force_login: + if args.force_login or resolver("openai") is None: tokens = login_openai_codex_oauth( prompt_for_redirect=None, ) print("login: ok") else: + tokens = load_openai_codex_oauth_tokens() print("login: reused") print("account_id:", tokens.account_id or "-") @@ -63,7 +65,7 @@ def main() -> None: llm = LLM( model=args.model, - api_key_resolver=openai_codex_oauth_resolver(), + api_key_resolver=resolver, ) out = llm.chat(args.prompt) print("text:", out) diff --git a/examples/07_github_copilot_oauth.py b/examples/07_github_copilot_oauth.py index 59d0c38..8662f62 100644 --- a/examples/07_github_copilot_oauth.py +++ b/examples/07_github_copilot_oauth.py @@ -121,14 +121,14 @@ def _run_mock(model: str) -> None: import republic.core.execution as execution_module original_http_client = auth_module.httpx.Client - original_anyllm_create = execution_module.AnyLLM.create + original_create_anyllm_client = execution_module.create_anyllm_client def _create_mock_client(provider: str, **kwargs: Any) -> FakeGitHubModelsClient: del provider, kwargs return FakeGitHubModelsClient() auth_module.httpx.Client = FakeHTTPClient - execution_module.AnyLLM.create = _create_mock_client + execution_module.create_anyllm_client = _create_mock_client try: with tempfile.TemporaryDirectory(prefix="republic-copilot-smoke-") as temp_dir: @@ -151,7 +151,7 @@ def _create_mock_client(provider: str, **kwargs: Any) -> FakeGitHubModelsClient: print("mock chat:", text) finally: auth_module.httpx.Client = original_http_client - execution_module.AnyLLM.create = original_anyllm_create + execution_module.create_anyllm_client = original_create_anyllm_client def _run_live(model: str, prompt: str) -> None: diff --git a/pyproject.toml b/pyproject.toml index cdedef3..b71bc27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ keywords = [ ] requires-python = ">=3.11,<4.0" dependencies = [ - "any-llm-sdk>=1.13.0", + "any-llm-sdk>=1.14.0", "authlib>=1.6.5", "httpx>=0.28.1", "pydantic>=2.7.0", diff --git a/src/republic/clients/openai_codex.py b/src/republic/clients/openai_codex.py index 95755d1..fc3a057 100644 --- a/src/republic/clients/openai_codex.py +++ b/src/republic/clients/openai_codex.py @@ -135,7 +135,7 @@ def _build_response( payload: dict[str, Any] = { "id": getattr(completed_response, "id", None) or "resp_codex", "created_at": getattr(completed_response, "created_at", None) or 0, - "model": getattr(completed_response, "model", None) or "gpt-5-codex", + "model": getattr(completed_response, "model", None) or "gpt-5.3-codex", "object": getattr(completed_response, "object", None) or "response", "output": OpenAICodexProvider._build_response_output( completed_response=completed_response, diff --git a/src/republic/core/execution.py b/src/republic/core/execution.py index 6a363ba..793e224 100644 --- a/src/republic/core/execution.py +++ b/src/republic/core/execution.py @@ -128,13 +128,17 @@ def resolve_model_provider(model: str, provider: str | None) -> tuple[str, str]: ) return provider, model - if ":" not in model: - raise RepublicError(ErrorKind.INVALID_INPUT, "Model must be in 'provider:model' format.") + try: + provider_name, model_id = AnyLLM.split_model_provider(model) + except Exception as exc: + if ":" not in model: + raise RepublicError(ErrorKind.INVALID_INPUT, "Model must be in 'provider:model' format.") from exc + provider_name, model_id = model.split(":", 1) - provider_name, model_id = model.split(":", 1) - if not provider_name or not model_id: + provider_value = getattr(provider_name, "value", provider_name) + if not provider_value or not model_id: raise RepublicError(ErrorKind.INVALID_INPUT, "Model must be in 'provider:model' format.") - return provider_name, model_id + return str(provider_value), model_id def resolve_fallback(self, model: str) -> tuple[str, str]: if ":" in model: @@ -369,10 +373,9 @@ def _decide_kwargs_for_provider( self, provider: str, max_tokens: int | None, kwargs: dict[str, Any] ) -> dict[str, Any]: clean_kwargs = dict(kwargs) - max_tokens_arg = provider_policies.completion_max_tokens_arg(provider) - if max_tokens_arg in clean_kwargs: + if "max_tokens" in clean_kwargs or max_tokens is None: return clean_kwargs - return {**clean_kwargs, max_tokens_arg: max_tokens} + return {**clean_kwargs, "max_tokens": max_tokens} def _decide_responses_kwargs( self, @@ -461,14 +464,13 @@ def _selected_transport( ): raise RepublicError( ErrorKind.INVALID_INPUT, - f"{provider_name}:{model_id}: messages format is only valid for Anthropic models", + f"{provider_name}:{model_id}: messages format is not supported by this provider", ) return "messages" reason = provider_policies.responses_rejection_reason( provider_name=provider_name, model_id=model_id, - has_tools=bool(tools_payload), supports_responses=bool(getattr(client, "SUPPORTS_RESPONSES", False)), ) if reason is not None: diff --git a/src/republic/core/provider_policies.py b/src/republic/core/provider_policies.py index 334eee4..e67c83a 100644 --- a/src/republic/core/provider_policies.py +++ b/src/republic/core/provider_policies.py @@ -1,34 +1,48 @@ -"""Provider policy decisions shared across request paths.""" +"""Provider capability decisions shared across request paths.""" from __future__ import annotations from dataclasses import dataclass +from any_llm import AnyLLM +from any_llm.exceptions import UnsupportedProviderError +from any_llm.types.provider import ProviderMetadata + @dataclass(frozen=True) class ProviderPolicy: - enable_responses_without_capability: bool = False include_usage_in_completion_stream: bool = False - completion_max_tokens_arg: str = "max_tokens" - responses_tools_blocked_model_prefixes: tuple[str, ...] = () + metadata: ProviderMetadata | None = None _DEFAULT_POLICY = ProviderPolicy() _POLICIES: dict[str, ProviderPolicy] = { "github-copilot": ProviderPolicy( include_usage_in_completion_stream=True, - completion_max_tokens_arg="max_tokens", + metadata=ProviderMetadata( + name="github-copilot", + env_key="GITHUB_TOKEN", + env_api_base=None, + doc_url="https://docs.github.com/en/copilot", + streaming=True, + reasoning=False, + completion=True, + embedding=False, + responses=False, + image=True, + pdf=True, + class_name="GitHubCopilotProvider", + list_models=False, + messages=True, + batch_completion=False, + ), ), + # Stream usage is not represented in any-llm provider metadata. Keep this as + # a narrow default for providers whose SDK path accepts OpenAI stream_options. "openai": ProviderPolicy( include_usage_in_completion_stream=True, - completion_max_tokens_arg="max_completion_tokens", - ), - # any-llm supports OpenRouter responses in practice but still reports SUPPORTS_RESPONSES=False. - "openrouter": ProviderPolicy( - enable_responses_without_capability=True, - include_usage_in_completion_stream=True, - responses_tools_blocked_model_prefixes=("anthropic/",), ), + "openrouter": ProviderPolicy(include_usage_in_completion_stream=True), } @@ -40,37 +54,37 @@ def provider_policy(provider_name: str) -> ProviderPolicy: return _POLICIES.get(_normalize_provider_name(provider_name), _DEFAULT_POLICY) -def _responses_tools_blocked_for_model(provider_name: str, model_id: str) -> bool: - policy = provider_policy(provider_name) - lowered_model = model_id.strip().lower() - return any(lowered_model.startswith(prefix) for prefix in policy.responses_tools_blocked_model_prefixes) +def provider_metadata(provider_name: str) -> ProviderMetadata | None: + normalized_provider = _normalize_provider_name(provider_name) + local_metadata = provider_policy(normalized_provider).metadata + if local_metadata is not None: + return local_metadata + try: + return AnyLLM.get_provider_class(normalized_provider).get_provider_metadata() + except (AttributeError, ImportError, UnsupportedProviderError): + return None def responses_rejection_reason( *, provider_name: str, model_id: str, - has_tools: bool, supports_responses: bool, ) -> str | None: - if has_tools and _responses_tools_blocked_for_model(provider_name, model_id): - return "responses format is not supported for this model when tools are enabled" if supports_responses: return None - if provider_policy(provider_name).enable_responses_without_capability: + metadata = provider_metadata(provider_name) + if metadata is not None and metadata.responses: return None return "responses format is not supported by this provider" def supports_messages_format(*, provider_name: str, model_id: str) -> bool: - normalized_provider = _normalize_provider_name(provider_name) - normalized_model = model_id.strip().lower() - return normalized_provider == "anthropic" or normalized_model.startswith("anthropic/") + metadata = provider_metadata(provider_name) + if metadata is not None: + return metadata.messages + return model_id.strip().lower().startswith("anthropic/") def should_include_completion_stream_usage(provider_name: str) -> bool: return provider_policy(provider_name).include_usage_in_completion_stream - - -def completion_max_tokens_arg(provider_name: str) -> str: - return provider_policy(provider_name).completion_max_tokens_arg diff --git a/tests/fakes.py b/tests/fakes.py index 88fc635..dc325b4 100644 --- a/tests/fakes.py +++ b/tests/fakes.py @@ -219,7 +219,7 @@ def make_responses_response( return Response.model_validate({ "id": "resp_1", "created_at": 1, - "model": "gpt-5-codex", + "model": "gpt-5.3-codex", "object": "response", "output": output, "parallel_tool_calls": False, @@ -279,7 +279,7 @@ def make_responses_completed(usage: dict[str, Any] | None = None) -> Any: def make_responses_completed_with_empty_output( usage: dict[str, Any] | None = None, *, - model: str = "gpt-5-codex", + model: str = "gpt-5.3-codex", ) -> Any: """Simulate a Codex backend response.completed event with an SDK Response whose output is empty.""" full_usage: dict[str, Any] = { diff --git a/tests/test_openai_codex_transport.py b/tests/test_openai_codex_transport.py index 8d767d0..0d08f50 100644 --- a/tests/test_openai_codex_transport.py +++ b/tests/test_openai_codex_transport.py @@ -90,7 +90,7 @@ async def __anext__(self) -> Any: def _build_codex_llm( monkeypatch, *queued_responses: Any, - model: str = "openai:gpt-5-codex", + model: str = "openai:gpt-5.3-codex", ) -> tuple[LLM, list[dict[str, Any]], list[dict[str, Any]]]: init_calls: list[dict[str, Any]] = [] api_calls: list[dict[str, Any]] = [] diff --git a/tests/test_provider_policies.py b/tests/test_provider_policies.py index 7a7bf08..f330fb1 100644 --- a/tests/test_provider_policies.py +++ b/tests/test_provider_policies.py @@ -1,40 +1,25 @@ from republic.core import provider_policies -def test_responses_rejection_reason_none_when_openrouter_responses_available() -> None: - assert ( - provider_policies.responses_rejection_reason( - provider_name="openrouter", - model_id="openai/gpt-4o-mini", - has_tools=False, - supports_responses=False, - ) - is None +def test_responses_rejection_reason_follows_sdk_metadata() -> None: + reason = provider_policies.responses_rejection_reason( + provider_name="openrouter", + model_id="openai/gpt-4o-mini", + supports_responses=False, ) + assert reason == "responses format is not supported by this provider" def test_responses_rejection_reason_for_provider_without_responses() -> None: reason = provider_policies.responses_rejection_reason( provider_name="anthropic", model_id="claude-3-5-haiku-latest", - has_tools=False, supports_responses=False, ) assert reason is not None assert "not supported" in reason -def test_responses_rejection_reason_for_openrouter_anthropic_tools() -> None: - reason = provider_policies.responses_rejection_reason( - provider_name="openrouter", - model_id="anthropic/claude-3.5-haiku", - has_tools=True, - supports_responses=False, - ) - assert reason is not None - assert "tools" in reason - - def test_supports_messages_format() -> None: assert provider_policies.supports_messages_format( provider_name="anthropic", @@ -44,7 +29,7 @@ def test_supports_messages_format() -> None: provider_name="openrouter", model_id="anthropic/claude-3.5-haiku", ) - assert not provider_policies.supports_messages_format( + assert provider_policies.supports_messages_format( provider_name="openai", model_id="gpt-4o-mini", ) @@ -57,13 +42,5 @@ def test_completion_stream_usage_policy() -> None: assert not provider_policies.should_include_completion_stream_usage("anthropic") -def test_completion_max_tokens_arg_policy() -> None: - assert provider_policies.completion_max_tokens_arg("openai") == "max_completion_tokens" - assert provider_policies.completion_max_tokens_arg("openrouter") == "max_tokens" - assert provider_policies.completion_max_tokens_arg("github-copilot") == "max_tokens" - assert provider_policies.completion_max_tokens_arg("anthropic") == "max_tokens" - - def test_provider_policy_uses_exact_match_not_substring() -> None: assert not provider_policies.should_include_completion_stream_usage("my-openrouter-proxy") - assert provider_policies.completion_max_tokens_arg("my-openrouter-proxy") == "max_tokens" diff --git a/tests/test_responses_handling.py b/tests/test_responses_handling.py index 2282fcd..61220b0 100644 --- a/tests/test_responses_handling.py +++ b/tests/test_responses_handling.py @@ -106,16 +106,15 @@ def test_responses_api_format_uses_responses(fake_anyllm) -> None: assert client.calls[-1]["input_data"][0]["role"] == "user" -def test_openrouter_responses_works_even_if_provider_flag_is_false(fake_anyllm) -> None: +def test_openrouter_responses_follows_sdk_metadata(fake_anyllm) -> None: client = fake_anyllm.ensure("openrouter") client.SUPPORTS_RESPONSES = False - client.queue_responses(make_responses_response(text="hello")) llm = LLM(model="openrouter:openrouter/free", api_key="dummy", api_format="responses") - result = llm.chat("hi") - assert result == "hello" - assert client.calls[-1].get("responses") is True + with pytest.raises(RepublicError) as exc_info: + llm.chat("hi") + assert exc_info.value.kind == "invalid_input" def test_openrouter_anthropic_tools_rejects_responses_format(fake_anyllm) -> None: @@ -144,11 +143,12 @@ def test_messages_format_maps_to_completion(fake_anyllm) -> None: assert client.calls[-1].get("responses") is None -def test_messages_format_rejects_non_anthropic_model(fake_anyllm) -> None: +def test_messages_format_uses_sdk_metadata(fake_anyllm) -> None: + client = fake_anyllm.ensure("openai") + client.queue_completion(make_response(text="hello")) + llm = LLM(model="openai:gpt-4o-mini", api_key="dummy", api_format="messages") - with pytest.raises(RepublicError) as exc_info: - llm.chat("hi") - assert exc_info.value.kind == "invalid_input" + assert llm.chat("hi") == "hello" def test_responses_tool_choice_accepts_completion_function_shape(fake_anyllm) -> None: @@ -660,7 +660,7 @@ def test_stream_completion_defaults_include_usage(fake_anyllm) -> None: assert client.calls[-1].get("stream_options") == {"include_usage": True} -def test_openai_completion_uses_max_completion_tokens(fake_anyllm) -> None: +def test_openai_completion_passes_max_tokens_to_anyllm(fake_anyllm) -> None: client = fake_anyllm.ensure("openai") client.queue_completion(make_response(text="hello")) @@ -668,8 +668,8 @@ def test_openai_completion_uses_max_completion_tokens(fake_anyllm) -> None: assert llm.chat("Say hello", max_tokens=11) == "hello" call = client.calls[-1] - assert call.get("max_completion_tokens") == 11 - assert "max_tokens" not in call + assert call.get("max_tokens") == 11 + assert "max_completion_tokens" not in call def test_non_openai_completion_uses_max_tokens(fake_anyllm) -> None: diff --git a/uv.lock b/uv.lock index 5ee211b..3bf6843 100644 --- a/uv.lock +++ b/uv.lock @@ -32,7 +32,7 @@ wheels = [ [[package]] name = "any-llm-sdk" -version = "1.13.0" +version = "1.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anthropic" }, @@ -43,9 +43,9 @@ dependencies = [ { name = "rich" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/87/4b/dea4eeeb5e4e3e55fa6b37a7ec033a18a2527aabf565bd598071d6308fbd/any_llm_sdk-1.13.0.tar.gz", hash = "sha256:967c5f4dd099f5f6cc9673f2888d5550f6e821d25341d31133a05741c1ce903e", size = 153753, upload-time = "2026-03-23T10:27:29.843Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c4/da/94050acd3b81f5390ec16c3ad9967e9372e169bd85b294b3552e3957f58f/any_llm_sdk-1.14.0.tar.gz", hash = "sha256:223d8c83871a0f8667d53700f0eadaf27563651a00af62d6d94ccf70724232f0", size = 166266, upload-time = "2026-05-01T13:39:04.096Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/64/a1/0106667efdf870595981a0fc3e807675d1f8ded8dc8c20e936e6c42bcd95/any_llm_sdk-1.13.0-py3-none-any.whl", hash = "sha256:7e456a843cec249ae1cb3a498aa89df5baff8aa62d76a24718babb3b8a51e495", size = 216557, upload-time = "2026-03-23T10:27:28.306Z" }, + { url = "https://files.pythonhosted.org/packages/8e/d8/6834b916242071adde1ed945fe18e2e69f9f3f552addd41c6ed6be31aad2/any_llm_sdk-1.14.0-py3-none-any.whl", hash = "sha256:f5b554898ecfe814490688a7f2152130a45d4152abae453df37de8154fce4f80", size = 232195, upload-time = "2026-05-01T13:39:02.129Z" }, ] [[package]] @@ -1178,7 +1178,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "any-llm-sdk", specifier = ">=1.13.0" }, + { name = "any-llm-sdk", specifier = ">=1.14.0" }, { name = "authlib", specifier = ">=1.6.5" }, { name = "httpx", specifier = ">=0.28.1" }, { name = "pydantic", specifier = ">=2.7.0" },