diff --git a/.env.example b/.env.example index 924146613c45..84b2fdfc8f5f 100644 --- a/.env.example +++ b/.env.example @@ -124,6 +124,15 @@ # Optional base URL override: # XIAOMI_BASE_URL=https://api.xiaomimimo.com/v1 +# ============================================================================= +# LLM PROVIDER (Upstage Solar) +# ============================================================================= +# Upstage provides access to Upstage Solar models. +# Get your key at: https://console.upstage.ai/api-keys +# UPSTAGE_API_KEY=your_key_here +# Optional base URL override: +# UPSTAGE_BASE_URL=https://api.upstage.ai/v1 + # ============================================================================= # TOOL API KEYS # ============================================================================= diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 1080256e0ac1..c3b20fa8a5b6 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -227,6 +227,19 @@ def _strip_provider_prefix(model: str) -> str: "grok": 131072, # catch-all (grok-beta, unknown grok-*) # Kimi "kimi": 262144, + # Upstage Solar — api.upstage.ai/v1/models does not return context_length, + # so these fallbacks keep token budgeting / compression from probing down + # to the 128k default. Substring matching is longest-first, so the versioned + # ids win over the "solar-pro" rolling-alias entry, which in turn covers + # future solar-pro* releases at the Pro context size. + # Sources: Solar Pro 3 = 128K, Solar Pro 2 = 64K, Solar Mini = 32K, + # Solar Open 2 = 256K. + "solar-open2-preview": 262144, # 256K (longest-first: wins over solar-open2) + "solar-open2": 262144, # 256K + "solar-pro3": 131072, + "solar-pro": 131072, # rolling alias → latest Solar Pro (currently pro3) + "solar-pro2": 65536, + "solar-mini": 32768, # Tencent — Hy3 Preview (Hunyuan) with 256K context window. 
# OpenRouter live metadata reports 262144 (256 × 1024); align the # static fallback so cache and offline both agree (issue #22268). diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 021905c3ec05..ed1aa20584bf 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -287,6 +287,14 @@ class ProviderConfig: api_key_env_vars=("GMI_API_KEY",), base_url_env_var="GMI_BASE_URL", ), + "upstage": ProviderConfig( + id="upstage", + name="Upstage Solar", + auth_type="api_key", + inference_base_url="https://api.upstage.ai/v1", + api_key_env_vars=("UPSTAGE_API_KEY",), + base_url_env_var="UPSTAGE_BASE_URL", + ), "minimax": ProviderConfig( id="minimax", name="MiniMax", @@ -1491,6 +1499,7 @@ def resolve_provider( "step": "stepfun", "stepfun-coding-plan": "stepfun", "arcee-ai": "arcee", "arceeai": "arcee", "gmi-cloud": "gmi", "gmicloud": "gmi", + "solar": "upstage", "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn", "minimax-portal": "minimax-oauth", "minimax-global": "minimax-oauth", "minimax_oauth": "minimax-oauth", "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan", diff --git a/hermes_cli/models.py b/hermes_cli/models.py index b9b3c819c16c..cb73b59fce96 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -941,6 +941,7 @@ class ProviderEntry(NamedTuple): ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek; IAM or API key)"), ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint, your Azure AI deployment)"), ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (Reuses local Qwen CLI login)"), + ProviderEntry("upstage", "Upstage Solar", "Upstage (Solar API)"), ] # Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/ diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index ba25f7e6315a..a1e9813ceeed 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -196,6 +196,12 @@ class 
HermesOverlay: base_url_override="https://api.gmi-serving.com/v1", base_url_env_var="GMI_BASE_URL", ), + "upstage": HermesOverlay( + transport="openai_chat", + extra_env_vars=("UPSTAGE_API_KEY",), + base_url_override="https://api.upstage.ai/v1", + base_url_env_var="UPSTAGE_BASE_URL", + ), "ollama-cloud": HermesOverlay( transport="openai_chat", base_url_override="https://ollama.com/v1", @@ -348,6 +354,9 @@ class ProviderDef: "gmi-cloud": "gmi", "gmicloud": "gmi", + # upstage + "solar": "upstage", + # Local server aliases → virtual "local" concept (resolved via user config) "lmstudio": "lmstudio", "lm-studio": "lmstudio", @@ -371,6 +380,7 @@ class ProviderDef: "stepfun": "StepFun Step Plan", "xiaomi": "Xiaomi MiMo", "gmi": "GMI Cloud", + "upstage": "Upstage Solar", "tencent-tokenhub": "Tencent TokenHub", "lmstudio": "LM Studio", "local": "Local endpoint", diff --git a/plugins/model-providers/upstage/__init__.py b/plugins/model-providers/upstage/__init__.py new file mode 100644 index 000000000000..982e741f9e86 --- /dev/null +++ b/plugins/model-providers/upstage/__init__.py @@ -0,0 +1,114 @@ +"""Upstage Solar provider profile.""" + +from typing import Any + +from providers import register_provider +from providers.base import ProviderProfile + + +# Model-name markers for Solar families that do NOT accept ``reasoning_effort``. +# Deny-list on purpose: newly released Solar models are assumed +# reasoning-capable by default, so only the known non-reasoning families are +# listed here. Substring match (not startswith) so dated variants like +# ``solar-mini-250127`` are covered too. +_NON_REASONING_MODEL_MARKERS = ("solar-mini", "syn-pro") + +# When the user hasn't picked a reasoning effort, Hermes passes +# reasoning_config=None. Solar's own server default is "minimal" (reasoning +# off), which is the wrong default for an agentic workload. 
We default reasoning +# ON at this effort — matching the "medium (default)" that Hermes' /reasoning +# panel shows for an unset config, so the displayed default and the real wire +# value agree. An explicit saved setting or a `/reasoning` change is +# always honored over this default; `/reasoning none` disables it. +_DEFAULT_REASONING_EFFORT = "medium" + + +def _model_supports_reasoning(model: str | None) -> bool: + """Solar reasoning-capable models — True unless the model is deny-listed. + + The Solar Pro family (``solar-pro``, ``solar-pro2``, ``solar-pro3`` and + dated variants like ``solar-pro3-250127``) and the Solar Open family + (``solar-open*``) accept ``reasoning_effort``; only ``solar-mini`` / + ``syn-pro`` ignore the parameter, so we deny-list those and treat every + other (incl. future) Solar model as reasoning-capable. + + ``None``/empty model → True: the provider default (``fallback_models[0]``, + the ``solar-pro`` rolling alias) is reasoning-capable, so an unset model + gets the same default-on behaviour. + + The check is a case-insensitive substring match of each entry in + ``_NON_REASONING_MODEL_MARKERS`` against the whitespace-stripped model id. + """ + m = (model or "").strip().lower() + return not any(marker in m for marker in _NON_REASONING_MODEL_MARKERS) + + +class UpstageProfile(ProviderProfile): + """Upstage Solar — top-level ``reasoning_effort`` control. + + Solar Pro/Open expose reasoning through a top-level ``reasoning_effort`` + field (``minimal`` | ``low`` | ``medium`` | ``high``), mirroring OpenAI's + shape. Unlike DeepSeek/Kimi it does NOT require echoing ``reasoning_content`` + back on later turns, so only the request field needs wiring. We emit at most + ``low`` | ``medium`` | ``high`` — the explicit values both Solar Pro 2 and + Pro 3 accept. + + Default-on: Solar's own server default is ``minimal`` (off), but for an + agentic workload we default reasoning ON (``_DEFAULT_REASONING_EFFORT``) + when the user hasn't picked an effort. The user can still set any level or + turn it off with ``/reasoning none``. 
+ """ + + def build_api_kwargs_extras( + self, *, reasoning_config: dict | None = None, model: str | None = None, **context + ) -> tuple[dict[str, Any], dict[str, Any]]: + top_level: dict[str, Any] = {} + + # solar-mini / syn-pro (the deny-list) ignore reasoning_effort — send + # nothing. Everything else, including future Solar models, gets it. + if not _model_supports_reasoning(model): + return {}, top_level + + # Unset (reasoning_config is None) → default reasoning ON for agents. + if not reasoning_config or not isinstance(reasoning_config, dict): + return {}, {"reasoning_effort": _DEFAULT_REASONING_EFFORT} + + # Explicitly disabled (`/reasoning none`) → omit the field so Solar + # applies its own default (minimal = off). + if reasoning_config.get("enabled") is False: + return {}, top_level + + # Map Hermes' effort vocabulary onto Solar's accepted set. xhigh/max + # collapse to high (Solar's strongest). minimal → off (omit). An + # enabled request with no recognised effort uses the default effort. + effort = (reasoning_config.get("effort") or "").strip().lower() + mapped = { + "minimal": None, + "low": "low", + "medium": "medium", + "high": "high", + "xhigh": "high", + "max": "high", + }.get(effort, _DEFAULT_REASONING_EFFORT) + + if mapped: + top_level["reasoning_effort"] = mapped + return {}, top_level + + +upstage = UpstageProfile( + name="upstage", + aliases=("solar",), + display_name="Upstage Solar", + description="Upstage (Solar API)", + signup_url="https://console.upstage.ai/api-keys", + env_vars=("UPSTAGE_API_KEY", "UPSTAGE_BASE_URL"), + base_url="https://api.upstage.ai/v1", + auth_type="api_key", + # default_aux_model left empty → auxiliary side tasks use the main model. + # entry [0] is the setup default. solar-pro is a rolling alias for the + # latest Solar Pro, so the default tracks the current flagship. 
+ fallback_models=( + "solar-pro", + "solar-pro3", + ), +) + +register_provider(upstage) diff --git a/plugins/model-providers/upstage/plugin.yaml b/plugins/model-providers/upstage/plugin.yaml new file mode 100644 index 000000000000..4de2e3d4ba5d --- /dev/null +++ b/plugins/model-providers/upstage/plugin.yaml @@ -0,0 +1,5 @@ +name: upstage-provider +kind: model-provider +version: 1.0.0 +description: Upstage (Solar API) +author: Upstage AI diff --git a/tests/hermes_cli/test_upstage_provider.py b/tests/hermes_cli/test_upstage_provider.py new file mode 100644 index 000000000000..621b86f8d413 --- /dev/null +++ b/tests/hermes_cli/test_upstage_provider.py @@ -0,0 +1,95 @@ +"""Focused tests for Upstage Solar first-class provider wiring. + +Regression guard for the bug where `hermes model` saved `provider: upstage` +correctly but, on re-entry, showed a different provider as active. Root cause: +`hermes_cli/providers.py` (the resolver behind `resolve_provider_full`) had no +`upstage` overlay, so `resolve_provider_full("upstage")` returned None, the +config provider was discarded, and resolution fell through to env auto-detect. 
+""" + +from __future__ import annotations + +import sys +import types + +import pytest + +if "dotenv" not in sys.modules: + fake_dotenv = types.ModuleType("dotenv") + fake_dotenv.load_dotenv = lambda *args, **kwargs: None + sys.modules["dotenv"] = fake_dotenv + + +class TestUpstageResolver: + """The providers.py resolver must recognise upstage (the actual bug).""" + + def test_resolve_provider_full_recognizes_upstage(self): + from hermes_cli.providers import resolve_provider_full + + pdef = resolve_provider_full("upstage", {}, []) + assert pdef is not None, ( + "resolve_provider_full('upstage') returned None — config " + "`provider: upstage` would be discarded and auto-detect would win" + ) + assert pdef.id == "upstage" + assert pdef.base_url == "https://api.upstage.ai/v1" + assert "UPSTAGE_API_KEY" in pdef.api_key_env_vars + + def test_get_provider_returns_upstage_def(self): + from hermes_cli.providers import get_provider + + pdef = get_provider("upstage") + assert pdef is not None and pdef.id == "upstage" + assert pdef.transport == "openai_chat" + + def test_solar_alias_normalizes_to_upstage(self): + from hermes_cli.providers import normalize_provider, resolve_provider_full + + assert normalize_provider("solar") == "upstage" + pdef = resolve_provider_full("solar", {}, []) + assert pdef is not None and pdef.id == "upstage" + + +class TestUpstageOverlay: + def test_overlay_exists(self): + from hermes_cli.providers import HERMES_OVERLAYS + + assert "upstage" in HERMES_OVERLAYS + overlay = HERMES_OVERLAYS["upstage"] + assert overlay.transport == "openai_chat" + assert overlay.extra_env_vars == ("UPSTAGE_API_KEY",) + assert overlay.base_url_override == "https://api.upstage.ai/v1" + assert overlay.base_url_env_var == "UPSTAGE_BASE_URL" + assert not overlay.is_aggregator + + def test_provider_label(self): + from hermes_cli.providers import get_label + + assert get_label("upstage") == "Upstage Solar" + + +class TestUpstageConfigProviderWins: + """End-to-end: an explicit 
config provider must beat env auto-detect. + + Mirrors the display logic in `hermes_cli/main.py` (cmd_model): read + `model.provider`, resolve it, and only fall back to auto-detect when that + resolution fails. With a stray DEEPSEEK_API_KEY present (the user's case), + upstage must still win because it is configured explicitly. + """ + + def test_explicit_upstage_beats_stray_deepseek_key(self, monkeypatch): + from hermes_cli.providers import resolve_provider_full + + monkeypatch.setenv("DEEPSEEK_API_KEY", "junk") + monkeypatch.setenv("UPSTAGE_API_KEY", "up-test-key") + + config_provider = "upstage" # from config model.provider + active = "" + if config_provider and config_provider != "auto": + adef = resolve_provider_full(config_provider, {}, []) + active = adef.id if adef is not None else "" + + assert active == "upstage", ( + "explicit config provider should resolve to upstage, not fall " + "through to deepseek auto-detect" + ) diff --git a/tests/plugins/model_providers/test_upstage_profile.py b/tests/plugins/model_providers/test_upstage_profile.py new file mode 100644 index 000000000000..e78c2326ee11 --- /dev/null +++ b/tests/plugins/model_providers/test_upstage_profile.py @@ -0,0 +1,174 @@ +"""Unit tests for the Upstage Solar provider profile. + +Upstage Solar is a plain OpenAI-compatible api-key provider, so this verifies +the profile is registered correctly and wires the expected identity, endpoint, +auth, and catalog fields — the contract every downstream layer (auth, models, +doctor, runtime_provider, transport) reads from. +""" + +from __future__ import annotations + +import pytest + + +@pytest.fixture +def upstage_profile(): + """Resolve the registered Upstage profile via the provider registry. + + Importing ``model_tools`` triggers plugin discovery, which registers the + Upstage profile. Going through ``get_provider_profile`` keeps the test + honest about the actual registration path (name + alias resolution). 
+ """ + import model_tools # noqa: F401 + import providers + + profile = providers.get_provider_profile("upstage") + assert profile is not None, "upstage provider profile must be registered" + return profile + + +class TestUpstageProfile: + def test_identity_and_endpoint(self, upstage_profile): + assert upstage_profile.name == "upstage" + assert upstage_profile.api_mode == "chat_completions" + assert upstage_profile.auth_type == "api_key" + assert upstage_profile.base_url == "https://api.upstage.ai/v1" + assert upstage_profile.get_hostname() == "api.upstage.ai" + + def test_solar_alias_resolves(self): + import model_tools # noqa: F401 + import providers + + assert providers.get_provider_profile("solar") is upstage_profile_singleton() + + def test_env_vars(self, upstage_profile): + # API key first, optional base-url override second (priority order). + assert upstage_profile.env_vars == ("UPSTAGE_API_KEY", "UPSTAGE_BASE_URL") + + def test_fallback_models_are_agentic_pro_only(self, upstage_profile): + # Only the agentic, tool-calling Solar Pro models belong in the offline + # catalog — Mini is capable but not agentic, so it's never promoted as a + # default. Live /v1/models still surfaces everything when a key is set. + assert upstage_profile.fallback_models == ( + "solar-pro", + "solar-pro3", + ) + + def test_default_model_is_solar_pro(self, upstage_profile): + # Entry [0] is the setup default (get_default_model_for_provider). + assert upstage_profile.fallback_models[0] == "solar-pro" + + def test_aux_model_left_empty(self, upstage_profile): + # Unset → auxiliary side tasks fall back to the user's main model. + assert upstage_profile.default_aux_model == "" + + +class TestUpstageReasoning: + """``build_api_kwargs_extras`` wires Solar's top-level ``reasoning_effort``. 
+ + Solar Pro accepts ``reasoning_effort`` (minimal|low|medium|high, default + minimal=off) and never requires echoing ``reasoning_content`` back, so only + the request field is emitted — always top-level, never in extra_body. + """ + + @pytest.mark.parametrize("effort", ["low", "medium", "high"]) + def test_pro_explicit_effort_passes_through(self, upstage_profile, effort): + extra_body, top_level = upstage_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": effort}, model="solar-pro3" + ) + assert extra_body == {} + assert top_level == {"reasoning_effort": effort} + + @pytest.mark.parametrize("effort", ["xhigh", "max"]) + def test_pro_strong_efforts_collapse_to_high(self, upstage_profile, effort): + _, top_level = upstage_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": effort}, model="solar-pro2" + ) + assert top_level == {"reasoning_effort": "high"} + + def test_pro_enabled_without_effort_defaults_on(self, upstage_profile): + _, top_level = upstage_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True}, model="solar-pro3" + ) + assert top_level == {"reasoning_effort": "medium"} + + def test_pro_minimal_effort_is_omitted(self, upstage_profile): + # Explicit minimal == reasoning off → omit so Solar applies its default. + _, top_level = upstage_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "minimal"}, model="solar-pro3" + ) + assert top_level == {} + + def test_disabled_omits_field(self, upstage_profile): + # `/reasoning none` → enabled False → explicitly off. 
+ _, top_level = upstage_profile.build_api_kwargs_extras( + reasoning_config={"enabled": False, "effort": "high"}, model="solar-pro3" + ) + assert top_level == {} + + @pytest.mark.parametrize("model", ["solar-pro3", "solar-pro", "solar-open2"]) + def test_no_config_defaults_reasoning_on(self, upstage_profile, model): + # Unset reasoning_config → default ON at medium (matches the /reasoning + # "medium (default)" label), not Solar's server default of minimal/off. + _, top_level = upstage_profile.build_api_kwargs_extras(model=model) + assert top_level == {"reasoning_effort": "medium"} + + @pytest.mark.parametrize("model", ["solar-mini", "solar-mini-202610", "syn-pro"]) + def test_no_config_deny_listed_still_omits(self, upstage_profile, model): + # Default-on must not leak to the deny-listed non-reasoning models. + _, top_level = upstage_profile.build_api_kwargs_extras(model=model) + assert top_level == {} + + @pytest.mark.parametrize( + "model", + [ + "solar-pro3-250127", + "solar-open", + "solar-open-250127", + "solar-open2", + "solar-open2-260528", + ], + ) + def test_pro_and_open_variants_support_reasoning(self, upstage_profile, model): + # Both the Solar Pro and Solar Open families (incl. dated variants) + # accept reasoning_effort. + _, top_level = upstage_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "high"}, model=model + ) + assert top_level == {"reasoning_effort": "high"} + + @pytest.mark.parametrize("model", ["solar-mini", "solar-mini-202610", "syn-pro"]) + def test_deny_listed_models_never_send_reasoning(self, upstage_profile, model): + # solar-mini / syn-pro ignore reasoning_effort, so never send it — + # even when the user explicitly enables reasoning. 
+ extra_body, top_level = upstage_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "high"}, model=model + ) + assert extra_body == {} + assert top_level == {} + + @pytest.mark.parametrize("model", ["solar-future", "solar-future-260601"]) + def test_unknown_future_models_default_to_reasoning(self, upstage_profile, model): + # Deny-list semantics: a future Solar model we've never heard of is + # assumed reasoning-capable, so reasoning_effort is sent instead of + # being silently dropped (the old allow-list failure mode). + _, top_level = upstage_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "high"}, model=model + ) + assert top_level == {"reasoning_effort": "high"} + + # And the unset-config default-on path applies to it too. + _, top_level = upstage_profile.build_api_kwargs_extras(model=model) + assert top_level == {"reasoning_effort": "medium"} + + def test_none_model_defaults_to_reasoning(self, upstage_profile): + # No model in context → treated as reasoning-capable, consistent with + # the provider default (fallback_models[0] == "solar-pro"). + _, top_level = upstage_profile.build_api_kwargs_extras(model=None) + assert top_level == {"reasoning_effort": "medium"} + + +def upstage_profile_singleton(): + import providers + + return providers.get_provider_profile("upstage") diff --git a/web/src/pages/EnvPage.tsx b/web/src/pages/EnvPage.tsx index ded3126550f5..89a6d9ef8fd3 100644 --- a/web/src/pages/EnvPage.tsx +++ b/web/src/pages/EnvPage.tsx @@ -64,6 +64,7 @@ const PROVIDER_GROUPS: { prefix: string; name: string; priority: number }[] = [ { prefix: "OPENCODE_ZEN_", name: "OpenCode Zen", priority: 11 }, { prefix: "OPENROUTER_", name: "OpenRouter", priority: 12 }, { prefix: "XIAOMI_", name: "Xiaomi MiMo", priority: 13 }, + { prefix: "UPSTAGE_", name: "Upstage Solar", priority: 14 }, ]; function getProviderGroup(key: string): string {