UpstageAI · minchang · Jun 8, 2026 · Jun 11, 2026
diff --git a/.env.example b/.env.example
@@ -124,6 +124,15 @@
 # Optional base URL override:
 # XIAOMI_BASE_URL=https://api.xiaomimimo.com/v1
 
+# =============================================================================
+# LLM PROVIDER (Upstage Solar)
+# =============================================================================
+# Upstage provides access to Upstage Solar models.
+# Get your key at: https://console.upstage.ai/api-keys
+# UPSTAGE_API_KEY=your_key_here
+# Optional base URL override:
+# UPSTAGE_BASE_URL=https://api.upstage.ai/v1
+
 # =============================================================================
 # TOOL API KEYS
 # =============================================================================

diff --git a/agent/model_metadata.py b/agent/model_metadata.py
@@ -227,6 +227,19 @@ def _strip_provider_prefix(model: str) -> str:
     "grok": 131072,             # catch-all (grok-beta, unknown grok-*)
     # Kimi
     "kimi": 262144,
+    # Upstage Solar — api.upstage.ai/v1/models does not return context_length,
+    # so these fallbacks keep token budgeting / compression from probing down
+    # to the 128k default. Substring matching is longest-first, so the versioned
+    # ids win over the "solar-pro" rolling-alias entry, which in turn covers
+    # future solar-pro* releases at the Pro context size.
+    # Sources: Solar Pro 3 = 128K, Solar Pro 2 = 64K, Solar Mini = 32K,
+    # Solar Open 2 = 256K.
+    "solar-open2-preview": 262144,  # 256K (longest-first: wins over solar-open2)
+    "solar-open2": 262144,  # 256K
+    "solar-pro3": 131072,
+    "solar-pro": 131072,  # rolling alias → latest Solar Pro (currently pro3)
+    "solar-pro2": 65536,
+    "solar-mini": 32768,
     # Tencent — Hy3 Preview (Hunyuan) with 256K context window.
     # OpenRouter live metadata reports 262144 (256 × 1024); align the
     # static fallback so cache and offline both agree (issue #22268).

diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
@@ -287,6 +287,14 @@ class ProviderConfig:
         api_key_env_vars=("GMI_API_KEY",),
         base_url_env_var="GMI_BASE_URL",
     ),
+    "upstage": ProviderConfig(
+        id="upstage",
+        name="Upstage Solar",
+        auth_type="api_key",
+        inference_base_url="https://api.upstage.ai/v1",
+        api_key_env_vars=("UPSTAGE_API_KEY",),
+        base_url_env_var="UPSTAGE_BASE_URL",
+    ),
     "minimax": ProviderConfig(
         id="minimax",
         name="MiniMax",
@@ -1491,6 +1499,7 @@ def resolve_provider(
         "step": "stepfun", "stepfun-coding-plan": "stepfun",
         "arcee-ai": "arcee", "arceeai": "arcee",
         "gmi-cloud": "gmi", "gmicloud": "gmi",
+        "solar": "upstage",
         "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
         "minimax-portal": "minimax-oauth", "minimax-global": "minimax-oauth", "minimax_oauth": "minimax-oauth",
         "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",

diff --git a/hermes_cli/models.py b/hermes_cli/models.py
@@ -941,6 +941,7 @@ class ProviderEntry(NamedTuple):
     ProviderEntry("bedrock",        "AWS Bedrock",              "AWS Bedrock (Claude, Nova, Llama, DeepSeek; IAM or API key)"),
     ProviderEntry("azure-foundry",  "Azure Foundry",            "Azure Foundry (OpenAI-style or Anthropic-style endpoint, your Azure AI deployment)"),
     ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (Reuses local Qwen CLI login)"),
+    ProviderEntry("upstage",        "Upstage Solar",            "Upstage (Solar API)"),
 ]
 
 # Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/

diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py
@@ -196,6 +196,12 @@ class HermesOverlay:
         base_url_override="https://api.gmi-serving.com/v1",
         base_url_env_var="GMI_BASE_URL",
     ),
+    "upstage": HermesOverlay(
+        transport="openai_chat",
+        extra_env_vars=("UPSTAGE_API_KEY",),
+        base_url_override="https://api.upstage.ai/v1",
+        base_url_env_var="UPSTAGE_BASE_URL",
+    ),
     "ollama-cloud": HermesOverlay(
         transport="openai_chat",
         base_url_override="https://ollama.com/v1",
@@ -348,6 +354,9 @@ class ProviderDef:
     "gmi-cloud": "gmi",
     "gmicloud": "gmi",
 
+    # upstage
+    "solar": "upstage",
+
     # Local server aliases → virtual "local" concept (resolved via user config)
     "lmstudio": "lmstudio",
     "lm-studio": "lmstudio",
@@ -371,6 +380,7 @@ class ProviderDef:
     "stepfun": "StepFun Step Plan",
     "xiaomi": "Xiaomi MiMo",
     "gmi": "GMI Cloud",
+    "upstage": "Upstage Solar",
     "tencent-tokenhub": "Tencent TokenHub",
     "lmstudio": "LM Studio",
     "local": "Local endpoint",

diff --git a/plugins/model-providers/upstage/__init__.py b/plugins/model-providers/upstage/__init__.py
@@ -0,0 +1,114 @@
+"""Upstage Solar provider profile."""
+
+from typing import Any
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+
+# Model-name markers for Solar families that do NOT accept ``reasoning_effort``.
+# Deny-list on purpose: newly released Solar models are assumed
+# reasoning-capable by default, so only the known non-reasoning families are
+# listed here. Substring match (not startswith) so dated variants like
+# ``solar-mini-250127`` are covered too.
+_NON_REASONING_MODEL_MARKERS = ("solar-mini", "syn-pro")  # must be lower-case
+
+# When the user hasn't picked a reasoning effort, Hermes passes
+# reasoning_config=None. Solar's own server default is "minimal" (reasoning
+# off), which is the wrong default for an agentic workload. We default reasoning
+# ON at this effort — matching the "medium (default)" that Hermes' /reasoning
+# panel shows for an unset config, so the displayed default and the real wire
+# value agree. An explicit saved setting or a `/reasoning <level>` change is
+# always honored over this default; `/reasoning none` disables it.
+_DEFAULT_REASONING_EFFORT = "medium"  # also used for an unrecognised effort
+
+
+def _model_supports_reasoning(model: str | None) -> bool:
+    """Solar reasoning-capable models — True unless the model is deny-listed.
+
+    The Solar Pro family (``solar-pro``, ``solar-pro2``, ``solar-pro3`` and
+    dated variants like ``solar-pro3-250127``) and the Solar Open family
+    (``solar-open*``) accept ``reasoning_effort``; only ``solar-mini`` /
+    ``syn-pro`` ignore the parameter, so we deny-list those and treat every
+    other (incl. future) Solar model as reasoning-capable.
+
+    ``None``/empty model → True: the provider default (``fallback_models[0]``,
+    the ``solar-pro`` rolling alias) is reasoning-capable, so an unset model
+    gets the same default-on behaviour.
+    """
+    m = (model or "").strip().lower()  # None/"" → "" → no marker hit → True
+    return not any(marker in m for marker in _NON_REASONING_MODEL_MARKERS)
+
+
+class UpstageProfile(ProviderProfile):
+    """Upstage Solar — top-level ``reasoning_effort`` control.
+
+    Solar Pro/Open expose reasoning through a top-level ``reasoning_effort``
+    field (``minimal`` | ``low`` | ``medium`` | ``high``), mirroring OpenAI's
+    shape. Unlike DeepSeek/Kimi it does NOT require echoing ``reasoning_content``
+    back on later turns, so only the request field needs wiring. We emit at most
+    ``low`` | ``medium`` | ``high`` — the explicit values both Solar Pro 2 and
+    Pro 3 accept.
+
+    Default-on: Solar's own server default is ``minimal`` (off), but for an
+    agentic workload we default reasoning ON (``_DEFAULT_REASONING_EFFORT``)
+    when the user hasn't picked an effort. The user can still set any level or
+    turn it off with ``/reasoning none``.
+    """
+
+    def build_api_kwargs_extras(
+        self, *, reasoning_config: dict | None = None, model: str | None = None, **context
+    ) -> tuple[dict[str, Any], dict[str, Any]]:  # first dict is always {} here
+        top_level: dict[str, Any] = {}  # second dict: top-level request fields
+
+        # solar-mini / syn-pro (the deny-list) ignore reasoning_effort — send
+        # nothing. Everything else, including future Solar models, gets it.
+        if not _model_supports_reasoning(model):
+            return {}, top_level
+
+        # Unset (None), empty, or not a dict → default reasoning ON for agents.
+        if not reasoning_config or not isinstance(reasoning_config, dict):
+            return {}, {"reasoning_effort": _DEFAULT_REASONING_EFFORT}
+
+        # Explicitly disabled (`/reasoning none`) → omit the field so Solar
+        # applies its own default (minimal = off).
+        if reasoning_config.get("enabled") is False:
+            return {}, top_level
+
+        # Map Hermes' effort vocabulary onto Solar's accepted set. xhigh/max
+        # collapse to high (Solar's strongest). minimal → off (omit). Missing
+        # or unrecognised effort → default. NOTE(review): assumes str/None.
+        effort = (reasoning_config.get("effort") or "").strip().lower()
+        mapped = {
+            "minimal": None,
+            "low": "low",
+            "medium": "medium",
+            "high": "high",
+            "xhigh": "high",
+            "max": "high",
+        }.get(effort, _DEFAULT_REASONING_EFFORT)
+
+        if mapped:  # None (effort "minimal") → leave the field out
+            top_level["reasoning_effort"] = mapped
+        return {}, top_level
+
+
+upstage = UpstageProfile(
+    name="upstage",
+    aliases=("solar",),  # same alias as hermes_cli/auth.py and providers.py
+    display_name="Upstage Solar",
+    description="Upstage (Solar API)",
+    signup_url="https://console.upstage.ai/api-keys",
+    env_vars=("UPSTAGE_API_KEY", "UPSTAGE_BASE_URL"),
+    base_url="https://api.upstage.ai/v1",
+    auth_type="api_key",
+    # default_aux_model left empty → auxiliary side tasks use the main model.
+    # entry [0] is the setup default. solar-pro is a rolling alias for the
+    # latest Solar Pro, so the default tracks the current flagship.
+    fallback_models=(
+        "solar-pro",
+        "solar-pro3",
+    ),
+)
+
+register_provider(upstage)  # runs at import time
diff --git a/plugins/model-providers/upstage/plugin.yaml b/plugins/model-providers/upstage/plugin.yaml
@@ -0,0 +1,5 @@
+name: upstage-provider
+kind: model-provider
+version: 1.0.0
+description: Upstage (Solar API)
+author: Upstage AI
diff --git a/tests/hermes_cli/test_upstage_provider.py b/tests/hermes_cli/test_upstage_provider.py
@@ -0,0 +1,95 @@
+"""Focused tests for Upstage Solar first-class provider wiring.
+
+Regression guard for the bug where `hermes model` saved `provider: upstage`
+correctly but, on re-entry, showed a different provider as active. Root cause:
+`hermes_cli/providers.py` (the resolver behind `resolve_provider_full`) had no
+`upstage` overlay, so `resolve_provider_full("upstage")` returned None, the
+config provider was discarded, and resolution fell through to env auto-detect.
+"""
+
+from __future__ import annotations
+
+import sys
+import types
+
+import pytest
+
+if "dotenv" not in sys.modules:  # i.e. not imported yet (installed or not)
+    fake_dotenv = types.ModuleType("dotenv")
+    fake_dotenv.load_dotenv = lambda *args, **kwargs: None  # no-op stand-in
+    sys.modules["dotenv"] = fake_dotenv  # NOTE(review): never removed here
+
+
+class TestUpstageResolver:
+    """The providers.py resolver must recognise upstage (the actual bug)."""
+
+    def test_resolve_provider_full_recognizes_upstage(self):
+        from hermes_cli.providers import resolve_provider_full
+
+        pdef = resolve_provider_full("upstage", {}, [])
+        assert pdef is not None, (
+            "resolve_provider_full('upstage') returned None — config "
+            "`provider: upstage` would be discarded and auto-detect would win"
+        )
+        assert pdef.id == "upstage"
+        assert pdef.base_url == "https://api.upstage.ai/v1"  # overlay default URL
+        assert "UPSTAGE_API_KEY" in pdef.api_key_env_vars
+
+    def test_get_provider_returns_upstage_def(self):
+        from hermes_cli.providers import get_provider
+
+        pdef = get_provider("upstage")
+        assert pdef is not None and pdef.id == "upstage"
+        assert pdef.transport == "openai_chat"  # value set on the upstage overlay
+
+    def test_solar_alias_normalizes_to_upstage(self):
+        from hermes_cli.providers import normalize_provider, resolve_provider_full
+
+        assert normalize_provider("solar") == "upstage"  # alias-table entry
+        pdef = resolve_provider_full("solar", {}, [])
+        assert pdef is not None and pdef.id == "upstage"
+
+
+class TestUpstageOverlay:  # pins the HERMES_OVERLAYS["upstage"] entry + label
+    def test_overlay_exists(self):
+        from hermes_cli.providers import HERMES_OVERLAYS
+
+        assert "upstage" in HERMES_OVERLAYS
+        overlay = HERMES_OVERLAYS["upstage"]
+        assert overlay.transport == "openai_chat"
+        assert overlay.extra_env_vars == ("UPSTAGE_API_KEY",)
+        assert overlay.base_url_override == "https://api.upstage.ai/v1"
+        assert overlay.base_url_env_var == "UPSTAGE_BASE_URL"
+        assert not overlay.is_aggregator  # not set on the overlay entry
+
+    def test_provider_label(self):
+        from hermes_cli.providers import get_label
+
+        assert get_label("upstage") == "Upstage Solar"
+
+
+class TestUpstageConfigProviderWins:
+    """End-to-end: an explicit config provider must beat env auto-detect.
+
+    Mirrors the display logic in `hermes_cli/main.py` (cmd_model): read
+    `model.provider`, resolve it, and only fall back to auto-detect when that
+    resolution fails. With a stray DEEPSEEK_API_KEY present (the user's case),
+    upstage must still win because it is configured explicitly.
+    """
+
+    def test_explicit_upstage_beats_stray_deepseek_key(self, monkeypatch):
+        from hermes_cli.providers import resolve_provider_full
+
+        monkeypatch.setenv("DEEPSEEK_API_KEY", "junk")  # the stray competing key
+        monkeypatch.setenv("UPSTAGE_API_KEY", "up-test-key")
+
+        config_provider = "upstage"  # from config model.provider
+        active = ""  # stays empty if resolution returns None
+        if config_provider and config_provider != "auto":
+            adef = resolve_provider_full(config_provider, {}, [])
+            active = adef.id if adef is not None else ""
+
+        assert active == "upstage", (
+            "explicit config provider should resolve to upstage, not fall "
+            "through to deepseek auto-detect"
+        )