Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,15 @@
# Optional base URL override:
# XIAOMI_BASE_URL=https://api.xiaomimimo.com/v1

# =============================================================================
# LLM PROVIDER (Upstage Solar)
# =============================================================================
# Upstage provides access to Upstage Solar models.
# Get your key at: https://console.upstage.ai/api-keys
# UPSTAGE_API_KEY=your_key_here
# Optional base URL override:
# UPSTAGE_BASE_URL=https://api.upstage.ai/v1

# =============================================================================
# TOOL API KEYS
# =============================================================================
Expand Down
13 changes: 13 additions & 0 deletions agent/model_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,19 @@ def _strip_provider_prefix(model: str) -> str:
"grok": 131072, # catch-all (grok-beta, unknown grok-*)
# Kimi
"kimi": 262144,
# Upstage Solar — api.upstage.ai/v1/models does not return context_length,
# so these fallbacks keep token budgeting / compression from probing down
# to the 128k default. Substring matching is longest-first, so the versioned
# ids win over the "solar-pro" rolling-alias entry, which in turn covers
# future solar-pro* releases at the Pro context size.
# Sources: Solar Pro 3 = 128K, Solar Pro 2 = 64K, Solar Mini = 32K,
# Solar Open 2 = 256K.
"solar-open2-preview": 262144, # 256K (longest-first: wins over solar-open2)
"solar-open2": 262144, # 256K
"solar-pro3": 131072,
"solar-pro": 131072, # rolling alias → latest Solar Pro (currently pro3)
"solar-pro2": 65536,
"solar-mini": 32768,
# Tencent — Hy3 Preview (Hunyuan) with 256K context window.
# OpenRouter live metadata reports 262144 (256 × 1024); align the
# static fallback so cache and offline both agree (issue #22268).
Expand Down
9 changes: 9 additions & 0 deletions hermes_cli/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,14 @@ class ProviderConfig:
api_key_env_vars=("GMI_API_KEY",),
base_url_env_var="GMI_BASE_URL",
),
"upstage": ProviderConfig(
id="upstage",
name="Upstage Solar",
auth_type="api_key",
inference_base_url="https://api.upstage.ai/v1",
api_key_env_vars=("UPSTAGE_API_KEY",),
base_url_env_var="UPSTAGE_BASE_URL",
),
"minimax": ProviderConfig(
id="minimax",
name="MiniMax",
Expand Down Expand Up @@ -1491,6 +1499,7 @@ def resolve_provider(
"step": "stepfun", "stepfun-coding-plan": "stepfun",
"arcee-ai": "arcee", "arceeai": "arcee",
"gmi-cloud": "gmi", "gmicloud": "gmi",
"solar": "upstage",
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
"minimax-portal": "minimax-oauth", "minimax-global": "minimax-oauth", "minimax_oauth": "minimax-oauth",
"alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
Expand Down
1 change: 1 addition & 0 deletions hermes_cli/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,6 +941,7 @@ class ProviderEntry(NamedTuple):
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek; IAM or API key)"),
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint, your Azure AI deployment)"),
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (Reuses local Qwen CLI login)"),
ProviderEntry("upstage", "Upstage Solar", "Upstage (Solar API)"),
]

# Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
Expand Down
10 changes: 10 additions & 0 deletions hermes_cli/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,12 @@ class HermesOverlay:
base_url_override="https://api.gmi-serving.com/v1",
base_url_env_var="GMI_BASE_URL",
),
"upstage": HermesOverlay(
transport="openai_chat",
extra_env_vars=("UPSTAGE_API_KEY",),
base_url_override="https://api.upstage.ai/v1",
base_url_env_var="UPSTAGE_BASE_URL",
),
"ollama-cloud": HermesOverlay(
transport="openai_chat",
base_url_override="https://ollama.com/v1",
Expand Down Expand Up @@ -348,6 +354,9 @@ class ProviderDef:
"gmi-cloud": "gmi",
"gmicloud": "gmi",

# upstage
"solar": "upstage",

# Local server aliases → virtual "local" concept (resolved via user config)
"lmstudio": "lmstudio",
"lm-studio": "lmstudio",
Expand All @@ -371,6 +380,7 @@ class ProviderDef:
"stepfun": "StepFun Step Plan",
"xiaomi": "Xiaomi MiMo",
"gmi": "GMI Cloud",
"upstage": "Upstage Solar",
"tencent-tokenhub": "Tencent TokenHub",
"lmstudio": "LM Studio",
"local": "Local endpoint",
Expand Down
114 changes: 114 additions & 0 deletions plugins/model-providers/upstage/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
"""Upstage Solar provider profile."""

from typing import Any

from providers import register_provider
from providers.base import ProviderProfile


# Model-name markers for Solar families that do NOT accept ``reasoning_effort``.
# This is a deny-list on purpose: newly released Solar models are assumed to be
# reasoning-capable by default, so only the known non-reasoning families are
# listed here. Markers are matched as substrings (not with ``startswith``) so
# that dated variants such as ``solar-mini-250127`` are covered too.
_NON_REASONING_MODEL_MARKERS: tuple[str, ...] = ("solar-mini", "syn-pro")

# When the user hasn't picked a reasoning effort, Hermes passes
# reasoning_config=None. Solar's own server default is "minimal" (reasoning
# off), which is the wrong default for an agentic workload. We default reasoning
# ON at this effort — matching the "medium (default)" that Hermes' /reasoning
# panel shows for an unset config, so the displayed default and the real wire
# value agree. An explicit saved setting or a `/reasoning <level>` change is
# always honored over this default; `/reasoning none` disables it.
_DEFAULT_REASONING_EFFORT: str = "medium"


def _model_supports_reasoning(model: str | None) -> bool:
    """Return whether ``reasoning_effort`` should be sent for *model*.

    The check is a deny-list: the model name is stripped and lower-cased, and
    the result is ``False`` only when it contains one of the markers in
    ``_NON_REASONING_MODEL_MARKERS`` (``solar-mini`` / ``syn-pro``). Every
    other name — the Solar Pro family (``solar-pro``, ``solar-pro2``,
    ``solar-pro3`` and dated variants such as ``solar-pro3-250127``), the
    Solar Open family (``solar-open*``) and any future model — is treated as
    reasoning-capable.

    ``None`` or an empty string contains no marker and therefore yields
    ``True``; an unset model falls back to the provider default (the
    ``solar-pro`` rolling alias), which is reasoning-capable, so it gets the
    same default-on behaviour.
    """
    normalized = (model or "").strip().lower()
    for marker in _NON_REASONING_MODEL_MARKERS:
        if marker in normalized:
            return False
    return True


class UpstageProfile(ProviderProfile):
    """Provider profile for Upstage Solar with ``reasoning_effort`` wiring.

    Solar Pro/Open take their reasoning level from a top-level
    ``reasoning_effort`` request field (``minimal`` | ``low`` | ``medium`` |
    ``high``), the same shape OpenAI uses. Unlike DeepSeek/Kimi, Solar does
    NOT require ``reasoning_content`` to be echoed back on later turns, so the
    request field is the only thing wired here. Only ``low`` | ``medium`` |
    ``high`` are ever emitted — the explicit values both Solar Pro 2 and
    Pro 3 accept.

    Default-on: Solar's server-side default is ``minimal`` (off). For an
    agentic workload this profile instead sends ``_DEFAULT_REASONING_EFFORT``
    whenever the user has not chosen an effort. Any explicit level is
    honoured, and ``/reasoning none`` switches reasoning off.
    """

    def build_api_kwargs_extras(
        self, *, reasoning_config: dict | None = None, model: str | None = None, **context
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Build the extra request kwargs for one Solar call.

        Returns a 2-tuple of dicts. The first is always empty; the second
        holds the top-level fields and contains at most a
        ``reasoning_effort`` key.
        """
        # Deny-listed models (solar-mini / syn-pro) ignore reasoning_effort,
        # so nothing is sent for them. All other models, including future
        # Solar releases, continue below.
        if not _model_supports_reasoning(model):
            return {}, {}

        # No usable config (None, empty, or not a dict) → reasoning is
        # switched on at the default effort.
        if not (reasoning_config and isinstance(reasoning_config, dict)):
            return {}, {"reasoning_effort": _DEFAULT_REASONING_EFFORT}

        # Explicitly disabled (`/reasoning none`) → leave the field out so
        # Solar applies its own default (minimal = off).
        if reasoning_config.get("enabled") is False:
            return {}, {}

        # Translate Hermes' effort vocabulary into the values Solar accepts.
        requested = (reasoning_config.get("effort") or "").strip().lower()

        # minimal means "off" on Solar, which is expressed by omitting the field.
        if requested == "minimal":
            return {}, {}

        if requested in ("xhigh", "max"):
            # Solar's strongest level is "high"; stronger requests collapse to it.
            wire_effort = "high"
        elif requested in ("low", "medium", "high"):
            wire_effort = requested
        else:
            # Enabled, but with a missing or unrecognised effort.
            wire_effort = _DEFAULT_REASONING_EFFORT

        return {}, {"reasoning_effort": wire_effort}


# Module-level profile instance for the Upstage Solar provider. It is handed
# to register_provider() below, so importing this module performs the
# registration as a side effect.
upstage = UpstageProfile(
    name="upstage",
    aliases=("solar",),
    display_name="Upstage Solar",
    description="Upstage (Solar API)",
    signup_url="https://console.upstage.ai/api-keys",
    env_vars=("UPSTAGE_API_KEY", "UPSTAGE_BASE_URL"),
    base_url="https://api.upstage.ai/v1",
    auth_type="api_key",
    # default_aux_model is left empty → auxiliary side tasks use the main model.
    # Entry [0] is the setup default. solar-pro is a rolling alias for the
    # latest Solar Pro, so the default tracks the current flagship.
    fallback_models=(
        "solar-pro",
        "solar-pro3",
    ),
)

register_provider(upstage)
5 changes: 5 additions & 0 deletions plugins/model-providers/upstage/plugin.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name: upstage-provider
kind: model-provider
version: 1.0.0
description: Upstage (Solar API)
author: Upstage AI
95 changes: 95 additions & 0 deletions tests/hermes_cli/test_upstage_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""Focused tests for Upstage Solar first-class provider wiring.

Regression guard for the bug where `hermes model` saved `provider: upstage`
correctly but, on re-entry, showed a different provider as active. Root cause:
`hermes_cli/providers.py` (the resolver behind `resolve_provider_full`) had no
`upstage` overlay, so `resolve_provider_full("upstage")` returned None, the
config provider was discarded, and resolution fell through to env auto-detect.
"""

from __future__ import annotations

import sys
import types

import pytest

# If no module named "dotenv" has been imported yet, register a stand-in
# module whose load_dotenv() accepts any arguments and does nothing.
# NOTE(review): presumably this lets hermes_cli be imported in environments
# without python-dotenv — confirm. The check looks only at sys.modules, so the
# stub is also installed when the real package is available but not yet
# imported.
if "dotenv" not in sys.modules:
    fake_dotenv = types.ModuleType("dotenv")
    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
    sys.modules["dotenv"] = fake_dotenv


class TestUpstageResolver:
    """Resolver checks: providers.py has to know upstage (the actual bug)."""

    def test_resolve_provider_full_recognizes_upstage(self):
        """Resolving "upstage" yields a definition with the Upstage URL and key var."""
        from hermes_cli.providers import resolve_provider_full

        resolved = resolve_provider_full("upstage", {}, [])
        assert resolved is not None, (
            "resolve_provider_full('upstage') returned None — config "
            "`provider: upstage` would be discarded and auto-detect would win"
        )
        assert resolved.id == "upstage"
        assert resolved.base_url == "https://api.upstage.ai/v1"
        assert "UPSTAGE_API_KEY" in resolved.api_key_env_vars

    def test_get_provider_returns_upstage_def(self):
        """get_provider("upstage") returns the upstage definition on openai_chat."""
        from hermes_cli.providers import get_provider

        provider_def = get_provider("upstage")
        assert provider_def is not None
        assert provider_def.id == "upstage"
        assert provider_def.transport == "openai_chat"

    def test_solar_alias_normalizes_to_upstage(self):
        """The "solar" alias normalizes and resolves to the upstage provider."""
        from hermes_cli.providers import normalize_provider, resolve_provider_full

        assert normalize_provider("solar") == "upstage"

        via_alias = resolve_provider_full("solar", {}, [])
        assert via_alias is not None
        assert via_alias.id == "upstage"


class TestUpstageOverlay:
    """Checks on the upstage entry in HERMES_OVERLAYS and its display label."""

    def test_overlay_exists(self):
        """The overlay is registered and carries the expected field values."""
        from hermes_cli.providers import HERMES_OVERLAYS

        assert "upstage" in HERMES_OVERLAYS

        upstage_overlay = HERMES_OVERLAYS["upstage"]
        actual_fields = (
            upstage_overlay.transport,
            upstage_overlay.extra_env_vars,
            upstage_overlay.base_url_override,
            upstage_overlay.base_url_env_var,
        )
        assert actual_fields == (
            "openai_chat",
            ("UPSTAGE_API_KEY",),
            "https://api.upstage.ai/v1",
            "UPSTAGE_BASE_URL",
        )
        assert not upstage_overlay.is_aggregator

    def test_provider_label(self):
        """The label for "upstage" is "Upstage Solar"."""
        from hermes_cli.providers import get_label

        label = get_label("upstage")
        assert label == "Upstage Solar"


class TestUpstageConfigProviderWins:
    """End-to-end: an explicitly configured provider must beat env auto-detect.

    Follows the display logic in `hermes_cli/main.py` (cmd_model): read
    `model.provider`, resolve it, and fall back to auto-detect only when that
    resolution fails. Even with a stray DEEPSEEK_API_KEY in the environment
    (the user's case), upstage must win because it is configured explicitly.
    """

    def test_explicit_upstage_beats_stray_deepseek_key(self, monkeypatch):
        """With both API keys set, the configured "upstage" still resolves to upstage."""
        from hermes_cli.providers import resolve_provider_full

        env_overrides = (
            ("DEEPSEEK_API_KEY", "junk"),
            ("UPSTAGE_API_KEY", "up-test-key"),
        )
        for var_name, var_value in env_overrides:
            monkeypatch.setenv(var_name, var_value)

        config_provider = "upstage"  # from config model.provider
        resolved = None
        if config_provider and config_provider != "auto":
            resolved = resolve_provider_full(config_provider, {}, [])
        active = resolved.id if resolved is not None else ""

        assert active == "upstage", (
            "explicit config provider should resolve to upstage, not fall "
            "through to deepseek auto-detect"
        )
Loading