Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 34 additions & 5 deletions crates/smooth-cast/src/provider_migration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,10 @@ mod tests {
assert_eq!(r.routing.coding.model, "deepseek-v4-flash");
assert_eq!(r.routing.reasoning.as_ref().unwrap().model, "deepseek-v4-pro");
assert_eq!(r.routing.reviewing.model, "minimax-m2.7-direct");
assert_eq!(r.routing.judge.model, "groq-llama-3.3-70b");
assert_eq!(r.routing.judge.model, "groq-gpt-oss-120b");
assert_eq!(r.routing.summarize.model, "gemini-2.5-flash");
assert_eq!(r.routing.default.model, "deepseek-v4-flash");
assert_eq!(r.routing.fast.as_ref().unwrap().model, "groq-llama-3.1-8b");
assert_eq!(r.routing.fast.as_ref().unwrap().model, "groq-gpt-oss-20b");
}

#[test]
Expand Down Expand Up @@ -269,14 +269,14 @@ mod tests {
// Load via the wrapper: should rewrite + save back.
let loaded = load_providers_with_migration(&path).expect("load");
assert_eq!(loaded.routing.coding.model, "deepseek-v4-flash");
assert_eq!(loaded.routing.fast.as_ref().unwrap().model, "groq-llama-3.1-8b");
assert_eq!(loaded.routing.fast.as_ref().unwrap().model, "groq-gpt-oss-20b");

// Read again with raw load_from_file — the file on disk must
// now hold the concrete names too.
let raw_reloaded = ProviderRegistry::load_from_file(&path).expect("reload");
assert_eq!(raw_reloaded.routing.coding.model, "deepseek-v4-flash");
assert_eq!(raw_reloaded.routing.reasoning.as_ref().unwrap().model, "deepseek-v4-pro");
assert_eq!(raw_reloaded.routing.judge.model, "groq-llama-3.3-70b");
assert_eq!(raw_reloaded.routing.judge.model, "groq-gpt-oss-120b");
}

#[test]
Expand All @@ -303,6 +303,35 @@ mod tests {
assert_eq!(coding.new, "deepseek-v4-flash");
let fast = rewrites.iter().find(|r| r.slot == "fast").expect("fast rewrite");
assert_eq!(fast.old, "smooth-fast");
assert_eq!(fast.new, "groq-llama-3.1-8b");
assert_eq!(fast.new, "groq-gpt-oss-20b");
}

/// SMOODEV-2097: a config that already ran the smooth-* migration is
/// pinned to the *concrete* Groq Llama names. The gateway then
/// removed those models, so the second migration step must bump them
/// to gpt-oss — even though they carry no `smooth-` prefix.
///
/// The test checks three things: the two affected slots are rewritten
/// in place, the returned rewrite list reports the change, and running
/// the migration a second time reports nothing.
#[test]
fn migrate_bumps_already_migrated_groq_llama_to_gpt_oss() {
// Fixture: every slot holds a concrete model name (no `smooth-`
// prefix). Only `judge` and `fast` point at the removed Groq Llama
// models; `planning` is left unset.
let mut r = ProviderRegistry::new().with_routing(ModelRouting {
coding: ModelSlot::new("smooai-gateway", "deepseek-v4-flash"),
reasoning: Some(ModelSlot::new("smooai-gateway", "deepseek-v4-pro")),
reviewing: ModelSlot::new("smooai-gateway", "minimax-m2.7-direct"),
judge: ModelSlot::new("smooai-gateway", "groq-llama-3.3-70b"),
summarize: ModelSlot::new("smooai-gateway", "gemini-2.5-flash"),
default: ModelSlot::new("smooai-gateway", "deepseek-v4-flash"),
fast: Some(ModelSlot::new("smooai-gateway", "groq-llama-3.1-8b")),
planning: None,
});
// Migrate in place; the return value lists the rewrites performed.
let rewrites = migrate_provider_registry(&mut r);
// Only judge + fast change; the rest were already live concrete
// names.
assert_eq!(rewrites.len(), 2, "rewrites = {rewrites:?}");
// The registry itself now carries the gpt-oss replacements.
assert_eq!(r.routing.judge.model, "groq-gpt-oss-120b");
assert_eq!(r.routing.fast.as_ref().unwrap().model, "groq-gpt-oss-20b");
// The rewrite record for judge reports both the old and new names.
// NOTE(review): the `fast` record's old/new fields are not inspected
// here; only its effect on the registry is asserted above.
let judge = rewrites.iter().find(|r| r.slot == "judge").expect("judge rewrite");
assert_eq!(judge.old, "groq-llama-3.3-70b");
assert_eq!(judge.new, "groq-gpt-oss-120b");
// Idempotent: a second pass makes no further changes.
assert!(migrate_provider_registry(&mut r).is_empty());
}
}
24 changes: 12 additions & 12 deletions crates/smooth-code/src/model_picker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -668,16 +668,16 @@ pub fn fallback_catalog() -> &'static [(&'static str, ModelInfo)] {
},
},
),
// Pearl th-3468bd: judge + fast defaults now route to
// Groq Llama. Embedding the catalog entries here so the
// picker shows them with the right metadata even in
// offline mode.
// Pearl th-3468bd / SMOODEV-2097: judge + fast defaults route
// to Groq gpt-oss (the Llama aliases were removed at the
// gateway). Embedding the catalog entries here so the picker
// shows them with the right metadata even in offline mode.
(
"groq-llama-3.3-70b",
"groq-gpt-oss-120b",
ModelInfo {
use_cases: svec(&["judge", "guardrails", "reasoning", "fast"]),
tier: Tier::Fast,
description: "Llama 3.3-70B on Groq — strong judge, sub-second p95.".into(),
description: "gpt-oss-120B on Groq — strong judge, sub-second p95.".into(),
input_cost_per_token: 0.00000059,
output_cost_per_token: 0.00000079,
benchmarks: Benchmarks {
Expand All @@ -688,11 +688,11 @@ pub fn fallback_catalog() -> &'static [(&'static str, ModelInfo)] {
},
),
(
"groq-llama-3.1-8b",
"groq-gpt-oss-20b",
ModelInfo {
use_cases: svec(&["fast", "utility", "cheap"]),
tier: Tier::Utility,
description: "Llama 3.1-8B on Groq — sub-300ms utility, ~10x cheaper than Gemini Flash Lite.".into(),
description: "gpt-oss-20B on Groq — sub-300ms utility, cheap.".into(),
input_cost_per_token: 0.00000005,
output_cost_per_token: 0.00000008,
benchmarks: Benchmarks {
Expand Down Expand Up @@ -1386,7 +1386,7 @@ mod tests {
let coding = p.slots.iter().find(|s| s.slot == PickerSlot::Coding).expect("coding");
assert_eq!(coding.current_model, "deepseek-v4-flash", "coding slot post-migration");
let fast = p.slots.iter().find(|s| s.slot == PickerSlot::Fast).expect("fast");
assert_eq!(fast.current_model, "groq-llama-3.1-8b", "fast slot post-migration");
assert_eq!(fast.current_model, "groq-gpt-oss-20b", "fast slot post-migration");

// The on-disk file must also be rewritten so the migration
// only runs once per user.
Expand All @@ -1407,9 +1407,9 @@ mod tests {
"minimax-m2.7-direct",
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
// Pearl th-3468bd: judge + fast slot defaults
"groq-llama-3.3-70b",
"groq-llama-3.1-8b",
// Pearl th-3468bd / SMOODEV-2097: judge + fast slot defaults
"groq-gpt-oss-120b",
"groq-gpt-oss-20b",
] {
assert!(names.contains(&required), "fallback catalog missing {required}");
}
Expand Down
82 changes: 65 additions & 17 deletions crates/smooth-policy/src/smooth_alias.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
//! | `smooth-coding` | `deepseek-v4-flash` |
//! | `smooth-reasoning` | `deepseek-v4-pro` |
//! | `smooth-reviewing` | `minimax-m2.7-direct` |
//! | `smooth-judge` | `gemini-2.5-flash` |
//! | `smooth-judge` | `groq-gpt-oss-120b` |
//! | `smooth-summarize` | `gemini-2.5-flash` |
//! | `smooth-fast` | `gemini-2.5-flash-lite` |
//! | `smooth-fast` | `groq-gpt-oss-20b` |
//! | `smooth-default` | (alias of coding) |
//! | `smooth-planning` (deprecated)| (alias of reasoning) |
//! | `smooth-thinking` (deprecated)| (alias of reasoning) |
Expand Down Expand Up @@ -61,18 +61,20 @@ impl SmoothSlot {
Self::Reasoning => "deepseek-v4-pro",
Self::Reviewing => "minimax-m2.7-direct",
// Pearl th-3468bd: judge runs once per dispatch and gates
// tool execution; an 8B's miss on adversarial paraphrase
// attacks costs more than the few hundred extra ms.
// Llama 3.3-70B on Groq is still sub-second p95 and well
// tool execution; a small model's miss on adversarial
// paraphrase attacks costs more than the few hundred extra
// ms. gpt-oss-120B on Groq is still sub-second p95 and well
// under Gemini Flash on cost, with substantially better
// refusal/jailbreak detection.
Self::Judge => "groq-llama-3.3-70b",
// refusal/jailbreak detection. (Replaces the deprecated
// groq-llama-3.3-70b alias removed at the gateway.)
Self::Judge => "groq-gpt-oss-120b",
// Summarize needs the 1M context window — gemini-2.5-flash
// stays.
Self::Summarize => "gemini-2.5-flash",
// Fast is utility (titles, autocomplete) — sub-300ms first
// token and ~10x cheaper than Gemini Flash Lite.
Self::Fast => "groq-llama-3.1-8b",
// token and cheap. (Replaces the deprecated groq-llama-3.1-8b
// alias removed at the gateway.)
Self::Fast => "groq-gpt-oss-20b",
}
}

Expand Down Expand Up @@ -112,6 +114,18 @@ pub const ALL_SLOTS: &[SmoothSlot] = &[
#[must_use]
pub fn migrate_alias(model: &str) -> Option<&'static str> {
let lower = model.to_ascii_lowercase();

// Deprecated *concrete* gateway models. Configs that already ran the
// smooth-* → concrete migration are pinned to a literal model name,
// so they never hit the `smooth-` branches below. The Groq Llama
// aliases (`groq-llama-3.3-70b`, `groq-llama-3.1-8b`) were removed at
// the gateway and replaced by gpt-oss; rewrite them here so an
// already-migrated config gets bumped to the live alias instead of
// 404ing on a dead model.
if let Some(replacement) = migrate_deprecated_concrete(&lower) {
return Some(replacement);
}

let stripped = lower.strip_prefix("smooth-")?;

// Exact slot aliases.
Expand Down Expand Up @@ -143,6 +157,25 @@ pub fn migrate_alias(model: &str) -> Option<&'static str> {
None
}

/// Translate a retired *concrete* gateway model name into the model that
/// replaced it. Yields `None` for every name that is not in the retired
/// list, including names that are still live.
///
/// This is the second migration step, layered on top of the `smooth-*`
/// alias rewrite. The gateway removed `groq-llama-3.3-70b` and
/// `groq-llama-3.1-8b` (SMOODEV-2097) after configs had already been
/// migrated *onto* them, so a config can hold the literal dead name with
/// no `smooth-` prefix left to re-trigger the slot mapping.
///
/// The comparison is exact: `lower` must already be lowercased by the
/// caller, otherwise a mixed-case spelling of a retired name is not
/// recognised.
fn migrate_deprecated_concrete(lower: &str) -> Option<&'static str> {
    // (retired name, live replacement) pairs.
    const RETIRED: [(&str, &str); 2] = [
        // Judge slot — the removed 70B Llama → gpt-oss-120B.
        ("groq-llama-3.3-70b", "groq-gpt-oss-120b"),
        // Fast slot — the removed 8B Llama → gpt-oss-20B.
        ("groq-llama-3.1-8b", "groq-gpt-oss-20b"),
    ];

    RETIRED
        .iter()
        .find(|(retired, _)| *retired == lower)
        .map(|&(_, live)| live)
}

fn match_slot_exact(stripped: &str) -> Option<SmoothSlot> {
Some(match stripped {
"coding" => SmoothSlot::Coding,
Expand Down Expand Up @@ -185,9 +218,9 @@ mod tests {
assert_eq!(migrate_alias("smooth-coding"), Some("deepseek-v4-flash"));
assert_eq!(migrate_alias("smooth-reasoning"), Some("deepseek-v4-pro"));
assert_eq!(migrate_alias("smooth-reviewing"), Some("minimax-m2.7-direct"));
assert_eq!(migrate_alias("smooth-judge"), Some("groq-llama-3.3-70b"));
assert_eq!(migrate_alias("smooth-judge"), Some("groq-gpt-oss-120b"));
assert_eq!(migrate_alias("smooth-summarize"), Some("gemini-2.5-flash"));
assert_eq!(migrate_alias("smooth-fast"), Some("groq-llama-3.1-8b"));
assert_eq!(migrate_alias("smooth-fast"), Some("groq-gpt-oss-20b"));
assert_eq!(migrate_alias("smooth-default"), Some("deepseek-v4-flash"));
}

Expand All @@ -200,12 +233,12 @@ mod tests {

#[test]
fn sub_aliases_map_to_slot_concrete_default() {
assert_eq!(migrate_alias("smooth-fast-gemini"), Some("groq-llama-3.1-8b"));
assert_eq!(migrate_alias("smooth-fast-haiku"), Some("groq-llama-3.1-8b"));
assert_eq!(migrate_alias("smooth-fast-gpt"), Some("groq-llama-3.1-8b"));
assert_eq!(migrate_alias("smooth-judge-gemini"), Some("groq-llama-3.3-70b"));
assert_eq!(migrate_alias("smooth-judge-haiku"), Some("groq-llama-3.3-70b"));
assert_eq!(migrate_alias("smooth-judge-gpt"), Some("groq-llama-3.3-70b"));
assert_eq!(migrate_alias("smooth-fast-gemini"), Some("groq-gpt-oss-20b"));
assert_eq!(migrate_alias("smooth-fast-haiku"), Some("groq-gpt-oss-20b"));
assert_eq!(migrate_alias("smooth-fast-gpt"), Some("groq-gpt-oss-20b"));
assert_eq!(migrate_alias("smooth-judge-gemini"), Some("groq-gpt-oss-120b"));
assert_eq!(migrate_alias("smooth-judge-haiku"), Some("groq-gpt-oss-120b"));
assert_eq!(migrate_alias("smooth-judge-gpt"), Some("groq-gpt-oss-120b"));
assert_eq!(migrate_alias("smooth-summarize-gemini"), Some("gemini-2.5-flash"));
assert_eq!(migrate_alias("smooth-summarize-gpt"), Some("gemini-2.5-flash"));
assert_eq!(migrate_alias("smooth-summarize-qwen"), Some("gemini-2.5-flash"));
Expand All @@ -220,6 +253,21 @@ mod tests {
assert_eq!(migrate_alias("smooth-reviewing-qwen-coder"), Some("minimax-m2.7-direct"));
}

/// Exercises the deprecated-concrete rewrite through the public
/// `migrate_alias` entry point rather than calling the private helper
/// directly.
#[test]
fn deprecated_concrete_groq_models_migrate_to_gpt_oss() {
// SMOODEV-2097: the gateway removed the Groq Llama models that
// earlier migrations had already pinned configs onto. A config
// holding the literal dead name (no `smooth-` prefix) must still
// get bumped to the live gpt-oss alias.
assert_eq!(migrate_alias("groq-llama-3.3-70b"), Some("groq-gpt-oss-120b"));
assert_eq!(migrate_alias("groq-llama-3.1-8b"), Some("groq-gpt-oss-20b"));
// Case-insensitive, matching the rest of the lookup: `migrate_alias`
// lowercases its input before consulting the deprecated-name table.
assert_eq!(migrate_alias("GROQ-LLAMA-3.3-70B"), Some("groq-gpt-oss-120b"));
// The live gpt-oss names are not themselves deprecated, so they must
// come back as `None` (no rewrite). This also guards against the
// migration looping on its own output.
assert_eq!(migrate_alias("groq-gpt-oss-120b"), None);
assert_eq!(migrate_alias("groq-gpt-oss-20b"), None);
}

#[test]
fn unknown_aliases_return_none() {
// `smooth-` prefix but unknown slot name.
Expand Down
Loading