diff --git a/crates/smooth-cast/src/provider_migration.rs b/crates/smooth-cast/src/provider_migration.rs index e3360f04..00bad432 100644 --- a/crates/smooth-cast/src/provider_migration.rs +++ b/crates/smooth-cast/src/provider_migration.rs @@ -191,10 +191,10 @@ mod tests { assert_eq!(r.routing.coding.model, "deepseek-v4-flash"); assert_eq!(r.routing.reasoning.as_ref().unwrap().model, "deepseek-v4-pro"); assert_eq!(r.routing.reviewing.model, "minimax-m2.7-direct"); - assert_eq!(r.routing.judge.model, "groq-llama-3.3-70b"); + assert_eq!(r.routing.judge.model, "groq-gpt-oss-120b"); assert_eq!(r.routing.summarize.model, "gemini-2.5-flash"); assert_eq!(r.routing.default.model, "deepseek-v4-flash"); - assert_eq!(r.routing.fast.as_ref().unwrap().model, "groq-llama-3.1-8b"); + assert_eq!(r.routing.fast.as_ref().unwrap().model, "groq-gpt-oss-20b"); } #[test] @@ -269,14 +269,14 @@ mod tests { // Load via the wrapper: should rewrite + save back. let loaded = load_providers_with_migration(&path).expect("load"); assert_eq!(loaded.routing.coding.model, "deepseek-v4-flash"); - assert_eq!(loaded.routing.fast.as_ref().unwrap().model, "groq-llama-3.1-8b"); + assert_eq!(loaded.routing.fast.as_ref().unwrap().model, "groq-gpt-oss-20b"); // Read again with raw load_from_file — the file on disk must // now hold the concrete names too. 
let raw_reloaded = ProviderRegistry::load_from_file(&path).expect("reload"); assert_eq!(raw_reloaded.routing.coding.model, "deepseek-v4-flash"); assert_eq!(raw_reloaded.routing.reasoning.as_ref().unwrap().model, "deepseek-v4-pro"); - assert_eq!(raw_reloaded.routing.judge.model, "groq-llama-3.3-70b"); + assert_eq!(raw_reloaded.routing.judge.model, "groq-gpt-oss-120b"); } #[test] @@ -303,6 +303,35 @@ mod tests { assert_eq!(coding.new, "deepseek-v4-flash"); let fast = rewrites.iter().find(|r| r.slot == "fast").expect("fast rewrite"); assert_eq!(fast.old, "smooth-fast"); - assert_eq!(fast.new, "groq-llama-3.1-8b"); + assert_eq!(fast.new, "groq-gpt-oss-20b"); + } + + /// SMOODEV-2097: a config that already ran the smooth-* migration is + /// pinned to the *concrete* Groq Llama names. The gateway then + /// removed those models, so the second migration step must bump them + /// to gpt-oss — even though they carry no `smooth-` prefix. + #[test] + fn migrate_bumps_already_migrated_groq_llama_to_gpt_oss() { + let mut r = ProviderRegistry::new().with_routing(ModelRouting { + coding: ModelSlot::new("smooai-gateway", "deepseek-v4-flash"), + reasoning: Some(ModelSlot::new("smooai-gateway", "deepseek-v4-pro")), + reviewing: ModelSlot::new("smooai-gateway", "minimax-m2.7-direct"), + judge: ModelSlot::new("smooai-gateway", "groq-llama-3.3-70b"), + summarize: ModelSlot::new("smooai-gateway", "gemini-2.5-flash"), + default: ModelSlot::new("smooai-gateway", "deepseek-v4-flash"), + fast: Some(ModelSlot::new("smooai-gateway", "groq-llama-3.1-8b")), + planning: None, + }); + let rewrites = migrate_provider_registry(&mut r); + // Only judge + fast change; the rest were already live concrete + // names. 
+ assert_eq!(rewrites.len(), 2, "rewrites = {rewrites:?}"); + assert_eq!(r.routing.judge.model, "groq-gpt-oss-120b"); + assert_eq!(r.routing.fast.as_ref().unwrap().model, "groq-gpt-oss-20b"); + let judge = rewrites.iter().find(|r| r.slot == "judge").expect("judge rewrite"); + assert_eq!(judge.old, "groq-llama-3.3-70b"); + assert_eq!(judge.new, "groq-gpt-oss-120b"); + // Idempotent: a second pass makes no further changes. + assert!(migrate_provider_registry(&mut r).is_empty()); } } diff --git a/crates/smooth-code/src/model_picker.rs b/crates/smooth-code/src/model_picker.rs index a9944ca9..57823415 100644 --- a/crates/smooth-code/src/model_picker.rs +++ b/crates/smooth-code/src/model_picker.rs @@ -668,16 +668,16 @@ pub fn fallback_catalog() -> &'static [(&'static str, ModelInfo)] { }, }, ), - // Pearl th-3468bd: judge + fast defaults now route to - // Groq Llama. Embedding the catalog entries here so the - // picker shows them with the right metadata even in - // offline mode. + // Pearl th-3468bd / SMOODEV-2097: judge + fast defaults route + // to Groq gpt-oss (the Llama aliases were removed at the + // gateway). Embedding the catalog entries here so the picker + // shows them with the right metadata even in offline mode. 
( - "groq-llama-3.3-70b", + "groq-gpt-oss-120b", ModelInfo { use_cases: svec(&["judge", "guardrails", "reasoning", "fast"]), tier: Tier::Fast, - description: "Llama 3.3-70B on Groq — strong judge, sub-second p95.".into(), + description: "gpt-oss-120B on Groq — strong judge, sub-second p95.".into(), input_cost_per_token: 0.00000059, output_cost_per_token: 0.00000079, benchmarks: Benchmarks { @@ -688,11 +688,11 @@ pub fn fallback_catalog() -> &'static [(&'static str, ModelInfo)] { }, ), ( - "groq-llama-3.1-8b", + "groq-gpt-oss-20b", ModelInfo { use_cases: svec(&["fast", "utility", "cheap"]), tier: Tier::Utility, - description: "Llama 3.1-8B on Groq — sub-300ms utility, ~10x cheaper than Gemini Flash Lite.".into(), + description: "gpt-oss-20B on Groq — sub-300ms utility, cheap.".into(), input_cost_per_token: 0.00000005, output_cost_per_token: 0.00000008, benchmarks: Benchmarks { @@ -1386,7 +1386,7 @@ mod tests { let coding = p.slots.iter().find(|s| s.slot == PickerSlot::Coding).expect("coding"); assert_eq!(coding.current_model, "deepseek-v4-flash", "coding slot post-migration"); let fast = p.slots.iter().find(|s| s.slot == PickerSlot::Fast).expect("fast"); - assert_eq!(fast.current_model, "groq-llama-3.1-8b", "fast slot post-migration"); + assert_eq!(fast.current_model, "groq-gpt-oss-20b", "fast slot post-migration"); // The on-disk file must also be rewritten so the migration // only runs once per user. 
@@ -1407,9 +1407,9 @@ mod tests { "minimax-m2.7-direct", "gemini-2.5-flash", "gemini-2.5-flash-lite", - // Pearl th-3468bd: judge + fast slot defaults - "groq-llama-3.3-70b", - "groq-llama-3.1-8b", + // Pearl th-3468bd / SMOODEV-2097: judge + fast slot defaults + "groq-gpt-oss-120b", + "groq-gpt-oss-20b", ] { assert!(names.contains(&required), "fallback catalog missing {required}"); } diff --git a/crates/smooth-policy/src/smooth_alias.rs b/crates/smooth-policy/src/smooth_alias.rs index 6e05e09b..50c0166e 100644 --- a/crates/smooth-policy/src/smooth_alias.rs +++ b/crates/smooth-policy/src/smooth_alias.rs @@ -20,9 +20,9 @@ //! | `smooth-coding` | `deepseek-v4-flash` | //! | `smooth-reasoning` | `deepseek-v4-pro` | //! | `smooth-reviewing` | `minimax-m2.7-direct` | -//! | `smooth-judge` | `gemini-2.5-flash` | +//! | `smooth-judge` | `groq-gpt-oss-120b` | //! | `smooth-summarize` | `gemini-2.5-flash` | -//! | `smooth-fast` | `gemini-2.5-flash-lite` | +//! | `smooth-fast` | `groq-gpt-oss-20b` | //! | `smooth-default` | (alias of coding) | //! | `smooth-planning` (deprecated)| (alias of reasoning) | //! | `smooth-thinking` (deprecated)| (alias of reasoning) | @@ -61,18 +61,20 @@ impl SmoothSlot { Self::Reasoning => "deepseek-v4-pro", Self::Reviewing => "minimax-m2.7-direct", // Pearl th-3468bd: judge runs once per dispatch and gates - // tool execution; an 8B's miss on adversarial paraphrase - // attacks costs more than the few hundred extra ms. - // Llama 3.3-70B on Groq is still sub-second p95 and well + // tool execution; a small model's miss on adversarial + // paraphrase attacks costs more than the few hundred extra + // ms. gpt-oss-120B on Groq is still sub-second p95 and well // under Gemini Flash on cost, with substantially better - // refusal/jailbreak detection. - Self::Judge => "groq-llama-3.3-70b", + // refusal/jailbreak detection. (Replaces the deprecated + // groq-llama-3.3-70b alias removed at the gateway.) 
+ Self::Judge => "groq-gpt-oss-120b", // Summarize needs the 1M context window — gemini-2.5-flash // stays. Self::Summarize => "gemini-2.5-flash", // Fast is utility (titles, autocomplete) — sub-300ms first - // token and ~10x cheaper than Gemini Flash Lite. - Self::Fast => "groq-llama-3.1-8b", + // token and cheap. (Replaces the deprecated groq-llama-3.1-8b + // alias removed at the gateway.) + Self::Fast => "groq-gpt-oss-20b", } } @@ -112,6 +114,18 @@ pub const ALL_SLOTS: &[SmoothSlot] = &[ #[must_use] pub fn migrate_alias(model: &str) -> Option<&'static str> { let lower = model.to_ascii_lowercase(); + + // Deprecated *concrete* gateway models. Configs that already ran the + // smooth-* → concrete migration are pinned to a literal model name, + // so they never hit the `smooth-` branches below. The Groq Llama + // aliases (`groq-llama-3.3-70b`, `groq-llama-3.1-8b`) were removed at + // the gateway and replaced by gpt-oss; rewrite them here so an + // already-migrated config gets bumped to the live alias instead of + // 404ing on a dead model. + if let Some(replacement) = migrate_deprecated_concrete(&lower) { + return Some(replacement); + } + let stripped = lower.strip_prefix("smooth-")?; // Exact slot aliases. @@ -143,6 +157,25 @@ pub fn migrate_alias(model: &str) -> Option<&'static str> { None } +/// Map a deprecated *concrete* gateway model name to its live +/// replacement. Returns `None` for anything still valid. +/// +/// This is a second migration step layered on top of the `smooth-*` +/// alias rewrite: the gateway removed the `groq-llama-3.3-70b` / +/// `groq-llama-3.1-8b` models (SMOODEV-2097) after configs had already +/// been migrated *onto* them, so a config can hold the literal dead name +/// with no `smooth-` prefix left to re-trigger the slot mapping. The +/// `lower` argument is expected to be pre-lowercased by the caller. 
+fn migrate_deprecated_concrete(lower: &str) -> Option<&'static str> { + match lower { + // Judge slot — the removed 70B Llama → gpt-oss-120B. + "groq-llama-3.3-70b" => Some("groq-gpt-oss-120b"), + // Fast slot — the removed 8B Llama → gpt-oss-20B. + "groq-llama-3.1-8b" => Some("groq-gpt-oss-20b"), + _ => None, + } +} + fn match_slot_exact(stripped: &str) -> Option { Some(match stripped { "coding" => SmoothSlot::Coding, @@ -185,9 +218,9 @@ mod tests { assert_eq!(migrate_alias("smooth-coding"), Some("deepseek-v4-flash")); assert_eq!(migrate_alias("smooth-reasoning"), Some("deepseek-v4-pro")); assert_eq!(migrate_alias("smooth-reviewing"), Some("minimax-m2.7-direct")); - assert_eq!(migrate_alias("smooth-judge"), Some("groq-llama-3.3-70b")); + assert_eq!(migrate_alias("smooth-judge"), Some("groq-gpt-oss-120b")); assert_eq!(migrate_alias("smooth-summarize"), Some("gemini-2.5-flash")); - assert_eq!(migrate_alias("smooth-fast"), Some("groq-llama-3.1-8b")); + assert_eq!(migrate_alias("smooth-fast"), Some("groq-gpt-oss-20b")); assert_eq!(migrate_alias("smooth-default"), Some("deepseek-v4-flash")); } @@ -200,12 +233,12 @@ mod tests { #[test] fn sub_aliases_map_to_slot_concrete_default() { - assert_eq!(migrate_alias("smooth-fast-gemini"), Some("groq-llama-3.1-8b")); - assert_eq!(migrate_alias("smooth-fast-haiku"), Some("groq-llama-3.1-8b")); - assert_eq!(migrate_alias("smooth-fast-gpt"), Some("groq-llama-3.1-8b")); - assert_eq!(migrate_alias("smooth-judge-gemini"), Some("groq-llama-3.3-70b")); - assert_eq!(migrate_alias("smooth-judge-haiku"), Some("groq-llama-3.3-70b")); - assert_eq!(migrate_alias("smooth-judge-gpt"), Some("groq-llama-3.3-70b")); + assert_eq!(migrate_alias("smooth-fast-gemini"), Some("groq-gpt-oss-20b")); + assert_eq!(migrate_alias("smooth-fast-haiku"), Some("groq-gpt-oss-20b")); + assert_eq!(migrate_alias("smooth-fast-gpt"), Some("groq-gpt-oss-20b")); + assert_eq!(migrate_alias("smooth-judge-gemini"), Some("groq-gpt-oss-120b")); + 
assert_eq!(migrate_alias("smooth-judge-haiku"), Some("groq-gpt-oss-120b")); + assert_eq!(migrate_alias("smooth-judge-gpt"), Some("groq-gpt-oss-120b")); assert_eq!(migrate_alias("smooth-summarize-gemini"), Some("gemini-2.5-flash")); assert_eq!(migrate_alias("smooth-summarize-gpt"), Some("gemini-2.5-flash")); assert_eq!(migrate_alias("smooth-summarize-qwen"), Some("gemini-2.5-flash")); @@ -220,6 +253,21 @@ mod tests { assert_eq!(migrate_alias("smooth-reviewing-qwen-coder"), Some("minimax-m2.7-direct")); } + #[test] + fn deprecated_concrete_groq_models_migrate_to_gpt_oss() { + // SMOODEV-2097: the gateway removed the Groq Llama models that + // earlier migrations had already pinned configs onto. A config + // holding the literal dead name (no `smooth-` prefix) must still + // get bumped to the live gpt-oss alias. + assert_eq!(migrate_alias("groq-llama-3.3-70b"), Some("groq-gpt-oss-120b")); + assert_eq!(migrate_alias("groq-llama-3.1-8b"), Some("groq-gpt-oss-20b")); + // Case-insensitive, matching the rest of the lookup. + assert_eq!(migrate_alias("GROQ-LLAMA-3.3-70B"), Some("groq-gpt-oss-120b")); + // The live gpt-oss names are not themselves deprecated. + assert_eq!(migrate_alias("groq-gpt-oss-120b"), None); + assert_eq!(migrate_alias("groq-gpt-oss-20b"), None); + } + #[test] fn unknown_aliases_return_none() { // `smooth-` prefix but unknown slot name.