@@ -253,19 +253,31 @@ pub fn detect_provider_kind(model: &str) -> ProviderKind {
     ProviderKind::Ollama
 }
 
+#[must_use]
+pub const fn model_family_identity_for_kind(kind: ProviderKind) -> runtime::ModelFamilyIdentity {
+    match kind {
+        ProviderKind::Anthropic => runtime::ModelFamilyIdentity::Claude,
+        ProviderKind::Xai | ProviderKind::OpenAi => runtime::ModelFamilyIdentity::Generic,
+    }
+}
+
+#[must_use]
+pub fn model_family_identity_for(model: &str) -> runtime::ModelFamilyIdentity {
+    model_family_identity_for_kind(detect_provider_kind(model))
+}
+
 #[must_use]
 pub fn max_tokens_for_model(model: &str) -> u32 {
-    model_token_limit(model).map_or_else(
-        || {
-            let canonical = resolve_model_alias(model);
-            if canonical.contains("opus") {
-                32_000
-            } else {
-                64_000
-            }
-        },
-        |limit| limit.max_output_tokens,
-    )
+    let canonical = resolve_model_alias(model);
+    let heuristic = if canonical.contains("opus") {
+        32_000
+    } else {
+        64_000
+    };
+
+    model_token_limit(model)
+        .map(|limit| heuristic.min(limit.max_output_tokens))
+        .unwrap_or(heuristic)
 }
 
 /// Returns the effective max output tokens for a model, preferring a plugin
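Aside: the rewritten `max_tokens_for_model` computes the heuristic first and treats any registered limit as a cap rather than a replacement. A small illustrative test of that behavior, assuming the function above is in scope (values taken from the limits table and tests in this diff):

```rust
// Illustrative only: assumes this diff's `max_tokens_for_model` is in scope.
#[test]
fn heuristic_is_capped_by_registered_limits() {
    // "gpt-4.1-mini" registers max_output_tokens = 32_768, so the 64_000
    // non-opus heuristic is capped down to 32_768.
    assert_eq!(max_tokens_for_model("gpt-4.1-mini"), 32_768);
    // "gpt-5.4" allows 128_000 output tokens, above the heuristic, so the
    // heuristic (64_000) still wins.
    assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
    // Unregistered models fall back to the heuristic unchanged.
    assert_eq!(max_tokens_for_model("grok-3"), 64_000);
}
```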
@@ -279,7 +291,8 @@ pub fn max_tokens_for_model_with_override(model: &str, plugin_override: Option<u
 #[must_use]
 pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
     let canonical = resolve_model_alias(model);
-    match canonical.as_str() {
+    let base_model = canonical.rsplit('/').next().unwrap_or(canonical.as_str());
+    match base_model {
         "claude-opus-4-6" => Some(ModelTokenLimit {
             max_output_tokens: 32_000,
             context_window_tokens: 200_000,
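Aside: the new `rsplit('/')` line is what lets namespaced IDs such as `openai/gpt-4.1-mini` match the same table entries as bare names; `rsplit` walks segments from the right, so `.next()` returns the text after the last slash, and the `unwrap_or` fallback is purely defensive since `rsplit` on a `&str` always yields at least one segment. A standalone sketch (the `base_model` helper is hypothetical, not part of this diff):

```rust
// Sketch of the base-model extraction used in `model_token_limit`.
fn base_model(canonical: &str) -> &str {
    canonical.rsplit('/').next().unwrap_or(canonical)
}

#[test]
fn strips_provider_namespace() {
    assert_eq!(base_model("openai/gpt-4.1-mini"), "gpt-4.1-mini");
    assert_eq!(base_model("gpt-5.4"), "gpt-5.4"); // no namespace: unchanged
}
```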
@@ -292,6 +305,20 @@ pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
             max_output_tokens: 64_000,
             context_window_tokens: 131_072,
         }),
+        // GPT-4.1 family via the OpenAI API.
+        "gpt-4.1" | "gpt-4.1-mini" | "gpt-4.1-nano" => Some(ModelTokenLimit {
+            max_output_tokens: 32_768,
+            context_window_tokens: 1_047_576,
+        }),
+        // GPT-5.4 family via the OpenAI API.
+        "gpt-5.4" => Some(ModelTokenLimit {
+            max_output_tokens: 128_000,
+            context_window_tokens: 1_000_000,
+        }),
+        "gpt-5.4-mini" | "gpt-5.4-nano" => Some(ModelTokenLimit {
+            max_output_tokens: 128_000,
+            context_window_tokens: 400_000,
+        }),
         // Kimi models via DashScope (Moonshot AI)
         // Source: https://platform.moonshot.cn/docs/intro
         "kimi-k2.5" | "kimi-k1.5" => Some(ModelTokenLimit {
@@ -473,8 +500,8 @@ mod tests {
     use super::{
         anthropic_missing_credentials, anthropic_missing_credentials_hint, detect_provider_kind,
         load_dotenv_file, max_tokens_for_model, max_tokens_for_model_with_override,
-        model_token_limit, parse_dotenv, preflight_message_request, resolve_model_alias,
-        ProviderKind,
+        model_family_identity_for, model_family_identity_for_kind, model_token_limit, parse_dotenv,
+        preflight_message_request, resolve_model_alias, ProviderKind,
     };
 
     /// Serializes every test in this module that mutates process-wide
@@ -533,6 +560,42 @@ mod tests {
         );
     }
 
+    #[test]
+    fn maps_provider_kind_to_model_family_identity() {
+        // given: each supported provider kind
+        let anthropic = ProviderKind::Anthropic;
+        let openai = ProviderKind::OpenAi;
+        let xai = ProviderKind::Xai;
+
+        // when: converting provider kinds to prompt model family identities
+        let anthropic_identity = model_family_identity_for_kind(anthropic);
+        let openai_identity = model_family_identity_for_kind(openai);
+        let xai_identity = model_family_identity_for_kind(xai);
+
+        // then: Anthropic stays Claude and OpenAI-compatible providers are generic
+        assert_eq!(anthropic_identity, runtime::ModelFamilyIdentity::Claude);
+        assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic);
+        assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic);
+    }
+
+    #[test]
+    fn maps_model_name_to_model_family_identity() {
+        // given: Anthropic, OpenAI-compatible, and xAI model names
+        let claude_model = "claude-opus-4-6";
+        let openai_model = "openai/gpt-4.1-mini";
+        let xai_model = "grok-3";
+
+        // when: detecting prompt model family identities from model names
+        let claude_identity = model_family_identity_for(claude_model);
+        let openai_identity = model_family_identity_for(openai_model);
+        let xai_identity = model_family_identity_for(xai_model);
+
+        // then: Anthropic stays Claude and OpenAI-compatible providers are generic
+        assert_eq!(claude_identity, runtime::ModelFamilyIdentity::Claude);
+        assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic);
+        assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic);
+    }
+
     #[test]
     fn openai_namespaced_model_routes_to_openai_not_anthropic() {
         // Regression: "openai/gpt-4.1-mini" was misrouted to Anthropic when
@@ -617,6 +680,15 @@ mod tests {
     fn keeps_existing_max_token_heuristic() {
         assert_eq!(max_tokens_for_model("opus"), 32_000);
         assert_eq!(max_tokens_for_model("grok-3"), 64_000);
+        assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
+    }
+
+    #[test]
+    fn caps_default_max_tokens_to_openai_model_limits() {
+        assert_eq!(max_tokens_for_model("gpt-4.1-mini"), 32_768);
+        assert_eq!(max_tokens_for_model("openai/gpt-4.1-mini"), 32_768);
+        assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
+        assert_eq!(max_tokens_for_model("openai/gpt-5.4"), 64_000);
     }
 
     #[test]
@@ -683,6 +755,18 @@ mod tests {
                 .context_window_tokens,
             131_072
         );
+        assert_eq!(
+            model_token_limit("openai/gpt-4.1-mini")
+                .expect("openai/gpt-4.1-mini should be registered")
+                .context_window_tokens,
+            1_047_576
+        );
+        assert_eq!(
+            model_token_limit("gpt-5.4")
+                .expect("gpt-5.4 should be registered")
+                .context_window_tokens,
+            1_000_000
+        );
     }
 
     #[test]
@@ -731,6 +815,42 @@ mod tests {
         }
     }
 
+    #[test]
+    fn preflight_blocks_oversized_requests_for_gpt_5_4() {
+        let request = MessageRequest {
+            model: "gpt-5.4".to_string(),
+            max_tokens: 64_000,
+            messages: vec![InputMessage {
+                role: "user".to_string(),
+                content: vec![InputContentBlock::Text {
+                    text: "x".repeat(3_900_000),
+                }],
+            }],
+            system: Some("Keep the answer short.".to_string()),
+            tools: None,
+            tool_choice: None,
+            stream: true,
+            ..Default::default()
+        };
+
+        let error = preflight_message_request(&request)
+            .expect_err("oversized gpt-5.4 request should be rejected before the provider call");
+
+        match error {
+            ApiError::ContextWindowExceeded {
+                model,
+                requested_output_tokens,
+                context_window_tokens,
+                ..
+            } => {
+                assert_eq!(model, "gpt-5.4");
+                assert_eq!(requested_output_tokens, 64_000);
+                assert_eq!(context_window_tokens, 1_000_000);
+            }
+            other => panic!("expected context-window preflight failure, got {other:?}"),
+        }
+    }
+
     #[test]
     fn preflight_skips_unknown_models() {
         let request = MessageRequest {
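Aside: the expected values in this test imply the preflight adds an input-token estimate to `max_tokens` and compares the sum against the registered context window. A rough sketch of that arithmetic; the ~4 chars-per-token divisor is an assumption, not something this diff specifies:

```rust
// Assumed shape of the preflight check; the real estimator may differ.
#[test]
fn oversized_request_math() {
    let estimated_input_tokens = 3_900_000 / 4; // assuming ~4 chars per token
    let requested_output_tokens = 64_000;
    let context_window_tokens = 1_000_000; // registered for "gpt-5.4" above
    // 975_000 + 64_000 = 1_039_000 > 1_000_000, so preflight must reject.
    assert!(estimated_input_tokens + requested_output_tokens > context_window_tokens);
}
```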