Skip to content

Commit f117d82

Browse files
Merge remote-tracking branch 'upstream/main' into dev
2 parents 4d67959 + 357629d · commit f117d82

20 files changed

Lines changed: 1637 additions & 124 deletions

File tree

ROADMAP.md

Lines changed: 36 additions & 0 deletions
Large diffs are not rendered by default.

rust/crates/api/src/error.rs

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,11 @@ const CONTEXT_WINDOW_ERROR_MARKERS: &[&str] = &[
1414
"too many tokens",
1515
"prompt is too long",
1616
"input is too long",
17+
"input tokens exceed",
18+
"configured limit",
19+
"messages resulted in",
20+
"completion tokens",
21+
"prompt tokens",
1722
"request is too large",
1823
];
1924

@@ -542,6 +547,26 @@ mod tests {
542547
assert_eq!(error.request_id(), Some("req_ctx_123"));
543548
}
544549

550+
#[test]
551+
fn classifies_openai_configured_limit_errors_as_context_window_failures() {
552+
let error = ApiError::Api {
553+
status: reqwest::StatusCode::BAD_REQUEST,
554+
error_type: Some("invalid_request_error".to_string()),
555+
message: Some(
556+
"Input tokens exceed the configured limit of 922000 tokens. Your messages resulted in 1860900 tokens. Please reduce the length of the messages."
557+
.to_string(),
558+
),
559+
request_id: Some("req_ctx_openai_123".to_string()),
560+
body: String::new(),
561+
retryable: false,
562+
suggested_action: None,
563+
};
564+
565+
assert!(error.is_context_window_failure());
566+
assert_eq!(error.safe_failure_class(), "context_window");
567+
assert_eq!(error.request_id(), Some("req_ctx_openai_123"));
568+
}
569+
545570
#[test]
546571
fn missing_credentials_without_hint_renders_the_canonical_message() {
547572
// given

rust/crates/api/src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,11 +21,12 @@ pub use prompt_cache::{
2121
pub use providers::anthropic::{AnthropicClient, AnthropicClient as ApiClient, AuthSource};
2222
pub use providers::openai_compat::{
2323
build_chat_completion_request, flatten_tool_result_content, is_reasoning_model,
24-
model_rejects_is_error_field, translate_message, OpenAiCompatClient, OpenAiCompatConfig,
24+
model_rejects_is_error_field, model_requires_reasoning_content_in_history, translate_message,
25+
OpenAiCompatClient, OpenAiCompatConfig,
2526
};
2627
pub use providers::{
2728
detect_provider_kind, max_tokens_for_model, max_tokens_for_model_with_override,
28-
resolve_model_alias, ProviderKind,
29+
model_family_identity_for, model_family_identity_for_kind, resolve_model_alias, ProviderKind,
2930
};
3031
pub use sse::{parse_frame, SseParser};
3132
pub use types::{

rust/crates/api/src/providers/mod.rs

Lines changed: 134 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -253,19 +253,31 @@ pub fn detect_provider_kind(model: &str) -> ProviderKind {
253253
ProviderKind::Ollama
254254
}
255255

256+
#[must_use]
257+
pub const fn model_family_identity_for_kind(kind: ProviderKind) -> runtime::ModelFamilyIdentity {
258+
match kind {
259+
ProviderKind::Anthropic => runtime::ModelFamilyIdentity::Claude,
260+
ProviderKind::Xai | ProviderKind::OpenAi => runtime::ModelFamilyIdentity::Generic,
261+
}
262+
}
263+
264+
#[must_use]
265+
pub fn model_family_identity_for(model: &str) -> runtime::ModelFamilyIdentity {
266+
model_family_identity_for_kind(detect_provider_kind(model))
267+
}
268+
256269
#[must_use]
257270
pub fn max_tokens_for_model(model: &str) -> u32 {
258-
model_token_limit(model).map_or_else(
259-
|| {
260-
let canonical = resolve_model_alias(model);
261-
if canonical.contains("opus") {
262-
32_000
263-
} else {
264-
64_000
265-
}
266-
},
267-
|limit| limit.max_output_tokens,
268-
)
271+
let canonical = resolve_model_alias(model);
272+
let heuristic = if canonical.contains("opus") {
273+
32_000
274+
} else {
275+
64_000
276+
};
277+
278+
model_token_limit(model)
279+
.map(|limit| heuristic.min(limit.max_output_tokens))
280+
.unwrap_or(heuristic)
269281
}
270282

271283
/// Returns the effective max output tokens for a model, preferring a plugin
@@ -279,7 +291,8 @@ pub fn max_tokens_for_model_with_override(model: &str, plugin_override: Option<u
279291
#[must_use]
280292
pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
281293
let canonical = resolve_model_alias(model);
282-
match canonical.as_str() {
294+
let base_model = canonical.rsplit('/').next().unwrap_or(canonical.as_str());
295+
match base_model {
283296
"claude-opus-4-6" => Some(ModelTokenLimit {
284297
max_output_tokens: 32_000,
285298
context_window_tokens: 200_000,
@@ -292,6 +305,20 @@ pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
292305
max_output_tokens: 64_000,
293306
context_window_tokens: 131_072,
294307
}),
308+
// GPT-4.1 family via the OpenAI API.
309+
"gpt-4.1" | "gpt-4.1-mini" | "gpt-4.1-nano" => Some(ModelTokenLimit {
310+
max_output_tokens: 32_768,
311+
context_window_tokens: 1_047_576,
312+
}),
313+
// GPT-5.4 family via the OpenAI API.
314+
"gpt-5.4" => Some(ModelTokenLimit {
315+
max_output_tokens: 128_000,
316+
context_window_tokens: 1_000_000,
317+
}),
318+
"gpt-5.4-mini" | "gpt-5.4-nano" => Some(ModelTokenLimit {
319+
max_output_tokens: 128_000,
320+
context_window_tokens: 400_000,
321+
}),
295322
// Kimi models via DashScope (Moonshot AI)
296323
// Source: https://platform.moonshot.cn/docs/intro
297324
"kimi-k2.5" | "kimi-k1.5" => Some(ModelTokenLimit {
@@ -473,8 +500,8 @@ mod tests {
473500
use super::{
474501
anthropic_missing_credentials, anthropic_missing_credentials_hint, detect_provider_kind,
475502
load_dotenv_file, max_tokens_for_model, max_tokens_for_model_with_override,
476-
model_token_limit, parse_dotenv, preflight_message_request, resolve_model_alias,
477-
ProviderKind,
503+
model_family_identity_for, model_family_identity_for_kind, model_token_limit, parse_dotenv,
504+
preflight_message_request, resolve_model_alias, ProviderKind,
478505
};
479506

480507
/// Serializes every test in this module that mutates process-wide
@@ -533,6 +560,42 @@ mod tests {
533560
);
534561
}
535562

563+
#[test]
564+
fn maps_provider_kind_to_model_family_identity() {
565+
// given: each supported provider kind
566+
let anthropic = ProviderKind::Anthropic;
567+
let openai = ProviderKind::OpenAi;
568+
let xai = ProviderKind::Xai;
569+
570+
// when: converting provider kinds to prompt model family identities
571+
let anthropic_identity = model_family_identity_for_kind(anthropic);
572+
let openai_identity = model_family_identity_for_kind(openai);
573+
let xai_identity = model_family_identity_for_kind(xai);
574+
575+
// then: Anthropic stays Claude and OpenAI-compatible providers are generic
576+
assert_eq!(anthropic_identity, runtime::ModelFamilyIdentity::Claude);
577+
assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic);
578+
assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic);
579+
}
580+
581+
#[test]
582+
fn maps_model_name_to_model_family_identity() {
583+
// given: Anthropic, OpenAI-compatible, and xAI model names
584+
let claude_model = "claude-opus-4-6";
585+
let openai_model = "openai/gpt-4.1-mini";
586+
let xai_model = "grok-3";
587+
588+
// when: detecting prompt model family identities from model names
589+
let claude_identity = model_family_identity_for(claude_model);
590+
let openai_identity = model_family_identity_for(openai_model);
591+
let xai_identity = model_family_identity_for(xai_model);
592+
593+
// then: Anthropic stays Claude and OpenAI-compatible providers are generic
594+
assert_eq!(claude_identity, runtime::ModelFamilyIdentity::Claude);
595+
assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic);
596+
assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic);
597+
}
598+
536599
#[test]
537600
fn openai_namespaced_model_routes_to_openai_not_anthropic() {
538601
// Regression: "openai/gpt-4.1-mini" was misrouted to Anthropic when
@@ -617,6 +680,15 @@ mod tests {
617680
fn keeps_existing_max_token_heuristic() {
618681
assert_eq!(max_tokens_for_model("opus"), 32_000);
619682
assert_eq!(max_tokens_for_model("grok-3"), 64_000);
683+
assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
684+
}
685+
686+
#[test]
687+
fn caps_default_max_tokens_to_openai_model_limits() {
688+
assert_eq!(max_tokens_for_model("gpt-4.1-mini"), 32_768);
689+
assert_eq!(max_tokens_for_model("openai/gpt-4.1-mini"), 32_768);
690+
assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
691+
assert_eq!(max_tokens_for_model("openai/gpt-5.4"), 64_000);
620692
}
621693

622694
#[test]
@@ -683,6 +755,18 @@ mod tests {
683755
.context_window_tokens,
684756
131_072
685757
);
758+
assert_eq!(
759+
model_token_limit("openai/gpt-4.1-mini")
760+
.expect("openai/gpt-4.1-mini should be registered")
761+
.context_window_tokens,
762+
1_047_576
763+
);
764+
assert_eq!(
765+
model_token_limit("gpt-5.4")
766+
.expect("gpt-5.4 should be registered")
767+
.context_window_tokens,
768+
1_000_000
769+
);
686770
}
687771

688772
#[test]
@@ -731,6 +815,42 @@ mod tests {
731815
}
732816
}
733817

818+
#[test]
819+
fn preflight_blocks_oversized_requests_for_gpt_5_4() {
820+
let request = MessageRequest {
821+
model: "gpt-5.4".to_string(),
822+
max_tokens: 64_000,
823+
messages: vec![InputMessage {
824+
role: "user".to_string(),
825+
content: vec![InputContentBlock::Text {
826+
text: "x".repeat(3_900_000),
827+
}],
828+
}],
829+
system: Some("Keep the answer short.".to_string()),
830+
tools: None,
831+
tool_choice: None,
832+
stream: true,
833+
..Default::default()
834+
};
835+
836+
let error = preflight_message_request(&request)
837+
.expect_err("oversized gpt-5.4 request should be rejected before the provider call");
838+
839+
match error {
840+
ApiError::ContextWindowExceeded {
841+
model,
842+
requested_output_tokens,
843+
context_window_tokens,
844+
..
845+
} => {
846+
assert_eq!(model, "gpt-5.4");
847+
assert_eq!(requested_output_tokens, 64_000);
848+
assert_eq!(context_window_tokens, 1_000_000);
849+
}
850+
other => panic!("expected context-window preflight failure, got {other:?}"),
851+
}
852+
}
853+
734854
#[test]
735855
fn preflight_skips_unknown_models() {
736856
let request = MessageRequest {

0 commit comments

Comments
 (0)