From 26abe8ac43c8494f3bb89687f2ed09c008dc947a Mon Sep 17 00:00:00 2001 From: Taksh Date: Wed, 10 Jun 2026 12:37:55 +0530 Subject: [PATCH] fix: allow separate input/output budgets for T5 in context check Co-authored-by: Cursor --- fastchat/serve/openai_api_server.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fastchat/serve/openai_api_server.py b/fastchat/serve/openai_api_server.py index a6ffee96b..00e5aa428 100644 --- a/fastchat/serve/openai_api_server.py +++ b/fastchat/serve/openai_api_server.py @@ -166,9 +166,10 @@ async def check_length(request, prompt, max_tokens, worker_addr): {"model": request.model, "prompt": prompt}, "count", ) - length = min(max_tokens, context_len - token_num) + is_enc_dec = "t5" in request.model.lower() + length = min(max_tokens, context_len if is_enc_dec else context_len - token_num) - if length <= 0: + if (is_enc_dec and token_num > context_len) or length <= 0: return None, create_error_response( ErrorCode.CONTEXT_OVERFLOW, f"This model's maximum context length is {context_len} tokens. However, your messages resulted in {token_num} tokens. Please reduce the length of the messages.",