From 26abe8ac43c8494f3bb89687f2ed09c008dc947a Mon Sep 17 00:00:00 2001
From: Taksh <takshkothari09@gmail.com>
Date: Wed, 10 Jun 2026 12:37:55 +0530
Subject: [PATCH] fix: allow separate input/output budgets for T5 in context
 check

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 fastchat/serve/openai_api_server.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fastchat/serve/openai_api_server.py b/fastchat/serve/openai_api_server.py
index a6ffee96b..00e5aa428 100644
--- a/fastchat/serve/openai_api_server.py
+++ b/fastchat/serve/openai_api_server.py
@@ -166,9 +166,10 @@ async def check_length(request, prompt, max_tokens, worker_addr):
         {"model": request.model, "prompt": prompt},
         "count",
     )
-    length = min(max_tokens, context_len - token_num)
+    is_enc_dec = "t5" in request.model.lower()
+    length = min(max_tokens, context_len if is_enc_dec else context_len - token_num)
 
-    if length <= 0:
+    if (is_enc_dec and token_num > context_len) or length <= 0:
         return None, create_error_response(
             ErrorCode.CONTEXT_OVERFLOW,
             f"This model's maximum context length is {context_len} tokens. However, your messages resulted in {token_num} tokens. Please reduce the length of the messages.",