diff --git a/demo/nesa/backend/llms.py b/demo/nesa/backend/llms.py index 37a3321..a505a08 100644 --- a/demo/nesa/backend/llms.py +++ b/demo/nesa/backend/llms.py @@ -48,7 +48,7 @@ async def sse_message_handler(inf_request: LLMInference, timeout=60): async with httpx.AsyncClient() as client: async with client.stream( - "POST", settings.stream_url, data=msgspec.json.encode(inf_request), headers=headers, timeout=None + "POST", settings.stream_url, data=msgspec.json.encode(inf_request), headers=headers, timeout=timeout ) as response: response.raise_for_status() @@ -179,13 +179,13 @@ def perform_inference( tokenizer: Any, current_msg: str, model_name: Optional[Any] = None, - history: Optional[List[str]] = [], + history: Optional[List[str]] = None, system_prompt: Optional[str] = "", **kwargs, ) -> Generator[str, None, None]: prompt_template = generate_prompt_template( - current_msg=current_msg, system_prompt=system_prompt, history=history + current_msg=current_msg, system_prompt=system_prompt, history=history or [] ) print("[Server]") print("Input received from client:")