diff --git a/demo/nesa/backend/llms.py b/demo/nesa/backend/llms.py
index 37a3321..a505a08 100644
--- a/demo/nesa/backend/llms.py
+++ b/demo/nesa/backend/llms.py
@@ -48,7 +48,7 @@ async def sse_message_handler(inf_request: LLMInference, timeout=60):
 
     async with httpx.AsyncClient() as client:
         async with client.stream(
-            "POST", settings.stream_url, data=msgspec.json.encode(inf_request), headers=headers, timeout=None
+            "POST", settings.stream_url, data=msgspec.json.encode(inf_request), headers=headers, timeout=timeout
         ) as response:
             response.raise_for_status()
 
@@ -179,13 +179,13 @@ def perform_inference(
         tokenizer: Any,
         current_msg: str,
         model_name: Optional[Any] = None,
-        history: Optional[List[str]] = [],
+        history: Optional[List[str]] = None,
         system_prompt: Optional[str] = "",
         **kwargs,
     ) -> Generator[str, None, None]:
     
         prompt_template = generate_prompt_template(
-            current_msg=current_msg, system_prompt=system_prompt, history=history
+            current_msg=current_msg, system_prompt=system_prompt, history=history or []
         )
         print("[Server]")
         print("Input received from client:")