diff --git a/.jules/bolt.md b/.jules/bolt.md
new file mode 100644
index 0000000..90c9292
--- /dev/null
+++ b/.jules/bolt.md
@@ -0,0 +1,4 @@
+
+## 2024-05-20 - Non-blocking asyncio Inference Calls
+**Learning:** Calling blocking synchronous code directly on an asyncio event loop stalls every other task scheduled on that loop until the call returns. FastAPI runs `async def` endpoints on the event loop thread, so invoking ML inference or other heavy processing functions directly inside them serializes all requests and severely limits throughput.
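+**Action:** Always run heavy synchronous functions (like `inference.generate_solution`) via `await asyncio.to_thread(...)` so they execute on a worker thread and the event loop stays free to serve incoming requests. This helps most when the work releases the GIL (blocking I/O, or native code in extension modules); pure-Python CPU-bound work still contends for the GIL and is better served by a process pool.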
diff --git a/web/__pycache__/app.cpython-312.pyc b/web/__pycache__/app.cpython-312.pyc
new file mode 100644
index 0000000..1077fd3
Binary files /dev/null and b/web/__pycache__/app.cpython-312.pyc differ
diff --git a/web/app.py b/web/app.py
index 62b21b8..21f0297 100644
--- a/web/app.py
+++ b/web/app.py
@@ -3,6 +3,8 @@
 from fastapi.responses import FileResponse
 from pydantic import BaseModel
 from typing import Optional
+import asyncio
+import logging
 
 import inference
 
@@ -40,14 +42,16 @@ async def solve(req: SolveRequest):
         raise HTTPException(status_code=400, detail="`problem` must be a non-empty string")
 
     try:
-        solution = inference.generate_solution(
+        solution = await asyncio.to_thread(
+            inference.generate_solution,
             problem=req.problem,
             cot=req.cot,
             temperature=req.temperature,
             top_p=req.top_p,
             max_new_tokens=req.max_new_tokens,
         )
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+    except Exception:
+        logging.exception("Error during inference")
+        raise HTTPException(status_code=500, detail="Internal server error")
 
     return SolveResponse(solution=solution)