From 56b1362650514d05fbd103c9aa045f3579bc70aa Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 18 Mar 2026 00:10:30 +0000 Subject: [PATCH] perf: use asyncio.to_thread for ML inference to prevent blocking the event loop Offloads the blocking synchronous call `inference.generate_solution` in `web/app.py` to a thread pool using `asyncio.to_thread()`. This allows the FastAPI event loop to concurrently handle other requests during generation. Additionally, error handling was updated to log exception details internally via `logging.exception()` and return a generic error message, improving security. Co-authored-by: dhanush342 <187305764+dhanush342@users.noreply.github.com> --- .jules/bolt.md | 4 ++++ web/__pycache__/app.cpython-312.pyc | Bin 0 -> 2972 bytes web/app.py | 10 +++++++--- 3 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 .jules/bolt.md create mode 100644 web/__pycache__/app.cpython-312.pyc diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000..90c9292 --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,4 @@ + +## 2024-05-20 - Non-blocking asyncio Inference Calls +**Learning:** Calling blocking synchronous code inside a coroutine stalls the asyncio event loop, so every other task on that loop waits until the call returns. In FastAPI applications, running ML inference or other heavy processing synchronously on the event-loop thread severely limits throughput and creates bottlenecks. +**Action:** Always wrap heavy synchronous functions (like `inference.generate_solution`) that are called from `async` handlers in `await asyncio.to_thread(...)` to dispatch them to a worker thread, keeping the event loop free to serve incoming requests. Note that this keeps the loop responsive but does not speed up the call itself: pure-Python CPU-bound work still contends for the GIL, so the benefit is largest when the function blocks on I/O or runs in an extension that releases the GIL. 
diff --git a/web/__pycache__/app.cpython-312.pyc b/web/__pycache__/app.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1077fd328f01b762b24c147bb05a2064bc324fec GIT binary patch literal 2972 zcmZ`*O>7&-6`t8$lFMKH(xjwD-o}n3`bV;ypmkxlMr_EnAxkzYIY8+Gn>BYNue{u) zXP1`5rhxp@MkxwZx>U|7IXaMf2?De|*1Z(yg@Oc#*xG1{^w0o3D3O3RMGt*%my{S) z9dO^i{pQV^H}n4Zr(`llU_ATB?~J}MA)nz#W5BmRc4jCcD?}$cHA#`?X;H`v6u$*i zEJ}IF=S4GA4Cli>FPV{|oR@t*WJZgze9Y&=W;`EeL`02Q~5T5e4prT zPl(>mGDr<<(^csg43jk+}jZxrS}!s287bHP zue=hQWq0a4>eLO+3a-uPy>K7^pk!%9RuPBFMxptz=#j_p>?lA+35Do!>mCE2(S|=J z$gjm&niIw=>HV#S3`=KogLmDc$uR;yB0E6IzkO@8QfaU>@5yCD^~HDxOtrz|v;+A& z`PTyZTRc~`L0Z?7Yhgg_J zx_}2v^f4(f0~W1gAs-9pV@+7Z$K-q*utZ%hir9QC7xxkqw)p`3bid3T*Nc?6eaB?Q zE1p=e-Dwc=hZlEQvBbFMmN~1V+ZVx0c1bOL{zs7UQblb}wb-oc+V_~{jOUV`tg7e= zRrO-3TD0}DiF`s;@0T^RJ|j9V_gYj{vn(4u48fwRJOdhe6xB+1Y}@2nWJ6P?t+^ae zzyLBSYR8@cytINKgH|m zhPTbkuNjPG%nyIOrXc)vstX3K2fR4$MT*REv}sm3-<*fonM7#KQBH_q^p_9YW!AJx>e7MP=r>(!DgPw z+5N&^j>1YA9{&&@0~#FS9Wn#p!NI!^$7jTYl4P&?I!QOwn8mAE6B||Z5seZT@7*6W zXvKly`W*|wnZ$u9_&nSeKvRdx;^iF}Ad7xGFaYg50Kw;)v|8_ykD(v_7*6~vaYg)D zC`5ijT?{vWyhGRI{hIe}@{87E#KolX$8o}sQQ~5*`D3QL!R^2GP-8tv?%!Nkk2K%h zESVF3NM@;DRiop^5_qn1{Ox)Gy{#0>j;q{Z5JHx1orWso&MQz83~L(NKpcyD*-;A+ z&R$q&u4b5!IA~^2BQX2+D?;VS21nXci?T-F^z%9=@KbmYG{nI~RTRIPfi;+P{2W02IQIz%Bvpy~8h3JJV0ru5yD@mlkBm!>vC7M`|PItdfkT6&D8a(e0@8FBL8uQ!0<Ig3jb_y3_h=|G1Dl54#$-Ba0SQqVC6sgD_%$hJr+{R=tq zlAL^rwD%?H`7=qu?=#Z+*Kl%a{MpE6`0RqX9VS$IH~KKTm|MR6?80XH+;8U=qMPI@ zObDTOGY>P1>Bqfa2$T-*h(u`JMs8OQ(SfD!>=JnGwo=-@bZwWwYg=xt>3BN1klCQG u;=;Emt~`b--=cv?_F;B$;KT1OWH;#Ww$MgTE&b?A0xuLi`vn4iv-vL^ikZX! 
literal 0 HcmV?d00001 diff --git a/web/app.py b/web/app.py index 62b21b8..21f0297 100644 --- a/web/app.py +++ b/web/app.py @@ -3,6 +3,8 @@ from fastapi.responses import FileResponse from pydantic import BaseModel from typing import Optional +import asyncio +import logging import inference @@ -40,14 +42,16 @@ async def solve(req: SolveRequest): raise HTTPException(status_code=400, detail="`problem` must be a non-empty string") try: - solution = inference.generate_solution( + solution = await asyncio.to_thread( + inference.generate_solution, problem=req.problem, cot=req.cot, temperature=req.temperature, top_p=req.top_p, max_new_tokens=req.max_new_tokens, ) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) + except Exception: + logging.exception("Error during inference") + raise HTTPException(status_code=500, detail="Internal server error") return SolveResponse(solution=solution)