From 41ad117360ec18913e64a52cde95a27042d1bc35 Mon Sep 17 00:00:00 2001
From: SANVI SHUKLA
Date: Tue, 17 Mar 2026 04:15:50 +0530
Subject: [PATCH] Improve Ollama error handling and add LLM healthcheck

Add LLMUnavailableError (HTTP 503), register the exception handlers on
the app, and expose GET /health/llm, which probes Ollama's /api/tags
endpoint. The healthcheck is a plain `def` route because it uses the
blocking `requests` client; wrapped errors are chained with `from e` so
the original cause is preserved. LLM.main_loop now resolves the Ollama
URL once through LLM._get_ollama_url instead of on every field.

---
 api/errors/base.py      | 11 ++++++++++-
 api/errors/handlers.py  |  1 +
 api/main.py             | 38 +++++++++++++++++++++++++++++++++++++-
 src/file_manipulator.py |  7 +++++--
 src/llm.py              | 25 ++++++++++++++++++-------
 5 files changed, 71 insertions(+), 11 deletions(-)

diff --git a/api/errors/base.py b/api/errors/base.py
index 1f81a08..939ced0 100644
--- a/api/errors/base.py
+++ b/api/errors/base.py
@@ -1,4 +1,13 @@
 class AppError(Exception):
     def __init__(self, message: str, status_code: int = 400):
         self.message = message
-        self.status_code = status_code
\ No newline at end of file
+        self.status_code = status_code
+
+
+class LLMUnavailableError(AppError):
+    """
+    Raised when the LLM backend (Ollama) is unreachable or misconfigured.
+    """
+
+    def __init__(self, detail: str):
+        super().__init__(message=detail, status_code=503)
\ No newline at end of file
diff --git a/api/errors/handlers.py b/api/errors/handlers.py
index 903e744..2f57d64 100644
--- a/api/errors/handlers.py
+++ b/api/errors/handlers.py
@@ -2,6 +2,7 @@
 from fastapi.responses import JSONResponse
 from api.errors.base import AppError
 
+
 def register_exception_handlers(app):
     @app.exception_handler(AppError)
     async def app_error_handler(request: Request, exc: AppError):
diff --git a/api/main.py b/api/main.py
index d0b8c79..85940d9 100644
--- a/api/main.py
+++ b/api/main.py
@@ -1,7 +1,43 @@
 from fastapi import FastAPI
+from fastapi.responses import JSONResponse
+import requests
 from api.routes import templates, forms
+from api.errors.handlers import register_exception_handlers
+from api.errors.base import LLMUnavailableError
 
 app = FastAPI()
 
+register_exception_handlers(app)
+
 app.include_router(templates.router)
-app.include_router(forms.router)
\ No newline at end of file
+app.include_router(forms.router)
+
+
+@app.get("/health/llm", tags=["health"])
+def llm_healthcheck():
+    """
+    Lightweight health check for the LLM backend (Ollama).
+
+    Uses the cheap /api/tags endpoint so we don't have to load a model.
+    """
+    from src.llm import LLM
+
+    llm = LLM(transcript_text="", target_fields={"healthcheck": ""})
+    generate_url = llm._get_ollama_url()
+    ollama_host = generate_url.rsplit("/api/generate", 1)[0]
+    tags_url = f"{ollama_host}/api/tags"
+
+    try:
+        response = requests.get(tags_url, timeout=5)
+        response.raise_for_status()
+    except requests.exceptions.ConnectionError as e:
+        raise LLMUnavailableError(
+            "LLM backend unreachable. Ensure Ollama is running (`ollama serve`) "
+            "and that OLLAMA_HOST is set correctly."
+        ) from e
+    except requests.exceptions.HTTPError as e:
+        raise LLMUnavailableError(f"LLM backend returned an HTTP error: {e}") from e
+    except requests.exceptions.RequestException as e:
+        raise LLMUnavailableError(f"LLM healthcheck failed: {e}") from e
+
+    return JSONResponse({"status": "ok"})
\ No newline at end of file
diff --git a/src/file_manipulator.py b/src/file_manipulator.py
index b7815cc..317de94 100644
--- a/src/file_manipulator.py
+++ b/src/file_manipulator.py
@@ -1,6 +1,7 @@
 import os
 from src.filler import Filler
 from src.llm import LLM
+from api.errors.base import LLMUnavailableError
 from commonforms import prepare_form
 
 
@@ -41,7 +42,9 @@ def fill_form(self, user_input: str, fields: list, pdf_form_path: str):
 
             return output_name
 
+        except ConnectionError as e:
+            # Wrap low-level connection errors in a domain-specific error
+            raise LLMUnavailableError(str(e)) from e
         except Exception as e:
             print(f"An error occurred during PDF generation: {e}")
-            # Re-raise the exception so the frontend can handle it
-            raise e
+            raise
diff --git a/src/llm.py b/src/llm.py
index 70937f9..333d0af 100644
--- a/src/llm.py
+++ b/src/llm.py
@@ -44,14 +44,20 @@ def build_prompt(self, current_field):
 
         return prompt
 
+    def _get_ollama_url(self) -> str:
+        """
+        Resolve the Ollama /api/generate endpoint URL from the environment.
+        Defaults to a local instance if OLLAMA_HOST is not set.
+        """
+        ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
+        return f"{ollama_host}/api/generate"
+
     def main_loop(self):
         # self.type_check_all()
+        ollama_url = self._get_ollama_url()
+
         for field in self._target_fields.keys():
             prompt = self.build_prompt(field)
-            # print(prompt)
-            # ollama_url = "http://localhost:11434/api/generate"
-            ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
-            ollama_url = f"{ollama_host}/api/generate"
 
             payload = {
                 "model": "mistral",
@@ -63,9 +69,15 @@ def main_loop(self):
                 response = requests.post(ollama_url, json=payload)
                 response.raise_for_status()
             except requests.exceptions.ConnectionError:
+                ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
                 raise ConnectionError(
-                    f"Could not connect to Ollama at {ollama_url}. "
-                    "Please ensure Ollama is running and accessible."
+                    "Could not connect to the Ollama LLM backend.\n"
+                    f"- Attempted URL: {ollama_url}\n"
+                    f"- Current OLLAMA_HOST: {ollama_host}\n"
+                    "Troubleshooting steps:\n"
+                    " 1) Ensure the Ollama server is running (e.g. `ollama serve`).\n"
+                    " 2) Ensure the `mistral` model is available (`ollama pull mistral`).\n"
+                    " 3) If running in Docker or remotely, verify the OLLAMA_HOST address."
                 )
             except requests.exceptions.HTTPError as e:
                 raise RuntimeError(f"Ollama returned an error: {e}")
@@ -73,7 +85,6 @@ def main_loop(self):
             # parse response
             json_data = response.json()
             parsed_response = json_data["response"]
-            # print(parsed_response)
             self.add_response_to_json(field, parsed_response)
 
         print("----------------------------------")