From 41ad117360ec18913e64a52cde95a27042d1bc35 Mon Sep 17 00:00:00 2001
From: SANVI SHUKLA <sanvishukla@gmail.com>
Date: Tue, 17 Mar 2026 04:15:50 +0530
Subject: [PATCH] Improve Ollama error handling and add LLM healthcheck

---
 api/errors/base.py      | 11 ++++++++++-
 api/errors/handlers.py  |  1 +
 api/main.py             | 38 +++++++++++++++++++++++++++++++++++++-
 src/file_manipulator.py |  7 +++++--
 src/llm.py              | 25 ++++++++++++++++++-------
 5 files changed, 71 insertions(+), 11 deletions(-)

diff --git a/api/errors/base.py b/api/errors/base.py
index 1f81a08..939ced0 100644
--- a/api/errors/base.py
+++ b/api/errors/base.py
@@ -1,4 +1,13 @@
 class AppError(Exception):
     def __init__(self, message: str, status_code: int = 400):
         self.message = message
-        self.status_code = status_code
\ No newline at end of file
+        self.status_code = status_code
+
+
+class LLMUnavailableError(AppError):
+    """
+    Raised when the LLM backend (Ollama) is unreachable or misconfigured (status_code 503).
+    """
+
+    def __init__(self, detail: str):
+        super().__init__(message=detail, status_code=503)
\ No newline at end of file
diff --git a/api/errors/handlers.py b/api/errors/handlers.py
index 903e744..2f57d64 100644
--- a/api/errors/handlers.py
+++ b/api/errors/handlers.py
@@ -2,6 +2,7 @@
 from fastapi.responses import JSONResponse
 from api.errors.base import AppError
 
+
 def register_exception_handlers(app):
     @app.exception_handler(AppError)
     async def app_error_handler(request: Request, exc: AppError):
diff --git a/api/main.py b/api/main.py
index d0b8c79..85940d9 100644
--- a/api/main.py
+++ b/api/main.py
@@ -1,7 +1,43 @@
 from fastapi import FastAPI
+from fastapi.responses import JSONResponse
+import requests
 from api.routes import templates, forms
+from api.errors.handlers import register_exception_handlers
+from api.errors.base import LLMUnavailableError
 
 app = FastAPI()
 
+register_exception_handlers(app)
+
 app.include_router(templates.router)
-app.include_router(forms.router)
\ No newline at end of file
+app.include_router(forms.router)
+
+
+@app.get("/health/llm", tags=["health"])
+def llm_healthcheck():
+    """
+    Lightweight LLM backend (Ollama) check: GETs the cheap /api/tags endpoint, no model load.
+    Returns {"status": "ok"}; raises LLMUnavailableError (status_code 503) on any failure.
+    Plain `def` on purpose: `requests` blocks, so FastAPI should run this in its threadpool.
+    """
+    from src.llm import LLM
+
+    llm = LLM(transcript_text="", target_fields={"healthcheck": ""})
+    generate_url = llm._get_ollama_url()
+    ollama_host = generate_url.rsplit("/api/generate", 1)[0]
+    tags_url = f"{ollama_host}/api/tags"
+
+    try:
+        response = requests.get(tags_url, timeout=5)
+        response.raise_for_status()
+    except requests.exceptions.ConnectionError as e:
+        raise LLMUnavailableError(
+            "LLM backend unreachable. Ensure Ollama is running (`ollama serve`) "
+            "and that OLLAMA_HOST is set correctly."
+        ) from e
+    except requests.exceptions.HTTPError as e:
+        raise LLMUnavailableError(f"LLM backend returned an HTTP error: {e}") from e
+    except requests.exceptions.RequestException as e:
+        raise LLMUnavailableError(f"LLM healthcheck failed: {e}") from e
+
+    return JSONResponse({"status": "ok"})
\ No newline at end of file
diff --git a/src/file_manipulator.py b/src/file_manipulator.py
index b7815cc..317de94 100644
--- a/src/file_manipulator.py
+++ b/src/file_manipulator.py
@@ -1,6 +1,7 @@
 import os
 from src.filler import Filler
 from src.llm import LLM
+from api.errors.base import LLMUnavailableError
 from commonforms import prepare_form
 
 
@@ -41,7 +42,9 @@ def fill_form(self, user_input: str, fields: list, pdf_form_path: str):
 
             return output_name
 
+        except ConnectionError as e:
+            # Wrap low-level connection errors in a domain-specific error
+            raise LLMUnavailableError(str(e))
         except Exception as e:
             print(f"An error occurred during PDF generation: {e}")
-            # Re-raise the exception so the frontend can handle it
-            raise e
+            raise
diff --git a/src/llm.py b/src/llm.py
index 70937f9..333d0af 100644
--- a/src/llm.py
+++ b/src/llm.py
@@ -44,14 +44,20 @@ def build_prompt(self, current_field):
 
         return prompt
 
+    def _get_ollama_url(self) -> str:
+        """
+        Return the Ollama generate endpoint, `<OLLAMA_HOST>/api/generate`, not the bare base URL.
+        OLLAMA_HOST defaults to http://localhost:11434; trailing slashes are stripped.
+        """
+        ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
+        return f"{ollama_host}/api/generate"
+
     def main_loop(self):
         # self.type_check_all()
+        ollama_url = self._get_ollama_url()
+
         for field in self._target_fields.keys():
             prompt = self.build_prompt(field)
-            # print(prompt)
-            # ollama_url = "http://localhost:11434/api/generate"
-            ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
-            ollama_url = f"{ollama_host}/api/generate"
 
             payload = {
                 "model": "mistral",
@@ -63,9 +69,15 @@ def main_loop(self):
                 response = requests.post(ollama_url, json=payload)
                 response.raise_for_status()
             except requests.exceptions.ConnectionError:
+                ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
                 raise ConnectionError(
-                    f"Could not connect to Ollama at {ollama_url}. "
-                    "Please ensure Ollama is running and accessible."
+                    "Could not connect to the Ollama LLM backend.\n"
+                    f"- Attempted URL: {ollama_url}\n"
+                    f"- Current OLLAMA_HOST: {ollama_host}\n"
+                    "Troubleshooting steps:\n"
+                    "  1) Ensure the Ollama server is running (e.g. `ollama serve`).\n"
+                    "  2) Ensure the `mistral` model is available (`ollama pull mistral`).\n"
+                    "  3) If running in Docker or remotely, verify the OLLAMA_HOST address."
                 )
             except requests.exceptions.HTTPError as e:
                 raise RuntimeError(f"Ollama returned an error: {e}")
@@ -73,7 +85,6 @@ def main_loop(self):
             # parse response
             json_data = response.json()
             parsed_response = json_data["response"]
-            # print(parsed_response)
             self.add_response_to_json(field, parsed_response)
 
         print("----------------------------------")