From a3dfe703507494e6082f142276fb91a743375250 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 21 Nov 2025 12:44:51 +0700
Subject: [PATCH 001/236] Remove the unused auto-refresh functionality and
 related imports.

They are no longer needed since the underlying library issue has been resolved.
---
 app/services/client.py | 46 +-----------------------------------------
 1 file changed, 1 insertion(+), 45 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 825564b..1554bdd 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -8,11 +8,8 @@
 from gemini_webapi import GeminiClient, ModelOutput
 from gemini_webapi.client import ChatSession
 from gemini_webapi.constants import Model
-from gemini_webapi.exceptions import AuthError, ModelInvalid
+from gemini_webapi.exceptions import ModelInvalid
 from gemini_webapi.types import Gem
-from gemini_webapi.utils import rotate_tasks
-from gemini_webapi.utils.rotate_1psidts import rotate_1psidts
-from loguru import logger
 
 from ..models import Message
 from ..utils import g_config
@@ -76,47 +73,6 @@ async def init(
             verbose=verbose,
         )
 
-    async def start_auto_refresh(self) -> None:
-        """
-        Refresh the __Secure-1PSIDTS cookie periodically and keep the HTTP client in sync.
-        """
-        while True:
-            new_1psidts: str | None = None
-            try:
-                new_1psidts = await rotate_1psidts(self.cookies, self.proxy)
-            except AuthError:
-                if task := rotate_tasks.get(self.cookies.get("__Secure-1PSID", "")):
-                    task.cancel()
-                logger.warning(
-                    "Failed to refresh Gemini cookies (AuthError). Auto refresh task canceled."
-                )
-                return
-            except Exception as exc:
-                logger.warning(f"Unexpected error while refreshing Gemini cookies: {exc}")
-
-            if new_1psidts:
-                self.cookies["__Secure-1PSIDTS"] = new_1psidts
-                self._sync_httpx_cookie("__Secure-1PSIDTS", new_1psidts)
-                logger.debug("Gemini cookies refreshed. New __Secure-1PSIDTS applied.")
-            await asyncio.sleep(self.refresh_interval)
-
-    def _sync_httpx_cookie(self, name: str, value: str) -> None:
-        """
-        Ensure the underlying httpx client uses the refreshed cookie value.
-        """
-        if not self.client:
-            return
-
-        jar = self.client.cookies.jar
-        matched = False
-        for cookie in jar:
-            if cookie.name == name:
-                cookie.value = value
-                matched = True
-        if not matched:
-            # Fall back to setting the cookie with default scope if we did not find an existing entry.
-            self.client.cookies.set(name, value)
-
     async def generate_content(
         self,
         prompt: str,

From 3a692ab014bf6d0cb98f38d499dc2760eb92c096 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 22 Nov 2025 14:54:53 +0700
Subject: [PATCH 002/236] Enhance error handling in client initialization and
 message sending

---
 app/server/chat.py     | 12 ++++++++--
 app/services/client.py | 52 +++++++++++-------------------------------
 app/services/pool.py   | 26 ++++++++++++++-------
 3 files changed, 41 insertions(+), 49 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 66fa6ce..e8752cf 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1129,7 +1129,11 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s
     """
     if len(text) <= MAX_CHARS_PER_REQUEST:
         # No need to split - a single request is fine.
-        return await session.send_message(text, files=files)
+        try:
+            return await session.send_message(text, files=files)
+        except Exception as e:
+            logger.exception(f"Error sending message to Gemini: {e}")
+            raise
     hint_len = len(CONTINUATION_HINT)
     chunk_size = MAX_CHARS_PER_REQUEST - hint_len
 
@@ -1155,7 +1159,11 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s
             raise
 
     # The last chunk carries the files (if any) and we return its response.
-    return await session.send_message(chunks[-1], files=files)
+    try:
+        return await session.send_message(chunks[-1], files=files)
+    except Exception as e:
+        logger.exception(f"Error sending final chunk to Gemini: {e}")
+        raise
 
 
 def _iter_stream_segments(model_output: str, chunk_size: int = 64):
diff --git a/app/services/client.py b/app/services/client.py
index 1554bdd..26be26f 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -1,4 +1,3 @@
-import asyncio
 import html
 import json
 import re
@@ -6,10 +5,7 @@
 from typing import Any, cast
 
 from gemini_webapi import GeminiClient, ModelOutput
-from gemini_webapi.client import ChatSession
-from gemini_webapi.constants import Model
-from gemini_webapi.exceptions import ModelInvalid
-from gemini_webapi.types import Gem
+from loguru import logger
 
 from ..models import Message
 from ..utils import g_config
@@ -64,40 +60,18 @@ async def init(
         refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval))
         verbose = cast(bool, _resolve(verbose, config.verbose))
 
-        await super().init(
-            timeout=timeout,
-            auto_close=auto_close,
-            close_delay=close_delay,
-            auto_refresh=auto_refresh,
-            refresh_interval=refresh_interval,
-            verbose=verbose,
-        )
-
-    async def generate_content(
-        self,
-        prompt: str,
-        files: list[str | Path] | None = None,
-        model: Model | str = Model.UNSPECIFIED,
-        gem: Gem | str | None = None,
-        chat: ChatSession | None = None,
-        **kwargs,
-    ) -> ModelOutput:
-        cnt = 2  # Try 2 times before giving up
-        last_exception: ModelInvalid | None = None
-        while cnt:
-            cnt -= 1
-            try:
-                return await super().generate_content(prompt, files, model, gem, chat, **kwargs)
-            except ModelInvalid as e:
-                # This is not always caused by model selection. Instead, it can be solved by retrying.
-                # So we catch it and retry as a workaround.
-                await asyncio.sleep(1)
-                last_exception = e
-
-        # If retrying failed, re-raise ModelInvalid
-        if last_exception is not None:
-            raise last_exception
-        raise RuntimeError("generate_content failed without receiving a ModelInvalid error.")
+        try:
+            await super().init(
+                timeout=timeout,
+                auto_close=auto_close,
+                close_delay=close_delay,
+                auto_refresh=auto_refresh,
+                refresh_interval=refresh_interval,
+                verbose=verbose,
+            )
+        except Exception:
+            logger.exception(f"Failed to initialize GeminiClient {self.id}")
+            raise
 
     @staticmethod
     async def process_message(
diff --git a/app/services/pool.py b/app/services/pool.py
index abf1fa0..24a21dc 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -35,14 +35,24 @@ def __init__(self) -> None:
 
     async def init(self) -> None:
         """Initialize all clients in the pool."""
+        success_count = 0
         for client in self._clients:
             if not client.running:
-                await client.init(
-                    timeout=g_config.gemini.timeout,
-                    auto_refresh=g_config.gemini.auto_refresh,
-                    verbose=g_config.gemini.verbose,
-                    refresh_interval=g_config.gemini.refresh_interval,
-                )
+                try:
+                    await client.init(
+                        timeout=g_config.gemini.timeout,
+                        auto_refresh=g_config.gemini.auto_refresh,
+                        verbose=g_config.gemini.verbose,
+                        refresh_interval=g_config.gemini.refresh_interval,
+                    )
+                except Exception:
+                    logger.exception(f"Failed to initialize client {client.id}")
+
+            if client.running:
+                success_count += 1
+
+        if success_count == 0:
+            raise RuntimeError("Failed to initialize any Gemini clients")
 
     async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper:
         """Return a healthy client by id or using round-robin."""
@@ -89,8 +99,8 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool:
                 )
                 logger.info(f"Restarted Gemini client {client.id} after it stopped.")
                 return True
-            except Exception as exc:
-                logger.warning(f"Failed to restart Gemini client {client.id}: {exc}")
+            except Exception:
+                logger.exception(f"Failed to restart Gemini client {client.id}")
                 return False
 
     @property

From d57e3676fed9fa03e1f51a5aed80d4b7f88e6a88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 22 Nov 2025 17:49:41 +0700
Subject: [PATCH 003/236] Refactor link handling to extract file paths and
 simplify Google search links

---
 app/services/client.py | 46 +++++++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 26be26f..f5a39dd 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -24,9 +24,20 @@
 )
 
 HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);")
+
 MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=\s*[-\\`*_{}\[\]()#+.!<>])")
+
 CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`]*`)", re.DOTALL)
 
+FILE_PATH_PATTERN = re.compile(
+    r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|Gemfile|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
+    re.IGNORECASE,
+)
+
+GOOGLE_SEARCH_LINK_PATTERN = re.compile(
+    r"(?:`\s*)?`?\[`?([^`\]]+)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(?<!\\)\)(?:\s*`?)?"
+)
+
 
 _UNSET = object()
 
@@ -219,28 +230,25 @@ def _unescape_markdown(text_content: str) -> str:
         text = _unescape_html(text)
         text = _unescape_markdown(text)
 
-        def simplify_link_target(text_content: str) -> str:
-            match_colon_num = re.match(r"([^:]+:\d+)", text_content)
-            if match_colon_num:
-                return match_colon_num.group(1)
-            return text_content
+        def extract_file_path_from_display_text(text_content: str) -> str | None:
+            match = re.match(FILE_PATH_PATTERN, text_content)
+            if match:
+                return match.group(1)
+            return None
 
         def replacer(match: re.Match) -> str:
-            outer_open_paren = match.group(1)
-            display_text = match.group(2)
+            display_text = str(match.group(1)).strip()
+            google_search_prefix = match.group(2)
+            query_part = match.group(3)
 
-            new_target_url = simplify_link_target(display_text)
-            new_link_segment = f"[`{display_text}`]({new_target_url})"
+            file_path = extract_file_path_from_display_text(display_text)
 
-            if outer_open_paren:
-                return f"{outer_open_paren}{new_link_segment})"
+            if file_path:
+                # If it's a file path, transform it into a self-referencing Markdown link
+                return f"[`{file_path}`]({file_path})"
             else:
-                return new_link_segment
-
-        # Replace Google search links with simplified Markdown links
-        pattern = r"(\()?\[`([^`]+?)`\]\((https://www.google.com/search\?q=)(.*?)(?<!\\)\)\)*(\))?"
-        text = re.sub(pattern, replacer, text)
+                # Otherwise, reconstruct the original Google search link with the display_text
+                original_google_search_url = f"{google_search_prefix}{query_part}"
+                return f"[`{display_text}`]({original_google_search_url})"
 
-        # Fix inline code blocks
-        pattern = r"`(\[[^\]]+\]\([^\)]+\))`"
-        return re.sub(pattern, r"\1", text)
+        return re.sub(GOOGLE_SEARCH_LINK_PATTERN, replacer, text)

From ccd55f9fd3d9c48f986c42413b71971311ecb5d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 22 Nov 2025 18:29:41 +0700
Subject: [PATCH 004/236] Fix regex pattern for Google search link matching

---
 app/services/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/services/client.py b/app/services/client.py
index f5a39dd..ffc559e 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -35,7 +35,7 @@
 )
 
 GOOGLE_SEARCH_LINK_PATTERN = re.compile(
-    r"(?:`\s*)?`?\[`?([^`\]]+)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(?<!\\)\)(?:\s*`?)?"
+    r"(?:`\s*)?`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(?<!\\)\)(?:\s*`?)?"
 )
 
 

From 37632b3fb89fab17f2cb728a1c5a66ab5dd013ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 22 Nov 2025 21:44:09 +0700
Subject: [PATCH 005/236] Fix regex patterns for Markdown escaping, code fence
 and Google search link matching

---
 app/services/client.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index ffc559e..0088c74 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -25,17 +25,17 @@
 
 HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);")
 
-MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=\s*[-\\`*_{}\[\]()#+.!<>])")
+MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])")
 
-CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`]*`)", re.DOTALL)
+CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL)
 
 FILE_PATH_PATTERN = re.compile(
-    r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|Gemfile|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
+    r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
     re.IGNORECASE,
 )
 
 GOOGLE_SEARCH_LINK_PATTERN = re.compile(
-    r"(?:`\s*)?`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)(.*?)(?<!\\)\)(?:\s*`?)?"
+    r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?"
 )
 
 

From b11cfcc45f20259d17b1ddfde0e07fdaa80d9532 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 22 Nov 2025 22:52:27 +0700
Subject: [PATCH 006/236] Increase timeout value in configuration files from 60
 to 120 seconds to better handle heavy tasks

---
 app/server/chat.py     | 2 --
 app/services/client.py | 8 --------
 app/utils/config.py    | 2 +-
 config/config.yaml     | 6 +++---
 4 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index e8752cf..b4e88da 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -48,9 +48,7 @@
 
 # Maximum characters Gemini Web can accept in a single request (configurable)
 MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9)
-
 CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')"
-
 TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)```", re.DOTALL | re.IGNORECASE)
 TOOL_CALL_RE = re.compile(
     r"<tool_call\s+name=\"([^\"]+)\">(.*?)</tool_call>", re.DOTALL | re.IGNORECASE
diff --git a/app/services/client.py b/app/services/client.py
index 0088c74..166eb70 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -16,29 +16,21 @@
     '```xml\n<tool_call name="tool_name">{"arg": "value"}</tool_call>\n```\n'
     "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n"
 )
-
 CODE_BLOCK_HINT = (
     "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced "
     "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n"
     "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n"
 )
-
 HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);")
-
 MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])")
-
 CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL)
-
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
     re.IGNORECASE,
 )
-
 GOOGLE_SEARCH_LINK_PATTERN = re.compile(
     r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?"
 )
-
-
 _UNSET = object()
 
 
diff --git a/app/utils/config.py b/app/utils/config.py
index 48f0792..796ca75 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -56,7 +56,7 @@ class GeminiConfig(BaseModel):
     clients: list[GeminiClientSettings] = Field(
         ..., description="List of Gemini client credential pairs"
     )
-    timeout: int = Field(default=60, ge=1, description="Init timeout")
+    timeout: int = Field(default=120, ge=1, description="Init timeout")
     auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies")
     refresh_interval: int = Field(
         default=540, ge=1, description="Interval in seconds to refresh Gemini cookies"
diff --git a/config/config.yaml b/config/config.yaml
index b0f8fbf..89c88b7 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -21,8 +21,8 @@ gemini:
     - id: "example-id-1"   # Arbitrary client ID
       secure_1psid: "YOUR_SECURE_1PSID_HERE"
       secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"
-      proxy: null           # Optional proxy URL (null/empty means direct connection)
-  timeout: 60              # Init timeout in seconds
+      proxy: null          # Optional proxy URL (null/empty means direct connection)
+  timeout: 120             # Init timeout in seconds
   auto_refresh: true       # Auto-refresh session cookies
   refresh_interval: 540    # Refresh interval in seconds
   verbose: false           # Enable verbose logging for Gemini requests
@@ -34,4 +34,4 @@ storage:
   retention_days: 14       # Number of days to retain conversations before cleanup
 
 logging:
-  level: "INFO"           # Log level: DEBUG, INFO, WARNING, ERROR
+  level: "INFO"            # Log level: DEBUG, INFO, WARNING, ERROR

From f00ebfcbd0424c7ab06d680f308349a04aff3be0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 2 Dec 2025 13:15:27 +0700
Subject: [PATCH 007/236] Fix image generation

---
 .github/workflows/docker.yaml | 10 ++---
 .github/workflows/track.yml   | 12 +++---
 app/models/models.py          | 14 +++----
 app/server/chat.py            | 77 +++++++++++++++++++++--------------
 app/services/client.py        |  4 +-
 app/utils/helper.py           | 10 ++++-
 6 files changed, 75 insertions(+), 52 deletions(-)

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 4527f3d..eef2a41 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -5,11 +5,11 @@ on:
     branches:
       - main
     tags:
-      - 'v*'
+      - "v*"
     paths-ignore:
-      - '**/*.md'
-      - '.github/workflows/ruff.yaml'
-      - '.github/workflows/track.yml'
+      - "**/*.md"
+      - ".github/workflows/ruff.yaml"
+      - ".github/workflows/track.yml"
 
 env:
   REGISTRY: ghcr.io
@@ -57,4 +57,4 @@ jobs:
           labels: ${{ steps.meta.outputs.labels }}
           platforms: linux/amd64,linux/arm64
           cache-from: type=gha
-          cache-to: type=gha,mode=max
\ No newline at end of file
+          cache-to: type=gha,mode=max
diff --git a/.github/workflows/track.yml b/.github/workflows/track.yml
index 63afbec..838dcf8 100644
--- a/.github/workflows/track.yml
+++ b/.github/workflows/track.yml
@@ -2,7 +2,7 @@ name: Update gemini-webapi
 
 on:
   schedule:
-    - cron: '0 0 * * *' # Runs every day at midnight
+    - cron: "0 0 * * *" # Runs every day at midnight
   workflow_dispatch:
 
 jobs:
@@ -24,7 +24,7 @@ jobs:
         run: |
           # Install dependencies first to enable uv pip show
           uv sync
-          
+
           # Get current version of gemini-webapi before upgrade
           OLD_VERSION=$(uv pip show gemini-webapi 2>/dev/null | grep ^Version: | awk '{print $2}')
           if [ -z "$OLD_VERSION" ]; then
@@ -32,10 +32,10 @@ jobs:
             exit 1
           fi
           echo "Current gemini-webapi version: $OLD_VERSION"
-          
+
           # Update the package using uv, which handles pyproject.toml and uv.lock
           uv add --upgrade gemini-webapi
-          
+
           # Get new version of gemini-webapi after upgrade
           NEW_VERSION=$(uv pip show gemini-webapi | grep ^Version: | awk '{print $2}')
           if [ -z "$NEW_VERSION" ]; then
@@ -43,7 +43,7 @@ jobs:
             exit 1
           fi
           echo "New gemini-webapi version: $NEW_VERSION"
-          
+
           # Only proceed if gemini-webapi version has changed
           if [ "$OLD_VERSION" != "$NEW_VERSION" ]; then
             echo "gemini-webapi has been updated from $OLD_VERSION to $NEW_VERSION"
@@ -63,7 +63,7 @@ jobs:
           title: ":arrow_up: update gemini-webapi to ${{ steps.update.outputs.version }}"
           body: |
             Update `gemini-webapi` to version `${{ steps.update.outputs.version }}`.
-            
+
             Auto-generated by GitHub Actions using `uv`.
           branch: update-gemini-webapi
           base: main
diff --git a/app/models/models.py b/app/models/models.py
index 3991f12..74d8cd5 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -154,11 +154,13 @@ class ConversationInStore(BaseModel):
 class ResponseInputContent(BaseModel):
     """Content item for Responses API input."""
 
-    type: Literal["input_text", "input_image"]
+    type: Literal["input_text", "input_image", "input_file"]
     text: Optional[str] = None
     image_url: Optional[str] = None
-    image_base64: Optional[str] = None
-    mime_type: Optional[str] = None
+    detail: Optional[Literal["auto", "low", "high"]] = None
+    file_url: Optional[str] = None
+    file_data: Optional[str] = None
+    filename: Optional[str] = None
 
 
 class ResponseInputItem(BaseModel):
@@ -212,12 +214,8 @@ class ResponseUsage(BaseModel):
 class ResponseOutputContent(BaseModel):
     """Content item for Responses API output."""
 
-    type: Literal["output_text", "output_image"]
+    type: Literal["output_text"]
     text: Optional[str] = None
-    image_base64: Optional[str] = None
-    mime_type: Optional[str] = None
-    width: Optional[int] = None
-    height: Optional[int] = None
 
 
 class ResponseOutputMessage(BaseModel):
diff --git a/app/server/chat.py b/app/server/chat.py
index b4e88da..76dc632 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -381,14 +381,6 @@ def _strip_tagged_blocks(text: str) -> str:
     return "".join(result)
 
 
-def _ensure_data_url(part: ResponseInputContent) -> str | None:
-    image_url = part.image_url
-    if not image_url and part.image_base64:
-        mime_type = part.mime_type or "image/png"
-        image_url = f"data:{mime_type};base64,{part.image_base64}"
-    return image_url
-
-
 def _response_items_to_messages(
     items: str | list[ResponseInputItem],
 ) -> tuple[list[Message], str | list[ResponseInputItem]]:
@@ -422,14 +414,34 @@ def _response_items_to_messages(
                     if text_value:
                         converted.append(ContentItem(type="text", text=text_value))
                 elif part.type == "input_image":
-                    image_url = _ensure_data_url(part)
+                    image_url = part.image_url
                     if image_url:
                         normalized_contents.append(
-                            ResponseInputContent(type="input_image", image_url=image_url)
+                            ResponseInputContent(
+                                type="input_image",
+                                image_url=image_url,
+                                detail=part.detail if part.detail else "auto",
+                            )
                         )
                         converted.append(
-                            ContentItem(type="image_url", image_url={"url": image_url})
+                            ContentItem(
+                                type="image_url",
+                                image_url={
+                                    "url": image_url,
+                                    "detail": part.detail if part.detail else "auto",
+                                },
+                            )
                         )
+                elif part.type == "input_file":
+                    if part.file_url or part.file_data:
+                        normalized_contents.append(part)
+                        file_info = {}
+                        if part.file_data:
+                            file_info["file_data"] = part.file_data
+                            file_info["filename"] = part.filename
+                        if part.file_url:
+                            file_info["url"] = part.file_url
+                        converted.append(ContentItem(type="file", file=file_info))
             messages.append(Message(role=role, content=converted or None))
 
         normalized_input.append(
@@ -472,11 +484,26 @@ def _instructions_to_messages(
                     if text_value:
                         converted.append(ContentItem(type="text", text=text_value))
                 elif part.type == "input_image":
-                    image_url = _ensure_data_url(part)
+                    image_url = part.image_url
                     if image_url:
                         converted.append(
-                            ContentItem(type="image_url", image_url={"url": image_url})
+                            ContentItem(
+                                type="image_url",
+                                image_url={
+                                    "url": image_url,
+                                    "detail": part.detail if part.detail else "auto",
+                                },
+                            )
                         )
+                elif part.type == "input_file":
+                    file_info = {}
+                    if part.file_data:
+                        file_info["file_data"] = part.file_data
+                        file_info["filename"] = part.filename
+                    if part.file_url:
+                        file_info["url"] = part.file_url
+                    if file_info:
+                        converted.append(ContentItem(type="file", file=file_info))
             instruction_messages.append(Message(role=role, content=converted or None))
 
     return instruction_messages
@@ -799,13 +826,13 @@ async def create_response(
     session, client, remaining_messages = await _find_reusable_session(db, pool, model, messages)
 
     async def _build_payload(
-        payload_messages: list[Message], reuse_session: bool
+        _payload_messages: list[Message], _reuse_session: bool
     ) -> tuple[str, list[Path | str]]:
-        if reuse_session and len(payload_messages) == 1:
+        if _reuse_session and len(_payload_messages) == 1:
             return await GeminiClientWrapper.process_message(
-                payload_messages[0], tmp_dir, tagged=False
+                _payload_messages[0], tmp_dir, tagged=False
             )
-        return await GeminiClientWrapper.process_conversation(payload_messages, tmp_dir)
+        return await GeminiClientWrapper.process_conversation(_payload_messages, tmp_dir)
 
     reuse_session = session is not None
     if reuse_session:
@@ -821,7 +848,7 @@ async def _build_payload(
                 detail="No new messages to send for the existing session.",
             )
         payload_messages = messages_to_send
-        model_input, files = await _build_payload(payload_messages, reuse_session=True)
+        model_input, files = await _build_payload(payload_messages, _reuse_session=True)
         logger.debug(
             f"Reused session {session.metadata} - sending {len(payload_messages)} prepared messages."
         )
@@ -830,7 +857,7 @@ async def _build_payload(
             client = await pool.acquire()
             session = client.start_chat(model=model)
             payload_messages = messages
-            model_input, files = await _build_payload(payload_messages, reuse_session=False)
+            model_input, files = await _build_payload(payload_messages, _reuse_session=False)
         except ValueError as e:
             raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
         except RuntimeError as e:
@@ -935,7 +962,6 @@ async def _build_payload(
             detail = f"{detail} Assistant response: {summary}"
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail)
 
-    image_contents: list[ResponseOutputContent] = []
     image_call_items: list[ResponseImageGenerationCall] = []
     for image in images:
         try:
@@ -943,16 +969,6 @@ async def _build_payload(
         except Exception as exc:
             logger.warning(f"Failed to download generated image: {exc}")
             continue
-        mime_type = "image/png" if isinstance(image, GeneratedImage) else "image/jpeg"
-        image_contents.append(
-            ResponseOutputContent(
-                type="output_image",
-                image_base64=image_base64,
-                mime_type=mime_type,
-                width=width,
-                height=height,
-            )
-        )
         image_call_items.append(
             ResponseImageGenerationCall(
                 id=f"img_{uuid.uuid4().hex}",
@@ -977,7 +993,6 @@ async def _build_payload(
     response_contents: list[ResponseOutputContent] = []
     if assistant_text:
         response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text))
-    response_contents.extend(image_contents)
 
     if not response_contents:
         response_contents.append(ResponseOutputContent(type="output_text", text=""))
diff --git a/app/services/client.py b/app/services/client.py
index 166eb70..0207114 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -113,8 +113,10 @@ async def process_message(
                     if file_data := item.file.get("file_data", None):
                         filename = item.file.get("filename", "")
                         files.append(await save_file_to_tempfile(file_data, filename, tempdir))
+                    elif url := item.file.get("url", None):
+                        files.append(await save_url_to_tempfile(url, tempdir))
                     else:
-                        raise ValueError("File must contain 'file_data' key")
+                        raise ValueError("File must contain 'file_data' or 'url' key")
         elif message.content is not None:
             raise ValueError("Unsupported message content type.")
 
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 48fc99d..3bff469 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -1,4 +1,5 @@
 import base64
+import mimetypes
 import tempfile
 from pathlib import Path
 
@@ -40,9 +41,16 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None):
     suffix: str | None = None
     if url.startswith("data:image/"):
         # Base64 encoded image
+        metadata_part = url.split(",")[0]
+        mime_type = metadata_part.split(":")[1].split(";")[0]
+
         base64_data = url.split(",")[1]
         data = base64.b64decode(base64_data)
-        suffix = ".png"
+
+        # Guess extension from mime type, default to the subtype if not found
+        suffix = mimetypes.guess_extension(mime_type)
+        if not suffix:
+            suffix = f".{mime_type.split('/')[1]}"
     else:
         # http files
         async with httpx.AsyncClient() as client:

From d911c33e81e83211ed53d77b300c4c203df7b53c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 2 Dec 2025 15:50:45 +0700
Subject: [PATCH 008/236] Refactor tool handling to support standard and image
 generation tools separately

---
 app/models/models.py |  7 ++++---
 app/server/chat.py   | 36 +++++++++++++++++++++++++++++++++---
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/app/models/models.py b/app/models/models.py
index 74d8cd5..52dd414 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -174,7 +174,8 @@ class ResponseInputItem(BaseModel):
 class ResponseToolChoice(BaseModel):
     """Tool choice enforcing a specific tool in Responses API."""
 
-    type: Literal["image_generation"]
+    type: Literal["function", "image_generation"]
+    function: Optional[ToolChoiceFunctionDetail] = None
 
 
 class ResponseImageTool(BaseModel):
@@ -195,8 +196,8 @@ class ResponseCreateRequest(BaseModel):
     top_p: Optional[float] = 1.0
     max_output_tokens: Optional[int] = None
     stream: Optional[bool] = False
-    tool_choice: Optional[ResponseToolChoice] = None
-    tools: Optional[List[ResponseImageTool]] = None
+    tool_choice: Optional[Union[str, ResponseToolChoice]] = None
+    tools: Optional[List[Union[Tool, ResponseImageTool]]] = None
     store: Optional[bool] = None
     user: Optional[str] = None
     response_format: Optional[Dict[str, Any]] = None
diff --git a/app/server/chat.py b/app/server/chat.py
index 76dc632..8277d0c 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -795,7 +795,28 @@ async def create_response(
             f"Structured response requested for /v1/responses (schema={structured_requirement.schema_name})."
         )
 
-    image_instruction = _build_image_generation_instruction(request.tools, request.tool_choice)
+    # Separate standard tools from image generation tools
+    standard_tools: list[Tool] = []
+    image_tools: list[ResponseImageTool] = []
+
+    if request.tools:
+        for t in request.tools:
+            if isinstance(t, Tool):
+                standard_tools.append(t)
+            elif isinstance(t, ResponseImageTool):
+                image_tools.append(t)
+            # Handle dicts if Pydantic didn't convert them fully (fallback)
+            elif isinstance(t, dict):
+                t_type = t.get("type")
+                if t_type == "function":
+                    standard_tools.append(Tool.model_validate(t))
+                elif t_type == "image_generation":
+                    image_tools.append(ResponseImageTool.model_validate(t))
+
+    image_instruction = _build_image_generation_instruction(
+        image_tools,
+        request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None,
+    )
     if image_instruction:
         extra_instructions.append(image_instruction)
         logger.debug("Image generation support enabled for /v1/responses request.")
@@ -808,10 +829,19 @@ async def create_response(
             f"Injected {len(preface_messages)} instruction messages before sending to Gemini."
         )
 
+    # Pass standard tools to the prompt builder
+    # Determine tool_choice for standard tools (ignore image_generation choice here as it is handled via instruction)
+    model_tool_choice = None
+    if isinstance(request.tool_choice, str):
+        model_tool_choice = request.tool_choice
+    elif isinstance(request.tool_choice, ToolChoiceFunction):
+        model_tool_choice = request.tool_choice
+    # If tool_choice is ResponseToolChoice (image_generation), we don't pass it as a function tool choice.
+
     messages = _prepare_messages_for_model(
         conversation_messages,
-        tools=None,
-        tool_choice=None,
+        tools=standard_tools or None,
+        tool_choice=model_tool_choice,
         extra_instructions=extra_instructions or None,
     )
 

From a8241ad78831b675d0321bbe5271c1bf10a6ce2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 2 Dec 2025 17:17:27 +0700
Subject: [PATCH 009/236] Fix: use "ascii" decoding for base64-encoded image
 data consistency

---
 app/server/chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 8277d0c..67790ab 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1524,4 +1524,4 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non
 
     data = Path(saved_path).read_bytes()
     width, height = _extract_image_dimensions(data)
-    return base64.b64encode(data).decode("utf-8"), width, height
+    return base64.b64encode(data).decode("ascii"), width, height

From fd2723d49b5929cb770a231aeb479f392f7a7d53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 3 Dec 2025 12:08:19 +0700
Subject: [PATCH 010/236] Fix: replace `running` with `_running` for internal
 client status checks

---
 app/services/pool.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/services/pool.py b/app/services/pool.py
index 24a21dc..28a3435 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -37,7 +37,7 @@ async def init(self) -> None:
         """Initialize all clients in the pool."""
         success_count = 0
         for client in self._clients:
-            if not client.running:
+            if not client._running:
                 try:
                     await client.init(
                         timeout=g_config.gemini.timeout,
@@ -48,7 +48,7 @@ async def init(self) -> None:
                 except Exception:
                     logger.exception(f"Failed to initialize client {client.id}")
 
-            if client.running:
+            if client._running:
                 success_count += 1
 
         if success_count == 0:
@@ -79,7 +79,7 @@ async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper:
 
     async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool:
         """Make sure the client is running, attempting a restart if needed."""
-        if client.running:
+        if client._running:
             return True
 
         lock = self._restart_locks.get(client.id)
@@ -87,7 +87,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool:
             return False  # Should not happen
 
         async with lock:
-            if client.running:
+            if client._running:
                 return True
 
             try:
@@ -110,4 +110,4 @@ def clients(self) -> List[GeminiClientWrapper]:
 
     def status(self) -> Dict[str, bool]:
         """Return running status for each client."""
-        return {client.id: client.running for client in self._clients}
+        return {client.id: client._running for client in self._clients}

From 8ee6cc0335e4b63df2126a6bf69d6c9e42505485 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 3 Dec 2025 14:10:06 +0700
Subject: [PATCH 011/236] Refactor: replace direct `_running` access with
 `running()` method in client status checks

---
 app/services/client.py |  3 +++
 app/services/pool.py   | 10 +++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 0207114..09c52c1 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -76,6 +76,9 @@ async def init(
             logger.exception(f"Failed to initialize GeminiClient {self.id}")
             raise
 
+    def running(self) -> bool:
+        return self._running
+
     @staticmethod
     async def process_message(
         message: Message, tempdir: Path | None = None, tagged: bool = True
diff --git a/app/services/pool.py b/app/services/pool.py
index 28a3435..a134dda 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -37,7 +37,7 @@ async def init(self) -> None:
         """Initialize all clients in the pool."""
         success_count = 0
         for client in self._clients:
-            if not client._running:
+            if not client.running():
                 try:
                     await client.init(
                         timeout=g_config.gemini.timeout,
@@ -48,7 +48,7 @@ async def init(self) -> None:
                 except Exception:
                     logger.exception(f"Failed to initialize client {client.id}")
 
-            if client._running:
+            if client.running():
                 success_count += 1
 
         if success_count == 0:
@@ -79,7 +79,7 @@ async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper:
 
     async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool:
         """Make sure the client is running, attempting a restart if needed."""
-        if client._running:
+        if client.running():
             return True
 
         lock = self._restart_locks.get(client.id)
@@ -87,7 +87,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool:
             return False  # Should not happen
 
         async with lock:
-            if client._running:
+            if client.running():
                 return True
 
             try:
@@ -110,4 +110,4 @@ def clients(self) -> List[GeminiClientWrapper]:
 
     def status(self) -> Dict[str, bool]:
         """Return running status for each client."""
-        return {client.id: client._running for client in self._clients}
+        return {client.id: client.running() for client in self._clients}

From 453700eba682cfdd4bfc2e061a8139129654d017 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 3 Dec 2025 22:11:11 +0700
Subject: [PATCH 012/236] Extend models with new fields for annotations,
 reasoning, audio, log probabilities, and token details; adjust response
 handling accordingly.

---
 app/models/models.py | 13 ++++++++++++-
 app/server/chat.py   |  7 ++++---
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/app/models/models.py b/app/models/models.py
index 52dd414..1d7368c 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -12,7 +12,9 @@ class ContentItem(BaseModel):
     type: Literal["text", "image_url", "file", "input_audio"]
     text: Optional[str] = None
     image_url: Optional[Dict[str, str]] = None
+    input_audio: Optional[Dict[str, Any]] = None
     file: Optional[Dict[str, str]] = None
+    annotations: List[Dict[str, Any]] = Field(default_factory=list)
 
 
 class Message(BaseModel):
@@ -22,6 +24,10 @@ class Message(BaseModel):
     content: Union[str, List[ContentItem], None] = None
     name: Optional[str] = None
     tool_calls: Optional[List["ToolCall"]] = None
+    refusal: Optional[str] = None
+    reasoning_content: Optional[str] = None
+    audio: Optional[Dict[str, Any]] = None
+    annotations: List[Dict[str, Any]] = Field(default_factory=list)
 
 
 class Choice(BaseModel):
@@ -30,6 +36,7 @@ class Choice(BaseModel):
     index: int
     message: Message
     finish_reason: str
+    logprobs: Optional[Dict[str, Any]] = None
 
 
 class FunctionCall(BaseModel):
@@ -81,6 +88,8 @@ class Usage(BaseModel):
     prompt_tokens: int
     completion_tokens: int
     total_tokens: int
+    prompt_tokens_details: Optional[Dict[str, int]] = None
+    completion_tokens_details: Optional[Dict[str, int]] = None
 
 
 class ModelData(BaseModel):
@@ -118,6 +127,8 @@ class ChatCompletionResponse(BaseModel):
     model: str
     choices: List[Choice]
     usage: Usage
+    system_fingerprint: Optional[str] = None
+    service_tier: Optional[str] = None
 
 
 class ModelListResponse(BaseModel):
@@ -217,6 +228,7 @@ class ResponseOutputContent(BaseModel):
 
     type: Literal["output_text"]
     text: Optional[str] = None
+    annotations: List[Dict[str, Any]] = Field(default_factory=list)
 
 
 class ResponseOutputMessage(BaseModel):
@@ -257,7 +269,6 @@ class ResponseCreateResponse(BaseModel):
     created: int
     model: str
     output: List[Union[ResponseOutputMessage, ResponseImageGenerationCall, ResponseToolCall]]
-    output_text: Optional[str] = None
     status: Literal[
         "in_progress",
         "completed",
diff --git a/app/server/chat.py b/app/server/chat.py
index 67790ab..5848a39 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1022,10 +1022,12 @@ async def _build_payload(
 
     response_contents: list[ResponseOutputContent] = []
     if assistant_text:
-        response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text))
+        response_contents.append(
+            ResponseOutputContent(type="output_text", text=assistant_text, annotations=[])
+        )
 
     if not response_contents:
-        response_contents.append(ResponseOutputContent(type="output_text", text=""))
+        response_contents.append(ResponseOutputContent(type="output_text", text="", annotations=[]))
 
     created_time = int(datetime.now(tz=timezone.utc).timestamp())
     response_id = f"resp_{uuid.uuid4().hex}"
@@ -1059,7 +1061,6 @@ async def _build_payload(
             *tool_call_items,
             *image_call_items,
         ],
-        output_text=assistant_text or None,
         status="completed",
         usage=usage,
         input=normalized_input or None,

From 9260f8b5cc37192716d4127ed6ab98a087e7e3ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 3 Dec 2025 22:51:54 +0700
Subject: [PATCH 013/236] Extend models with new fields (annotations, error),
 add `normalize_output_text` validator, rename `created` to `created_at`, and
 update response handling accordingly.

---
 app/models/models.py | 16 +++++++++++++---
 app/server/chat.py   |  8 ++++----
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/app/models/models.py b/app/models/models.py
index 1d7368c..8d5102c 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -3,7 +3,7 @@
 from datetime import datetime
 from typing import Any, Dict, List, Literal, Optional, Union
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
 
 
 class ContentItem(BaseModel):
@@ -127,7 +127,6 @@ class ChatCompletionResponse(BaseModel):
     model: str
     choices: List[Choice]
     usage: Usage
-    system_fingerprint: Optional[str] = None
     service_tier: Optional[str] = None
 
 
@@ -172,6 +171,15 @@ class ResponseInputContent(BaseModel):
     file_url: Optional[str] = None
     file_data: Optional[str] = None
     filename: Optional[str] = None
+    annotations: List[Dict[str, Any]] = Field(default_factory=list)
+
+    @model_validator(mode="before")
+    @classmethod
+    def normalize_output_text(cls, data: Any) -> Any:
+        """Allow output_text (from previous turns) to be treated as input_text."""
+        if isinstance(data, dict) and data.get("type") == "output_text":
+            data["type"] = "input_text"
+        return data
 
 
 class ResponseInputItem(BaseModel):
@@ -266,7 +274,7 @@ class ResponseCreateResponse(BaseModel):
 
     id: str
     object: Literal["response"] = "response"
-    created: int
+    created_at: int
     model: str
     output: List[Union[ResponseOutputMessage, ResponseImageGenerationCall, ResponseToolCall]]
     status: Literal[
@@ -274,9 +282,11 @@ class ResponseCreateResponse(BaseModel):
         "completed",
         "failed",
         "incomplete",
+        "cancelled",
         "requires_action",
     ] = "completed"
     usage: ResponseUsage
+    error: Optional[Dict[str, Any]] = None
     metadata: Optional[Dict[str, Any]] = None
     system_fingerprint: Optional[str] = None
     input: Optional[Union[str, List[ResponseInputItem]]] = None
diff --git a/app/server/chat.py b/app/server/chat.py
index 5848a39..ef508b9 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1049,7 +1049,7 @@ async def _build_payload(
 
     response_payload = ResponseCreateResponse(
         id=response_id,
-        created=created_time,
+        created_at=created_time,
         model=request.model,
         output=[
             ResponseOutputMessage(
@@ -1334,7 +1334,7 @@ def _create_responses_streaming_response(
 
     response_dict = response_payload.model_dump(mode="json")
     response_id = response_payload.id
-    created_time = response_payload.created
+    created_time = response_payload.created_at
     model = response_payload.model
 
     logger.debug(
@@ -1344,14 +1344,14 @@ def _create_responses_streaming_response(
     base_event = {
         "id": response_id,
         "object": "response",
-        "created": created_time,
+        "created_at": created_time,
         "model": model,
     }
 
     created_snapshot: dict[str, Any] = {
         "id": response_id,
         "object": "response",
-        "created": created_time,
+        "created_at": created_time,
         "model": model,
         "status": "in_progress",
     }

From d6a8e6bdb786bb90dd653cd9aa3fc88469c2b505 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 4 Dec 2025 09:35:30 +0700
Subject: [PATCH 014/236] Extend response models to support tool choices, image
 output, and improved streaming of response items. Refactor image generation
 handling for consistency and add compatibility with output content.

---
 app/models/models.py |  7 ++--
 app/server/chat.py   | 83 ++++++++++++++++++++++++++++++++------------
 2 files changed, 65 insertions(+), 25 deletions(-)

diff --git a/app/models/models.py b/app/models/models.py
index 8d5102c..bbc2140 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -127,7 +127,6 @@ class ChatCompletionResponse(BaseModel):
     model: str
     choices: List[Choice]
     usage: Usage
-    service_tier: Optional[str] = None
 
 
 class ModelListResponse(BaseModel):
@@ -234,8 +233,9 @@ class ResponseUsage(BaseModel):
 class ResponseOutputContent(BaseModel):
     """Content item for Responses API output."""
 
-    type: Literal["output_text"]
+    type: Literal["output_text", "output_image"]
     text: Optional[str] = None
+    image_url: Optional[str] = None
     annotations: List[Dict[str, Any]] = Field(default_factory=list)
 
 
@@ -285,10 +285,11 @@ class ResponseCreateResponse(BaseModel):
         "cancelled",
         "requires_action",
     ] = "completed"
+    tool_choice: Optional[Union[str, ResponseToolChoice]] = None
+    tools: Optional[List[Union[Tool, ResponseImageTool]]] = None
     usage: ResponseUsage
     error: Optional[Dict[str, Any]] = None
     metadata: Optional[Dict[str, Any]] = None
-    system_fingerprint: Optional[str] = None
     input: Optional[Union[str, List[ResponseInputItem]]] = None
 
 
diff --git a/app/server/chat.py b/app/server/chat.py
index ef508b9..cb498a5 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -992,6 +992,7 @@ async def _build_payload(
             detail = f"{detail} Assistant response: {summary}"
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail)
 
+    response_contents: list[ResponseOutputContent] = []
     image_call_items: list[ResponseImageGenerationCall] = []
     for image in images:
         try:
@@ -999,15 +1000,25 @@ async def _build_payload(
         except Exception as exc:
             logger.warning(f"Failed to download generated image: {exc}")
             continue
+
+        img_format = "png" if isinstance(image, GeneratedImage) else "jpeg"
         image_call_items.append(
             ResponseImageGenerationCall(
                 id=f"img_{uuid.uuid4().hex}",
                 status="completed",
                 result=image_base64,
-                output_format="png" if isinstance(image, GeneratedImage) else "jpeg",
+                output_format=img_format,
                 size=f"{width}x{height}" if width and height else None,
             )
         )
+        # Add as output_image content for compatibility
+        response_contents.append(
+            ResponseOutputContent(
+                type="output_image",
+                image_url=f"data:image/{img_format};base64,{image_base64}",
+                annotations=[],
+            )
+        )
 
     tool_call_items: list[ResponseToolCall] = []
     if detected_tool_calls:
@@ -1020,7 +1031,6 @@ async def _build_payload(
             for call in detected_tool_calls
         ]
 
-    response_contents: list[ResponseOutputContent] = []
     if assistant_text:
         response_contents.append(
             ResponseOutputContent(type="output_text", text=assistant_text, annotations=[])
@@ -1065,6 +1075,8 @@ async def _build_payload(
         usage=usage,
         input=normalized_input or None,
         metadata=request.metadata or None,
+        tools=request.tools,
+        tool_choice=request.tool_choice,
     )
 
     try:
@@ -1359,6 +1371,10 @@ def _create_responses_streaming_response(
         created_snapshot["metadata"] = response_dict["metadata"]
     if response_dict.get("input") is not None:
         created_snapshot["input"] = response_dict["input"]
+    if response_dict.get("tools") is not None:
+        created_snapshot["tools"] = response_dict["tools"]
+    if response_dict.get("tool_choice") is not None:
+        created_snapshot["tool_choice"] = response_dict["tool_choice"]
 
     async def generate_stream():
         # Emit creation event
@@ -1369,30 +1385,53 @@ async def generate_stream():
         }
         yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
 
-        # Stream textual content, if any
-        if assistant_text:
-            for chunk in _iter_stream_segments(assistant_text):
-                delta_event = {
-                    **base_event,
-                    "type": "response.output_text.delta",
-                    "output_index": 0,
-                    "delta": chunk,
-                }
-                yield f"data: {orjson.dumps(delta_event).decode('utf-8')}\n\n"
-
-            done_event = {
+        # Stream output items (Message/Text, Tool Calls, Images)
+        for i, item in enumerate(response_payload.output):
+            item_json = item.model_dump(mode="json", exclude_none=True)
+
+            added_event = {
                 **base_event,
-                "type": "response.output_text.done",
-                "output_index": 0,
+                "type": "response.output_item.added",
+                "output_index": i,
+                "item": item_json,
             }
-            yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n"
-        else:
-            done_event = {
+            yield f"data: {orjson.dumps(added_event).decode('utf-8')}\n\n"
+
+            # 2. Stream content if it's a message (text)
+            if item.type == "message":
+                content_text = ""
+                # Aggregate text content to stream
+                for c in item.content:
+                    if c.type == "output_text" and c.text:
+                        content_text += c.text
+
+                if content_text:
+                    for chunk in _iter_stream_segments(content_text):
+                        delta_event = {
+                            **base_event,
+                            "type": "response.output_text.delta",
+                            "output_index": i,
+                            "delta": chunk,
+                        }
+                        yield f"data: {orjson.dumps(delta_event).decode('utf-8')}\n\n"
+
+                    # Text done
+                    done_event = {
+                        **base_event,
+                        "type": "response.output_text.done",
+                        "output_index": i,
+                    }
+                    yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n"
+
+            # 3. Emit output_item.done for all types
+            # This confirms the item is fully transferred.
+            item_done_event = {
                 **base_event,
-                "type": "response.output_text.done",
-                "output_index": 0,
+                "type": "response.output_item.done",
+                "output_index": i,
+                "item": item_json,
             }
-            yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n"
+            yield f"data: {orjson.dumps(item_done_event).decode('utf-8')}\n\n"
 
         # Emit completed event with full payload
         completed_event = {

From 16435a2ce12a4d37e9f3cfa758f384000aa41123 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 4 Dec 2025 09:50:47 +0700
Subject: [PATCH 015/236] Set default `text` value to an empty string for
 `ResponseOutputContent` and ensure consistent initialization in image output
 handling.

---
 app/models/models.py | 2 +-
 app/server/chat.py   | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/app/models/models.py b/app/models/models.py
index bbc2140..2c987b8 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -234,7 +234,7 @@ class ResponseOutputContent(BaseModel):
     """Content item for Responses API output."""
 
     type: Literal["output_text", "output_image"]
-    text: Optional[str] = None
+    text: Optional[str] = ""
     image_url: Optional[str] = None
     annotations: List[Dict[str, Any]] = Field(default_factory=list)
 
diff --git a/app/server/chat.py b/app/server/chat.py
index cb498a5..7745a26 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1015,6 +1015,7 @@ async def _build_payload(
         response_contents.append(
             ResponseOutputContent(
                 type="output_image",
+                text="",
                 image_url=f"data:image/{img_format};base64,{image_base64}",
                 annotations=[],
             )

From fc99c2d60193f346006f5cf17af4e849d8ea2669 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 4 Dec 2025 11:03:50 +0700
Subject: [PATCH 016/236] feat: Add /images endpoint with dedicated router and
 improved image management

Add dedicated router for /images endpoint and refactor image handling logic for better modularity. Enhance temporary image management with secure naming, token verification, and cleanup functionality.
---
 app/main.py              |  9 +++++-
 app/server/chat.py       | 35 ++++++++++++++++--------
 app/server/images.py     | 15 ++++++++++
 app/server/middleware.py | 59 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 105 insertions(+), 13 deletions(-)
 create mode 100644 app/server/images.py

diff --git a/app/main.py b/app/main.py
index 95458d3..c215e2a 100644
--- a/app/main.py
+++ b/app/main.py
@@ -6,7 +6,12 @@
 
 from .server.chat import router as chat_router
 from .server.health import router as health_router
-from .server.middleware import add_cors_middleware, add_exception_handler
+from .server.images import router as images_router
+from .server.middleware import (
+    add_cors_middleware,
+    add_exception_handler,
+    cleanup_expired_images,
+)
 from .services import GeminiClientPool, LMDBConversationStore
 
 RETENTION_CLEANUP_INTERVAL_SECONDS = 6 * 60 * 60  # 6 hours
@@ -28,6 +33,7 @@ async def _run_retention_cleanup(stop_event: asyncio.Event) -> None:
     while not stop_event.is_set():
         try:
             store.cleanup_expired()
+            cleanup_expired_images(store.retention_days)
         except Exception:
             logger.exception("LMDB retention cleanup task failed.")
 
@@ -93,5 +99,6 @@ def create_app() -> FastAPI:
 
     app.include_router(health_router, tags=["Health"])
     app.include_router(chat_router, tags=["Chat"])
+    app.include_router(images_router, tags=["Images"])
 
     return app
diff --git a/app/server/chat.py b/app/server/chat.py
index 7745a26..db92dbc 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -44,7 +44,7 @@
 from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT
 from ..utils import g_config
 from ..utils.helper import estimate_tokens
-from .middleware import get_temp_dir, verify_api_key
+from .middleware import get_image_store_dir, get_temp_dir, verify_api_key
 
 # Maximum characters Gemini Web can accept in a single request (configurable)
 MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9)
@@ -588,6 +588,7 @@ async def create_chat_completion(
     request: ChatCompletionRequest,
     api_key: str = Depends(verify_api_key),
     tmp_dir: Path = Depends(get_temp_dir),
+    image_store: Path = Depends(get_image_store_dir),
 ):
     pool = GeminiClientPool()
     db = LMDBConversationStore()
@@ -775,6 +776,7 @@ async def create_response(
     request: ResponseCreateRequest,
     api_key: str = Depends(verify_api_key),
     tmp_dir: Path = Depends(get_temp_dir),
+    image_store: Path = Depends(get_image_store_dir),
 ):
     base_messages, normalized_input = _response_items_to_messages(request.input)
     if not base_messages:
@@ -996,12 +998,16 @@ async def _build_payload(
     image_call_items: list[ResponseImageGenerationCall] = []
     for image in images:
         try:
-            image_base64, width, height = await _image_to_base64(image, tmp_dir)
+            image_base64, width, height, filename = await _image_to_base64(image, tmp_dir)
         except Exception as exc:
             logger.warning(f"Failed to download generated image: {exc}")
             continue
 
         img_format = "png" if isinstance(image, GeneratedImage) else "jpeg"
+
+        # Use static URL for compatibility
+        image_url = f"{request.base_url}images/{filename}"
+
         image_call_items.append(
             ResponseImageGenerationCall(
                 id=f"img_{uuid.uuid4().hex}",
@@ -1013,12 +1019,10 @@ async def _build_payload(
         )
         # Add as output_image content for compatibility
         response_contents.append(
-            ResponseOutputContent(
-                type="output_image",
-                text="",
-                image_url=f"data:image/{img_format};base64,{image_base64}",
-                annotations=[],
-            )
+            ResponseOutputContent(type="output_text", text=image_url, annotations=[])
+        )
+        response_contents.append(
+            ResponseOutputContent(type="output_image", text="", image_url=image_url, annotations=[])
         )
 
     tool_call_items: list[ResponseToolCall] = []
@@ -1553,8 +1557,8 @@ def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
     return None, None
 
 
-async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None]:
-    """Persist an image provided by gemini_webapi and return base64 plus dimensions."""
+async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]:
+    """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename."""
     if isinstance(image, GeneratedImage):
         saved_path = await image.save(path=str(temp_dir), full_size=True)
     else:
@@ -1563,6 +1567,13 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non
     if not saved_path:
         raise ValueError("Failed to save generated image")
 
-    data = Path(saved_path).read_bytes()
+    # Rename file to a random UUID to ensure uniqueness and unpredictability
+    original_path = Path(saved_path)
+    random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}"
+    new_path = temp_dir / random_name
+    original_path.rename(new_path)
+
+    data = new_path.read_bytes()
     width, height = _extract_image_dimensions(data)
-    return base64.b64encode(data).decode("ascii"), width, height
+    filename = random_name
+    return base64.b64encode(data).decode("ascii"), width, height, filename
diff --git a/app/server/images.py b/app/server/images.py
new file mode 100644
index 0000000..2867239
--- /dev/null
+++ b/app/server/images.py
@@ -0,0 +1,15 @@
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import FileResponse
+
+from ..server.middleware import get_image_store_dir
+
+router = APIRouter()
+
+
+@router.get("/images/{filename}", tags=["Images"])
+async def get_image(filename: str):
+    image_store = get_image_store_dir()
+    file_path = image_store / filename
+    if not file_path.exists():
+        raise HTTPException(status_code=404, detail="Image not found")
+    return FileResponse(file_path)
diff --git a/app/server/middleware.py b/app/server/middleware.py
index b12024f..60e4c8d 100644
--- a/app/server/middleware.py
+++ b/app/server/middleware.py
@@ -1,13 +1,72 @@
+import hashlib
+import hmac
 import tempfile
+import time
 from pathlib import Path
 
 from fastapi import Depends, FastAPI, HTTPException, Request, status
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import ORJSONResponse
 from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
+from loguru import logger
 
 from ..utils import g_config
 
+# Persistent directory for storing generated images
+IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "gemini_fastapi_images"
+IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def get_image_store_dir() -> Path:
+    """Returns a persistent directory for storing images."""
+    return IMAGE_STORE_DIR
+
+
+def get_image_token(filename: str) -> str:
+    """Generate a HMAC-SHA256 token for a filename using the API key."""
+    secret = g_config.server.api_key
+    if not secret:
+        return ""
+
+    msg = filename.encode("utf-8")
+    secret_bytes = secret.encode("utf-8")
+    return hmac.new(secret_bytes, msg, hashlib.sha256).hexdigest()
+
+
+def verify_image_token(filename: str, token: str | None) -> bool:
+    """Verify the provided token against the filename."""
+    expected = get_image_token(filename)
+    if not expected:
+        return True  # No auth required
+    if not token:
+        return False
+    return hmac.compare_digest(token, expected)
+
+
+def cleanup_expired_images(retention_days: int) -> int:
+    """Delete images in IMAGE_STORE_DIR older than retention_days."""
+    if retention_days <= 0:
+        return 0
+
+    now = time.time()
+    retention_seconds = retention_days * 24 * 60 * 60
+    cutoff = now - retention_seconds
+
+    count = 0
+    for file_path in IMAGE_STORE_DIR.iterdir():
+        if not file_path.is_file():
+            continue
+        try:
+            if file_path.stat().st_mtime < cutoff:
+                file_path.unlink()
+                count += 1
+        except Exception as e:
+            logger.warning(f"Failed to delete expired image {file_path}: {e}")
+
+    if count > 0:
+        logger.info(f"Cleaned up {count} expired images.")
+    return count
+
 
 def global_exception_handler(request: Request, exc: Exception):
     if isinstance(exc, HTTPException):

From 28441765f3fa47787027620cdc4a6d9e7ddbdd94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 4 Dec 2025 11:10:29 +0700
Subject: [PATCH 017/236] feat: Add token-based verification for image access

---
 app/server/chat.py   | 4 ++--
 app/server/images.py | 9 ++++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index db92dbc..9371137 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -44,7 +44,7 @@
 from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT
 from ..utils import g_config
 from ..utils.helper import estimate_tokens
-from .middleware import get_image_store_dir, get_temp_dir, verify_api_key
+from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key
 
 # Maximum characters Gemini Web can accept in a single request (configurable)
 MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9)
@@ -1006,7 +1006,7 @@ async def _build_payload(
         img_format = "png" if isinstance(image, GeneratedImage) else "jpeg"
 
         # Use static URL for compatibility
-        image_url = f"{request.base_url}images/{filename}"
+        image_url = f"{request.base_url}images/{filename}?token={get_image_token(filename)}"
 
         image_call_items.append(
             ResponseImageGenerationCall(
diff --git a/app/server/images.py b/app/server/images.py
index 2867239..fe078f7 100644
--- a/app/server/images.py
+++ b/app/server/images.py
@@ -1,13 +1,16 @@
-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter, HTTPException, Query
 from fastapi.responses import FileResponse
 
-from ..server.middleware import get_image_store_dir
+from ..server.middleware import get_image_store_dir, verify_image_token
 
 router = APIRouter()
 
 
 @router.get("/images/{filename}", tags=["Images"])
-async def get_image(filename: str):
+async def get_image(filename: str, token: str | None = Query(default=None)):
+    if not verify_image_token(filename, token):
+        raise HTTPException(status_code=403, detail="Invalid token")
+
     image_store = get_image_store_dir()
     file_path = image_store / filename
     if not file_path.exists():

From 4509c14dfd5a38dfa6b989b3e9ac308e3bc8c982 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 4 Dec 2025 11:22:07 +0700
Subject: [PATCH 018/236] Refactor: rename image store directory to
 `ai_generated_images` for clarity

---
 app/server/middleware.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/server/middleware.py b/app/server/middleware.py
index 60e4c8d..630e1f5 100644
--- a/app/server/middleware.py
+++ b/app/server/middleware.py
@@ -13,7 +13,7 @@
 from ..utils import g_config
 
 # Persistent directory for storing generated images
-IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "gemini_fastapi_images"
+IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "ai_generated_images"
 IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True)
 
 

From 75e2f61d3a6b1d12269af2ee82344ab643f34e83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 4 Dec 2025 11:31:15 +0700
Subject: [PATCH 019/236] fix: Update create_response to use FastAPI Request
 object for base_url and refactor variable handling

---
 app/server/chat.py | 44 +++++++++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 9371137..0010f4a 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -9,7 +9,7 @@
 from typing import Any, Iterator
 
 import orjson
-from fastapi import APIRouter, Depends, HTTPException, status
+from fastapi import APIRouter, Depends, HTTPException, Request, status
 from fastapi.responses import StreamingResponse
 from gemini_webapi.client import ChatSession
 from gemini_webapi.constants import Model
@@ -773,19 +773,15 @@ async def create_chat_completion(
 
 @router.post("/v1/responses")
 async def create_response(
-    request: ResponseCreateRequest,
+    request_data: ResponseCreateRequest,
+    request: Request,
     api_key: str = Depends(verify_api_key),
     tmp_dir: Path = Depends(get_temp_dir),
     image_store: Path = Depends(get_image_store_dir),
 ):
-    base_messages, normalized_input = _response_items_to_messages(request.input)
-    if not base_messages:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST, detail="No message input provided."
-        )
-
-    structured_requirement = _build_structured_requirement(request.response_format)
-    if structured_requirement and request.stream:
+    base_messages, normalized_input = _response_items_to_messages(request_data.input)
+    structured_requirement = _build_structured_requirement(request_data.response_format)
+    if structured_requirement and request_data.stream:
         logger.debug(
             "Structured response requested with streaming enabled; streaming not supported for Responses."
         )
@@ -801,7 +797,7 @@ async def create_response(
     standard_tools: list[Tool] = []
     image_tools: list[ResponseImageTool] = []
 
-    if request.tools:
+    if request_data.tools:
         for t in request.tools:
             if isinstance(t, Tool):
                 standard_tools.append(t)
@@ -817,13 +813,15 @@ async def create_response(
 
     image_instruction = _build_image_generation_instruction(
         image_tools,
-        request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None,
+        request_data.tool_choice
+        if isinstance(request_data.tool_choice, ResponseToolChoice)
+        else None,
     )
     if image_instruction:
         extra_instructions.append(image_instruction)
         logger.debug("Image generation support enabled for /v1/responses request.")
 
-    preface_messages = _instructions_to_messages(request.instructions)
+    preface_messages = _instructions_to_messages(request_data.instructions)
     conversation_messages = base_messages
     if preface_messages:
         conversation_messages = [*preface_messages, *base_messages]
@@ -834,10 +832,10 @@ async def create_response(
     # Pass standard tools to the prompt builder
     # Determine tool_choice for standard tools (ignore image_generation choice here as it is handled via instruction)
     model_tool_choice = None
-    if isinstance(request.tool_choice, str):
-        model_tool_choice = request.tool_choice
-    elif isinstance(request.tool_choice, ToolChoiceFunction):
-        model_tool_choice = request.tool_choice
+    if isinstance(request_data.tool_choice, str):
+        model_tool_choice = request_data.tool_choice
+    elif isinstance(request_data.tool_choice, ToolChoiceFunction):
+        model_tool_choice = request_data.tool_choice
     # If tool_choice is ResponseToolChoice (image_generation), we don't pass it as a function tool choice.
 
     messages = _prepare_messages_for_model(
@@ -851,7 +849,7 @@ async def create_response(
     db = LMDBConversationStore()
 
     try:
-        model = Model.from_name(request.model)
+        model = Model.from_name(request_data.model)
     except ValueError as exc:
         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
 
@@ -971,7 +969,7 @@ async def _build_payload(
         )
 
     expects_image = (
-        request.tool_choice is not None and request.tool_choice.type == "image_generation"
+        request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation"
     )
     images = model_output.images or []
     logger.debug(
@@ -1065,7 +1063,7 @@ async def _build_payload(
     response_payload = ResponseCreateResponse(
         id=response_id,
         created_at=created_time,
-        model=request.model,
+        model=request_data.model,
         output=[
             ResponseOutputMessage(
                 id=message_id,
@@ -1079,9 +1077,9 @@ async def _build_payload(
         status="completed",
         usage=usage,
         input=normalized_input or None,
-        metadata=request.metadata or None,
-        tools=request.tools,
-        tool_choice=request.tool_choice,
+        metadata=request_data.metadata or None,
+        tools=request_data.tools,
+        tool_choice=request_data.tool_choice,
     )
 
     try:

From bde6d0d146fc9088df947cfc0958dc88963e93ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 4 Dec 2025 11:35:44 +0700
Subject: [PATCH 020/236] fix: Correct attribute access in request_data
 handling within `chat.py` for tools, tool_choice, and streaming settings

---
 app/server/chat.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 0010f4a..9a3f19f 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -798,7 +798,7 @@ async def create_response(
     image_tools: list[ResponseImageTool] = []
 
     if request_data.tools:
-        for t in request.tools:
+        for t in request_data.tools:
             if isinstance(t, Tool):
                 standard_tools.append(t)
             elif isinstance(t, ResponseImageTool):
@@ -984,7 +984,7 @@ async def _build_payload(
                 summary = f"{summary[:197]}..."
         logger.warning(
             "Image generation requested but Gemini produced no images. "
-            f"client_id={client_id}, forced_tool_choice={request.tool_choice is not None}, "
+            f"client_id={client_id}, forced_tool_choice={request_data.tool_choice is not None}, "
             f"instruction_applied={bool(image_instruction)}, assistant_preview='{summary}'"
         )
         detail = "LLM returned no images for the requested image_generation tool."
@@ -1100,7 +1100,7 @@ async def _build_payload(
     except Exception as exc:
         logger.warning(f"Failed to save Responses conversation to LMDB: {exc}")
 
-    if request.stream:
+    if request_data.stream:
         logger.debug(
             f"Streaming Responses API payload (response_id={response_payload.id}, text_chunks={bool(assistant_text)})."
         )

From 601451a8dbf8cf689a482fd75cda399b5e815cd9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 4 Dec 2025 11:45:49 +0700
Subject: [PATCH 021/236] fix: Save generated images to persistent storage

---
 app/server/chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 9a3f19f..4246c53 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -996,7 +996,7 @@ async def _build_payload(
     image_call_items: list[ResponseImageGenerationCall] = []
     for image in images:
         try:
-            image_base64, width, height, filename = await _image_to_base64(image, tmp_dir)
+            image_base64, width, height, filename = await _image_to_base64(image, image_store)
         except Exception as exc:
             logger.warning(f"Failed to download generated image: {exc}")
             continue

From 893eb6d47305f60c4b13896bfc48beb89909dd88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 4 Dec 2025 11:51:42 +0700
Subject: [PATCH 022/236] fix: Remove unused `output_image` type from
 `ResponseOutputContent` and update response handling for consistency

---
 app/models/models.py | 3 +--
 app/server/chat.py   | 5 +----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/app/models/models.py b/app/models/models.py
index 2c987b8..c27e024 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -233,9 +233,8 @@ class ResponseUsage(BaseModel):
 class ResponseOutputContent(BaseModel):
     """Content item for Responses API output."""
 
-    type: Literal["output_text", "output_image"]
+    type: Literal["output_text"]
     text: Optional[str] = ""
-    image_url: Optional[str] = None
     annotations: List[Dict[str, Any]] = Field(default_factory=list)
 
 
diff --git a/app/server/chat.py b/app/server/chat.py
index 4246c53..3396df0 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1015,13 +1015,10 @@ async def _build_payload(
                 size=f"{width}x{height}" if width and height else None,
             )
         )
-        # Add as output_image content for compatibility
+        # Add as output_text content for compatibility
         response_contents.append(
             ResponseOutputContent(type="output_text", text=image_url, annotations=[])
         )
-        response_contents.append(
-            ResponseOutputContent(type="output_image", text="", image_url=image_url, annotations=[])
-        )
 
     tool_call_items: list[ResponseToolCall] = []
     if detected_tool_calls:

From 80462b586a110cad7e5b5cc259424e405ecbafc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 4 Dec 2025 13:24:14 +0700
Subject: [PATCH 023/236] fix: Update image URL generation in chat response to
 use Markdown format for compatibility

---
 app/server/chat.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 3396df0..c2a60ab 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1004,7 +1004,9 @@ async def _build_payload(
         img_format = "png" if isinstance(image, GeneratedImage) else "jpeg"
 
         # Use static URL for compatibility
-        image_url = f"{request.base_url}images/{filename}?token={get_image_token(filename)}"
+        image_url = (
+            f"![{filename}]({request.base_url}images/{filename}?token={get_image_token(filename)})"
+        )
 
         image_call_items.append(
             ResponseImageGenerationCall(

From 8d49a72e0b5c605e2439d6dcbf149925cb670ded Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 8 Dec 2025 09:45:58 +0700
Subject: [PATCH 024/236] fix: Enhance error handling for full-size image
 saving and add fallback to default size

---
 app/server/chat.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index c2a60ab..d14e9ce 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1557,7 +1557,11 @@ def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
 async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]:
     """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename."""
     if isinstance(image, GeneratedImage):
-        saved_path = await image.save(path=str(temp_dir), full_size=True)
+        try:
+            saved_path = await image.save(path=str(temp_dir), full_size=True)
+        except Exception as e:
+            logger.warning(f"Failed to download full-size image, retrying with default size: {e}")
+            saved_path = await image.save(path=str(temp_dir), full_size=False)
     else:
         saved_path = await image.save(path=str(temp_dir))
 

From d37eae0ab8c4590b3301dc8853ef22a512ab0d98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 9 Dec 2025 20:46:03 +0700
Subject: [PATCH 025/236] fix: Use filename as image ID to ensure consistency
 in generated image handling

---
 app/server/chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index d14e9ce..fc69293 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1010,7 +1010,7 @@ async def _build_payload(
 
         image_call_items.append(
             ResponseImageGenerationCall(
-                id=f"img_{uuid.uuid4().hex}",
+                id=filename.split(".")[0],
                 status="completed",
                 result=image_base64,
                 output_format=img_format,

From b9f776dfbb9d251ee016e05a1f6001907c3f8b84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 16 Dec 2025 19:50:07 +0700
Subject: [PATCH 026/236] fix: Enhance tempfile saving by adding custom
 headers, content-type handling, and improved extension determination

---
 app/utils/helper.py | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 3bff469..89fc31e 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -2,12 +2,17 @@
 import mimetypes
 import tempfile
 from pathlib import Path
+from urllib.parse import urlparse
 
 import httpx
 from loguru import logger
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 
+HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
+}
+
 
 def add_tag(role: str, content: str, unclose: bool = False) -> str:
     """Surround content with role tags"""
@@ -36,7 +41,7 @@ async def save_file_to_tempfile(
     return path
 
 
-async def save_url_to_tempfile(url: str, tempdir: Path | None = None):
+async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
     data: bytes | None = None
     suffix: str | None = None
     if url.startswith("data:image/"):
@@ -47,17 +52,26 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None):
         base64_data = url.split(",")[1]
         data = base64.b64decode(base64_data)
 
-        # Guess extension from mime type, default to the subtype if not found
         suffix = mimetypes.guess_extension(mime_type)
         if not suffix:
             suffix = f".{mime_type.split('/')[1]}"
     else:
-        # http files
-        async with httpx.AsyncClient() as client:
+        async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True) as client:
             resp = await client.get(url)
             resp.raise_for_status()
             data = resp.content
-            suffix = Path(url).suffix or ".bin"
+            content_type = resp.headers.get("content-type")
+
+            if content_type:
+                mime_type = content_type.split(";")[0].strip()
+                suffix = mimetypes.guess_extension(mime_type)
+
+            if not suffix:
+                path_url = urlparse(url).path
+                suffix = Path(path_url).suffix
+
+            if not suffix:
+                suffix = ".bin"
 
     with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=tempdir) as tmp:
         tmp.write(data)

From 4b5fe078250ce0496ca93b1861f9622fc5171746 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 30 Dec 2025 22:39:05 +0700
Subject: [PATCH 027/236] feat: Add support for custom Gemini models and model
 loading strategies

- Introduced `model_strategy` configuration for "append" (default + custom models) or "overwrite" (custom models only).
- Enhanced `/v1/models` endpoint to return models based on the configured strategy.
- Improved model loading with environment variable overrides and validation.
- Refactored model handling logic for improved modularity and error handling.
---
 app/server/chat.py  | 70 ++++++++++++++++++++++++++++++++++--------
 app/utils/config.py | 75 ++++++++++++++++++++++++++++++++++++++++++++-
 config/config.yaml  |  5 +++
 3 files changed, 136 insertions(+), 14 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index fc69293..0a4c16c 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -562,24 +562,64 @@ def _replace(match: re.Match[str]) -> str:
     return cleaned, tool_calls
 
 
-@router.get("/v1/models", response_model=ModelListResponse)
-async def list_models(api_key: str = Depends(verify_api_key)):
-    now = int(datetime.now(tz=timezone.utc).timestamp())
+def _get_model_by_name(name: str) -> Model:
+    """
+    Retrieve a Model instance by name, considering custom models from config
+    and the update strategy (append or overwrite).
+    """
+    strategy = g_config.gemini.model_strategy
+    custom_models = {m.model_name: m for m in g_config.gemini.models if m.model_name}
 
-    models = []
-    for model in Model:
-        m_name = model.model_name
-        if not m_name or m_name == "unspecified":
-            continue
+    if name in custom_models:
+        return Model.from_dict(custom_models[name].model_dump())
+
+    if strategy == "overwrite":
+        raise ValueError(f"Model '{name}' not found in custom models (strategy='overwrite').")
 
-        models.append(
+    return Model.from_name(name)
+
+
+def _get_available_models() -> list[ModelData]:
+    """
+    Return a list of available models based on configuration strategy.
+    """
+    now = int(datetime.now(tz=timezone.utc).timestamp())
+    strategy = g_config.gemini.model_strategy
+    models_data = []
+
+    custom_models = [m for m in g_config.gemini.models if m.model_name]
+    for m in custom_models:
+        models_data.append(
             ModelData(
-                id=m_name,
+                id=m.model_name,
                 created=now,
-                owned_by="gemini-web",
+                owned_by="custom",
             )
         )
 
+    if strategy == "append":
+        custom_ids = {m.model_name for m in custom_models}
+        for model in Model:
+            m_name = model.model_name
+            if not m_name or m_name == "unspecified":
+                continue
+            if m_name in custom_ids:
+                continue
+
+            models_data.append(
+                ModelData(
+                    id=m_name,
+                    created=now,
+                    owned_by="gemini-web",
+                )
+            )
+
+    return models_data
+
+
+@router.get("/v1/models", response_model=ModelListResponse)
+async def list_models(api_key: str = Depends(verify_api_key)):
+    models = _get_available_models()
     return ModelListResponse(data=models)
 
 
@@ -592,7 +632,11 @@ async def create_chat_completion(
 ):
     pool = GeminiClientPool()
     db = LMDBConversationStore()
-    model = Model.from_name(request.model)
+
+    try:
+        model = _get_model_by_name(request.model)
+    except ValueError as exc:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
 
     if len(request.messages) == 0:
         raise HTTPException(
@@ -849,7 +893,7 @@ async def create_response(
     db = LMDBConversationStore()
 
     try:
-        model = Model.from_name(request_data.model)
+        model = _get_model_by_name(request_data.model)
     except ValueError as exc:
         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
 
diff --git a/app/utils/config.py b/app/utils/config.py
index 796ca75..a5c924a 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -50,12 +50,26 @@ def _blank_proxy_to_none(cls, value: Optional[str]) -> Optional[str]:
         return stripped or None
 
 
+class GeminiModelConfig(BaseModel):
+    """Configuration for a custom Gemini model."""
+
+    model_name: Optional[str] = Field(default=None, description="Name of the model")
+    model_header: Optional[dict[str, Optional[str]]] = Field(
+        default=None, description="Header for the model"
+    )
+
+
 class GeminiConfig(BaseModel):
     """Gemini API configuration"""
 
     clients: list[GeminiClientSettings] = Field(
         ..., description="List of Gemini client credential pairs"
     )
+    models: list[GeminiModelConfig] = Field(default=[], description="List of custom Gemini models")
+    model_strategy: Literal["append", "overwrite"] = Field(
+        default="append",
+        description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom",
+    )
     timeout: int = Field(default=120, ge=1, description="Init timeout")
     auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies")
     refresh_interval: int = Field(
@@ -68,6 +82,13 @@ class GeminiConfig(BaseModel):
         description="Maximum characters Gemini Web can accept per request",
     )
 
+    @field_validator("models")
+    @classmethod
+    def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]:
+        """Filter out models that don't have a name set (placeholders)."""
+
+        return [model for model in v if model.model_name]
+
 
 class CORSConfig(BaseModel):
     """CORS configuration"""
@@ -211,6 +232,53 @@ def _merge_clients_with_env(
     return result_clients if result_clients else base_clients
 
 
+def extract_gemini_models_env() -> dict[int, dict[str, str]]:
+    """Extract and remove all Gemini models related environment variables, return a mapping from index to field dict."""
+    prefix = "CONFIG_GEMINI__MODELS__"
+    env_overrides: dict[int, dict[str, str]] = {}
+    to_delete = []
+    for k, v in os.environ.items():
+        if k.startswith(prefix):
+            parts = k.split("__")
+            if len(parts) < 4:
+                continue
+            index_str, field = parts[2], parts[3].lower()
+            if not index_str.isdigit():
+                continue
+            idx = int(index_str)
+            env_overrides.setdefault(idx, {})[field] = v
+            to_delete.append(k)
+    # Remove these environment variables to avoid Pydantic parsing errors
+    for k in to_delete:
+        del os.environ[k]
+    return env_overrides
+
+
+def _merge_models_with_env(
+    base_models: list[GeminiModelConfig] | None,
+    env_overrides: dict[int, dict[str, str]],
+):
+    """Override base_models with env_overrides, return the new models list."""
+    if not env_overrides:
+        return base_models or []
+    result_models: list[GeminiModelConfig] = []
+    if base_models:
+        result_models = [model.model_copy() for model in base_models]
+
+    for idx in sorted(env_overrides):
+        overrides = env_overrides[idx]
+        if idx < len(result_models):
+            model_dict = result_models[idx].model_dump()
+            model_dict.update(overrides)
+            result_models[idx] = GeminiModelConfig(**model_dict)
+        elif idx == len(result_models):
+            new_model = GeminiModelConfig(**overrides)
+            result_models.append(new_model)
+        else:
+            raise IndexError(f"Model index {idx} in env is out of range (must be contiguous).")
+    return result_models
+
+
 def initialize_config() -> Config:
     """
     Initialize the configuration.
@@ -221,6 +289,8 @@ def initialize_config() -> Config:
     try:
         # First, extract and remove Gemini clients related environment variables
         env_clients_overrides = extract_gemini_clients_env()
+        # Extract and remove Gemini models related environment variables
+        env_models_overrides = extract_gemini_models_env()
 
         # Then, initialize Config with pydantic_settings
         config = Config()  # type: ignore
@@ -228,7 +298,10 @@ def initialize_config() -> Config:
         # Synthesize clients
         config.gemini.clients = _merge_clients_with_env(
             config.gemini.clients, env_clients_overrides
-        )  # type: ignore
+        )
+
+        # Synthesize models
+        config.gemini.models = _merge_models_with_env(config.gemini.models, env_models_overrides)
 
         return config
     except ValidationError as e:
diff --git a/config/config.yaml b/config/config.yaml
index 89c88b7..84c4602 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -27,6 +27,11 @@ gemini:
   refresh_interval: 540    # Refresh interval in seconds
   verbose: false           # Enable verbose logging for Gemini requests
   max_chars_per_request: 1000000     # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit
+  models:
+    - model_name: null
+      model_header:
+        x-goog-ext-xxxxxxxxx-jspb: null
+  model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only)
 
 storage:
   path: "data/lmdb"        # Database storage path

From 5cb29e8ea7333fd3c207f60a75b5269105bae8b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 30 Dec 2025 23:19:49 +0700
Subject: [PATCH 028/236] feat: Improve Gemini model environment variable
 parsing and nested field support

- Enhanced `extract_gemini_models_env` to handle nested fields within environment variables.
- Updated type hints for more flexibility in model overrides.
- Improved `_merge_models_with_env` to better support field-level updates and appending new models.
---
 app/utils/config.py | 31 +++++++++++++++++++++++--------
 config/config.yaml  |  2 +-
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/app/utils/config.py b/app/utils/config.py
index a5c924a..5782c66 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -1,6 +1,6 @@
 import os
 import sys
-from typing import Literal, Optional
+from typing import Any, Literal, Optional
 
 from loguru import logger
 from pydantic import BaseModel, Field, ValidationError, field_validator
@@ -232,21 +232,34 @@ def _merge_clients_with_env(
     return result_clients if result_clients else base_clients
 
 
-def extract_gemini_models_env() -> dict[int, dict[str, str]]:
-    """Extract and remove all Gemini models related environment variables, return a mapping from index to field dict."""
+def extract_gemini_models_env() -> dict[int, dict[str, Any]]:
+    """Extract and remove all Gemini models related environment variables, supporting nested fields."""
     prefix = "CONFIG_GEMINI__MODELS__"
-    env_overrides: dict[int, dict[str, str]] = {}
+    env_overrides: dict[int, dict[str, Any]] = {}
     to_delete = []
     for k, v in os.environ.items():
         if k.startswith(prefix):
             parts = k.split("__")
             if len(parts) < 4:
                 continue
-            index_str, field = parts[2], parts[3].lower()
+            index_str = parts[2]
             if not index_str.isdigit():
                 continue
             idx = int(index_str)
-            env_overrides.setdefault(idx, {})[field] = v
+
+            # Navigate to the correct nested dict
+            current = env_overrides.setdefault(idx, {})
+            for i in range(3, len(parts) - 1):
+                field_name = parts[i].lower()
+                current = current.setdefault(field_name, {})
+
+            # Set the value (lowercase root field names, preserve sub-key casing)
+            last_part = parts[-1]
+            if len(parts) == 4:
+                current[last_part.lower()] = v
+            else:
+                current[last_part] = v
+
             to_delete.append(k)
     # Remove these environment variables to avoid Pydantic parsing errors
     for k in to_delete:
@@ -256,9 +269,9 @@ def extract_gemini_models_env() -> dict[int, dict[str, str]]:
 
 def _merge_models_with_env(
     base_models: list[GeminiModelConfig] | None,
-    env_overrides: dict[int, dict[str, str]],
+    env_overrides: dict[int, dict[str, Any]],
 ):
-    """Override base_models with env_overrides, return the new models list."""
+    """Override base_models with env_overrides using standard update (replace whole fields)."""
     if not env_overrides:
         return base_models or []
     result_models: list[GeminiModelConfig] = []
@@ -268,10 +281,12 @@ def _merge_models_with_env(
     for idx in sorted(env_overrides):
         overrides = env_overrides[idx]
         if idx < len(result_models):
+            # Update existing model: overwrite fields found in env
             model_dict = result_models[idx].model_dump()
             model_dict.update(overrides)
             result_models[idx] = GeminiModelConfig(**model_dict)
         elif idx == len(result_models):
+            # Append new model
             new_model = GeminiModelConfig(**overrides)
             result_models.append(new_model)
         else:
diff --git a/config/config.yaml b/config/config.yaml
index 84c4602..2fbc061 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -27,11 +27,11 @@ gemini:
   refresh_interval: 540    # Refresh interval in seconds
   verbose: false           # Enable verbose logging for Gemini requests
   max_chars_per_request: 1000000     # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit
+  model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only)
   models:
     - model_name: null
       model_header:
         x-goog-ext-xxxxxxxxx-jspb: null
-  model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only)
 
 storage:
   path: "data/lmdb"        # Database storage path

From f25f16d00118ebeea7936cea34797270d5137b5b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 31 Dec 2025 09:52:49 +0700
Subject: [PATCH 029/236] refactor: Consolidate utility functions and clean up
 unused code

- Moved utility functions like `strip_code_fence`, `extract_tool_calls`, and `iter_stream_segments` to a centralized helper module.
- Removed unused and redundant private methods from `chat.py`, including `_strip_code_fence`, `_strip_tagged_blocks`, and `_strip_system_hints`.
- Updated imports and references across modules for consistency.
- Simplified tool call and streaming logic by replacing inline implementations with shared helper functions.
---
 app/server/chat.py     | 306 ++++------------------------------------
 app/services/client.py |  16 +--
 app/utils/helper.py    | 312 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 342 insertions(+), 292 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 0a4c16c..9485f7a 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1,12 +1,11 @@
 import base64
 import json
 import re
-import struct
 import uuid
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, Iterator
+from typing import Any
 
 import orjson
 from fastapi import APIRouter, Depends, HTTPException, Request, status
@@ -21,7 +20,6 @@
     ChatCompletionRequest,
     ContentItem,
     ConversationInStore,
-    FunctionCall,
     Message,
     ModelData,
     ModelListResponse,
@@ -37,26 +35,28 @@
     ResponseToolChoice,
     ResponseUsage,
     Tool,
-    ToolCall,
     ToolChoiceFunction,
 )
 from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore
-from ..services.client import CODE_BLOCK_HINT, XML_WRAP_HINT
 from ..utils import g_config
-from ..utils.helper import estimate_tokens
+from ..utils.helper import (
+    CODE_BLOCK_HINT,
+    CODE_HINT_STRIPPED,
+    XML_HINT_STRIPPED,
+    XML_WRAP_HINT,
+    estimate_tokens,
+    extract_image_dimensions,
+    extract_tool_calls,
+    iter_stream_segments,
+    remove_tool_call_blocks,
+    strip_code_fence,
+    text_from_message,
+)
 from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key
 
 # Maximum characters Gemini Web can accept in a single request (configurable)
 MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9)
 CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')"
-TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)```", re.DOTALL | re.IGNORECASE)
-TOOL_CALL_RE = re.compile(
-    r"<tool_call\s+name=\"([^\"]+)\">(.*?)</tool_call>", re.DOTALL | re.IGNORECASE
-)
-JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE)
-CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>")
-XML_HINT_STRIPPED = XML_WRAP_HINT.strip()
-CODE_HINT_STRIPPED = CODE_BLOCK_HINT.strip()
 
 router = APIRouter()
 
@@ -118,14 +118,6 @@ def _build_structured_requirement(
     )
 
 
-def _strip_code_fence(text: str) -> str:
-    """Remove surrounding ```json fences if present."""
-    match = JSON_FENCE_RE.match(text.strip())
-    if match:
-        return match.group(1).strip()
-    return text.strip()
-
-
 def _build_tool_prompt(
     tools: list[Tool],
     tool_choice: str | ToolChoiceFunction | None,
@@ -312,75 +304,6 @@ def _prepare_messages_for_model(
     return prepared
 
 
-def _strip_system_hints(text: str) -> str:
-    """Remove system-level hint text from a given string."""
-    if not text:
-        return text
-    cleaned = _strip_tagged_blocks(text)
-    cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "")
-    cleaned = cleaned.replace(CODE_BLOCK_HINT, "").replace(CODE_HINT_STRIPPED, "")
-    cleaned = CONTROL_TOKEN_RE.sub("", cleaned)
-    return cleaned.strip()
-
-
-def _strip_tagged_blocks(text: str) -> str:
-    """Remove <|im_start|>role ... <|im_end|> sections, dropping tool blocks entirely.
-    - tool blocks are removed entirely (if missing end marker, drop to EOF).
-    - other roles: remove markers and role, keep inner content (if missing end marker, keep to EOF).
-    """
-    if not text:
-        return text
-
-    result: list[str] = []
-    idx = 0
-    length = len(text)
-    start_marker = "<|im_start|>"
-    end_marker = "<|im_end|>"
-
-    while idx < length:
-        start = text.find(start_marker, idx)
-        if start == -1:
-            result.append(text[idx:])
-            break
-
-        # append any content before this block
-        result.append(text[idx:start])
-
-        role_start = start + len(start_marker)
-        newline = text.find("\n", role_start)
-        if newline == -1:
-            # malformed block; keep remainder as-is (safe behavior)
-            result.append(text[start:])
-            break
-
-        role = text[role_start:newline].strip().lower()
-
-        end = text.find(end_marker, newline + 1)
-        if end == -1:
-            # missing end marker
-            if role == "tool":
-                # drop from start marker to EOF (skip remainder)
-                break
-            else:
-                # keep inner content from after the role newline to EOF
-                result.append(text[newline + 1 :])
-                break
-
-        block_end = end + len(end_marker)
-
-        if role == "tool":
-            # drop whole block
-            idx = block_end
-            continue
-
-        # keep the content without role markers
-        content = text[newline + 1 : end]
-        result.append(content)
-        idx = block_end
-
-    return "".join(result)
-
-
 def _response_items_to_messages(
     items: str | list[ResponseInputItem],
 ) -> tuple[list[Message], str | list[ResponseInputItem]]:
@@ -509,59 +432,6 @@ def _instructions_to_messages(
     return instruction_messages
 
 
-def _remove_tool_call_blocks(text: str) -> str:
-    """Strip tool call code blocks from text."""
-    if not text:
-        return text
-    cleaned = TOOL_BLOCK_RE.sub("", text)
-    return _strip_system_hints(cleaned)
-
-
-def _extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
-    """Extract tool call definitions and return cleaned text."""
-    if not text:
-        return text, []
-
-    tool_calls: list[ToolCall] = []
-
-    def _replace(match: re.Match[str]) -> str:
-        block_content = match.group(1)
-        if not block_content:
-            return ""
-
-        for call_match in TOOL_CALL_RE.finditer(block_content):
-            name = (call_match.group(1) or "").strip()
-            raw_args = (call_match.group(2) or "").strip()
-            if not name:
-                logger.warning(
-                    f"Encountered tool_call block without a function name: {block_content}"
-                )
-                continue
-
-            arguments = raw_args
-            try:
-                parsed_args = json.loads(raw_args)
-                arguments = json.dumps(parsed_args, ensure_ascii=False)
-            except json.JSONDecodeError:
-                logger.warning(
-                    f"Failed to parse tool call arguments for '{name}'. Passing raw string."
-                )
-
-            tool_calls.append(
-                ToolCall(
-                    id=f"call_{uuid.uuid4().hex}",
-                    type="function",
-                    function=FunctionCall(name=name, arguments=arguments),
-                )
-            )
-
-        return ""
-
-    cleaned = TOOL_BLOCK_RE.sub(_replace, text)
-    cleaned = _strip_system_hints(cleaned)
-    return cleaned, tool_calls
-
-
 def _get_model_by_name(name: str) -> Model:
     """
     Retrieve a Model instance by name, considering custom models from config
@@ -742,12 +612,12 @@ async def create_chat_completion(
             detail="Gemini output parsing failed unexpectedly.",
         ) from exc
 
-    visible_output, tool_calls = _extract_tool_calls(raw_output_with_think)
-    storage_output = _remove_tool_call_blocks(raw_output_clean).strip()
+    visible_output, tool_calls = extract_tool_calls(raw_output_with_think)
+    storage_output = remove_tool_call_blocks(raw_output_clean).strip()
     tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls]
 
     if structured_requirement:
-        cleaned_visible = _strip_code_fence(visible_output or "")
+        cleaned_visible = strip_code_fence(visible_output or "")
         if not cleaned_visible:
             raise HTTPException(
                 status_code=status.HTTP_502_BAD_GATEWAY,
@@ -982,12 +852,12 @@ async def _build_payload(
             detail="Gemini output parsing failed unexpectedly.",
         ) from exc
 
-    visible_text, detected_tool_calls = _extract_tool_calls(text_with_think)
-    storage_output = _remove_tool_call_blocks(text_without_think).strip()
+    visible_text, detected_tool_calls = extract_tool_calls(text_with_think)
+    storage_output = remove_tool_call_blocks(text_without_think).strip()
     assistant_text = LMDBConversationStore.remove_think_tags(visible_text.strip())
 
     if structured_requirement:
-        cleaned_visible = _strip_code_fence(assistant_text or "")
+        cleaned_visible = strip_code_fence(assistant_text or "")
         if not cleaned_visible:
             raise HTTPException(
                 status_code=status.HTTP_502_BAD_GATEWAY,
@@ -1089,7 +959,7 @@ async def _build_payload(
     response_id = f"resp_{uuid.uuid4().hex}"
     message_id = f"msg_{uuid.uuid4().hex}"
 
-    input_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages)
+    input_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages)
     tool_arg_text = "".join(call.function.arguments or "" for call in detected_tool_calls)
     completion_basis = assistant_text or ""
     if tool_arg_text:
@@ -1152,25 +1022,6 @@ async def _build_payload(
     return response_payload
 
 
-def _text_from_message(message: Message) -> str:
-    """Return text content from a message for token estimation."""
-    base_text = ""
-    if isinstance(message.content, str):
-        base_text = message.content
-    elif isinstance(message.content, list):
-        base_text = "\n".join(
-            item.text or "" for item in message.content if getattr(item, "type", "") == "text"
-        )
-    elif message.content is None:
-        base_text = ""
-
-    if message.tool_calls:
-        tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls)
-        base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text
-
-    return base_text
-
-
 async def _find_reusable_session(
     db: LMDBConversationStore,
     pool: GeminiClientPool,
@@ -1268,47 +1119,6 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s
         raise
 
 
-def _iter_stream_segments(model_output: str, chunk_size: int = 64):
-    """Yield stream segments while keeping <think> markers and words intact."""
-    if not model_output:
-        return
-
-    token_pattern = re.compile(r"\s+|\S+\s*")
-    pending = ""
-
-    def _flush_pending() -> Iterator[str]:
-        nonlocal pending
-        if pending:
-            yield pending
-            pending = ""
-
-    # Split on <think> boundaries so the markers are never fragmented.
-    parts = re.split(r"(</?think>)", model_output)
-    for part in parts:
-        if not part:
-            continue
-        if part in {"<think>", "</think>"}:
-            yield from _flush_pending()
-            yield part
-            continue
-
-        for match in token_pattern.finditer(part):
-            token = match.group(0)
-
-            if len(token) > chunk_size:
-                yield from _flush_pending()
-                for idx in range(0, len(token), chunk_size):
-                    yield token[idx : idx + chunk_size]
-                continue
-
-            if pending and len(pending) + len(token) > chunk_size:
-                yield from _flush_pending()
-
-            pending += token
-
-    yield from _flush_pending()
-
-
 def _create_streaming_response(
     model_output: str,
     tool_calls: list[dict],
@@ -1320,7 +1130,7 @@ def _create_streaming_response(
     """Create streaming response with `usage` calculation included in the final chunk."""
 
     # Calculate token usage
-    prompt_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages)
+    prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages)
     tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or [])
     completion_tokens = estimate_tokens(model_output + tool_args)
     total_tokens = prompt_tokens + completion_tokens
@@ -1338,7 +1148,7 @@ async def generate_stream():
         yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
 
         # Stream output text in chunks for efficiency
-        for chunk in _iter_stream_segments(model_output):
+        for chunk in iter_stream_segments(model_output):
             data = {
                 "id": completion_id,
                 "object": "chat.completion.chunk",
@@ -1452,7 +1262,7 @@ async def generate_stream():
                         content_text += c.text
 
                 if content_text:
-                    for chunk in _iter_stream_segments(content_text):
+                    for chunk in iter_stream_segments(content_text):
                         delta_event = {
                             **base_event,
                             "type": "response.output_text.delta",
@@ -1501,7 +1311,7 @@ def _create_standard_response(
 ) -> dict:
     """Create standard response"""
     # Calculate token usage
-    prompt_tokens = sum(estimate_tokens(_text_from_message(msg)) for msg in messages)
+    prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages)
     tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or [])
     completion_tokens = estimate_tokens(model_output + tool_args)
     total_tokens = prompt_tokens + completion_tokens
@@ -1534,70 +1344,6 @@ def _create_standard_response(
     return result
 
 
-def _extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
-    """Return image dimensions (width, height) if PNG or JPEG headers are present."""
-    # PNG: dimensions stored in bytes 16..24 of the IHDR chunk
-    if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"):
-        try:
-            width, height = struct.unpack(">II", data[16:24])
-            return int(width), int(height)
-        except struct.error:
-            return None, None
-
-    # JPEG: dimensions stored in SOF segment; iterate through markers to locate it
-    if len(data) >= 4 and data[0:2] == b"\xff\xd8":
-        idx = 2
-        length = len(data)
-        sof_markers = {
-            0xC0,
-            0xC1,
-            0xC2,
-            0xC3,
-            0xC5,
-            0xC6,
-            0xC7,
-            0xC9,
-            0xCA,
-            0xCB,
-            0xCD,
-            0xCE,
-            0xCF,
-        }
-        while idx < length:
-            # Find marker alignment (markers are prefixed with 0xFF bytes)
-            if data[idx] != 0xFF:
-                idx += 1
-                continue
-            while idx < length and data[idx] == 0xFF:
-                idx += 1
-            if idx >= length:
-                break
-            marker = data[idx]
-            idx += 1
-
-            if marker in (0xD8, 0xD9, 0x01) or 0xD0 <= marker <= 0xD7:
-                continue
-
-            if idx + 1 >= length:
-                break
-            segment_length = (data[idx] << 8) + data[idx + 1]
-            idx += 2
-            if segment_length < 2:
-                break
-
-            if marker in sof_markers:
-                if idx + 4 < length:
-                    # Skip precision byte at idx, then read height/width (big-endian)
-                    height = (data[idx + 1] << 8) + data[idx + 2]
-                    width = (data[idx + 3] << 8) + data[idx + 4]
-                    return int(width), int(height)
-                break
-
-            idx += segment_length - 2
-
-    return None, None
-
-
 async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]:
     """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename."""
     if isinstance(image, GeneratedImage):
@@ -1619,6 +1365,6 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non
     original_path.rename(new_path)
 
     data = new_path.read_bytes()
-    width, height = _extract_image_dimensions(data)
+    width, height = extract_image_dimensions(data)
     filename = random_name
     return base64.b64encode(data).decode("ascii"), width, height, filename
diff --git a/app/services/client.py b/app/services/client.py
index 09c52c1..87c0ca7 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -9,18 +9,12 @@
 
 from ..models import Message
 from ..utils import g_config
-from ..utils.helper import add_tag, save_file_to_tempfile, save_url_to_tempfile
-
-XML_WRAP_HINT = (
-    "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n"
-    '```xml\n<tool_call name="tool_name">{"arg": "value"}</tool_call>\n```\n'
-    "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n"
-)
-CODE_BLOCK_HINT = (
-    "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced "
-    "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n"
-    "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n"
+from ..utils.helper import (
+    add_tag,
+    save_file_to_tempfile,
+    save_url_to_tempfile,
 )
+
 HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);")
 MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])")
 CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL)
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 89fc31e..2627faa 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -1,17 +1,41 @@
 import base64
+import json
 import mimetypes
+import re
+import struct
 import tempfile
+import uuid
 from pathlib import Path
+from typing import Iterator
 from urllib.parse import urlparse
 
 import httpx
 from loguru import logger
 
-VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
+from ..models import FunctionCall, Message, ToolCall
 
 HEADERS = {
     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
 }
+VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
+XML_WRAP_HINT = (
+    "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n"
+    '```xml\n<tool_call name="tool_name">{"arg": "value"}</tool_call>\n```\n'
+    "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n"
+)
+CODE_BLOCK_HINT = (
+    "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced "
+    "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n"
+    "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n"
+)
+TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)
+TOOL_CALL_RE = re.compile(
+    r"<tool_call\s+name=[\"']([^\"']+)[\"']\s*>(.*?)</tool_call>", re.DOTALL | re.IGNORECASE
+)
+JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>")
+XML_HINT_STRIPPED = XML_WRAP_HINT.strip()
+CODE_HINT_STRIPPED = CODE_BLOCK_HINT.strip()
 
 
 def add_tag(role: str, content: str, unclose: bool = False) -> str:
@@ -78,3 +102,289 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
         path = Path(tmp.name)
 
     return path
+
+
+def strip_code_fence(text: str) -> str:
+    """Return text stripped of whitespace and of one enclosing ``` / ```json fence, if any."""
+    match = JSON_FENCE_RE.match(text.strip())  # pattern is anchored to both ends of the text
+    if match:
+        return match.group(1).strip()  # fence body only
+    return text.strip()
+
+
+def strip_tagged_blocks(text: str) -> str:
+    """Unwrap <|im_start|>role ... <|im_end|> sections; "tool" sections are deleted outright.
+    - role "tool": block is dropped entirely (without an end marker, drop through EOF).
+    - other roles: markers and role line are removed, content is kept (to EOF if unterminated).
+    """
+    if not text:
+        return text
+
+    result: list[str] = []
+    idx = 0
+    length = len(text)
+    start_marker = "<|im_start|>"
+    end_marker = "<|im_end|>"
+
+    while idx < length:
+        start = text.find(start_marker, idx)
+        if start == -1:
+            result.append(text[idx:])  # no more blocks: keep the tail as-is
+            break
+
+        # keep whatever precedes this block's start marker
+        result.append(text[idx:start])
+
+        role_start = start + len(start_marker)
+        newline = text.find("\n", role_start)
+        if newline == -1:
+            # no newline after the marker (malformed): keep the rest, marker included
+            result.append(text[start:])
+            break
+
+        role = text[role_start:newline].strip().lower()  # role is matched case-insensitively
+
+        end = text.find(end_marker, newline + 1)
+        if end == -1:
+            # unterminated block: no end marker anywhere after the role line
+            if role == "tool":
+                # drop from start marker to EOF (skip remainder)
+                break
+            else:
+                # keep inner content from after the role newline to EOF
+                result.append(text[newline + 1 :])
+                break
+
+        block_end = end + len(end_marker)
+
+        if role == "tool":
+            # drop the whole block, markers included, and resume scanning after it
+            idx = block_end
+            continue
+
+        # keep only the text between the role line and the end marker
+        content = text[newline + 1 : end]
+        result.append(content)
+        idx = block_end
+
+    return "".join(result)
+
+
+def strip_system_hints(text: str) -> str:
+    """Drop role-tag wrappers, the XML/code-block hint texts and control tokens, then trim."""
+    if not text:
+        return text
+    cleaned = strip_tagged_blocks(text)  # also deletes whole "tool" blocks
+    cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "")
+    cleaned = cleaned.replace(CODE_BLOCK_HINT, "").replace(CODE_HINT_STRIPPED, "")
+    cleaned = CONTROL_TOKEN_RE.sub("", cleaned)  # stray <|im_start|> / <|im_end|> markers
+    return cleaned.strip()
+
+
+def remove_tool_call_blocks(text: str) -> str:
+    """Remove ```xml fences that hold tool calls, stray <tool_call> tags, and system hints."""
+    if not text:
+        return text
+
+    # 1. Remove ```xml fenced blocks ONLY if they contain tool calls
+    def _replace_block(match: re.Match[str]) -> str:
+        block_content = match.group(1)
+        if not block_content:
+            return match.group(0)  # empty fence: leave it untouched
+
+        # A single <tool_call ...> tag inside the fence is enough to drop the whole fence
+        if TOOL_CALL_RE.search(block_content):
+            return ""
+
+        # Preserve the block if no tool call found
+        return match.group(0)
+
+    cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
+
+    # 2. Remove <tool_call> tags that are still left outside the removed fences
+    cleaned = TOOL_CALL_RE.sub("", cleaned)
+
+    return strip_system_hints(cleaned)  # also trims leading/trailing whitespace
+
+
+def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
+    """Pull tool calls out of text; return (text minus calls and system hints, ToolCall list)."""
+    if not text:
+        return text, []
+
+    tool_calls: list[ToolCall] = []
+
+    def _create_tool_call(name: str, raw_args: str) -> None:
+        """Append a ToolCall for name; JSON args are re-serialized, unparsable args kept raw."""
+        if not name:
+            logger.warning("Encountered tool_call without a function name.")
+            return
+
+        arguments = raw_args  # fallback used when the JSON parse below fails
+        try:
+            parsed_args = json.loads(raw_args)
+            arguments = json.dumps(parsed_args, ensure_ascii=False)
+        except json.JSONDecodeError:
+            logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.")
+
+        tool_calls.append(
+            ToolCall(
+                id=f"call_{uuid.uuid4().hex}",
+                type="function",
+                function=FunctionCall(name=name, arguments=arguments),
+            )
+        )
+
+    def _replace_block(match: re.Match[str]) -> str:
+        block_content = match.group(1)
+        if not block_content:
+            return match.group(0)  # empty ```xml fence: keep it as-is
+
+        found_in_block = False
+        for call_match in TOOL_CALL_RE.finditer(block_content):
+            found_in_block = True  # set even if the call is later skipped for a blank name
+            name = (call_match.group(1) or "").strip()
+            raw_args = (call_match.group(2) or "").strip()
+            _create_tool_call(name, raw_args)
+
+        if found_in_block:
+            return ""  # the whole fence is removed from the visible text
+        else:
+            return match.group(0)  # ```xml fence without tool calls is preserved
+
+    cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
+
+    def _replace_orphan(match: re.Match[str]) -> str:
+        name = (match.group(1) or "").strip()
+        raw_args = (match.group(2) or "").strip()
+        _create_tool_call(name, raw_args)
+        return ""
+
+    cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned)  # tags left outside removed fences
+
+    cleaned = strip_system_hints(cleaned)
+    return cleaned, tool_calls
+
+
+def iter_stream_segments(model_output: str, chunk_size: int = 64) -> Iterator[str]:
+    """Yield <think>/</think> whole; other text in word-aligned pieces of <= chunk_size chars."""
+    if not model_output:
+        return
+
+    token_pattern = re.compile(r"\s+|\S+\s*")  # a whitespace run, or a word plus trailing space
+    pending = ""
+
+    def _flush_pending() -> Iterator[str]:
+        nonlocal pending
+        if pending:
+            yield pending
+            pending = ""
+
+    # The capturing group makes re.split keep the <think>/</think> markers as separate parts.
+    parts = re.split(r"(</?think>)", model_output)
+    for part in parts:
+        if not part:
+            continue
+        if part in {"<think>", "</think>"}:
+            yield from _flush_pending()  # emit buffered text before the marker
+            yield part
+            continue
+
+        for match in token_pattern.finditer(part):
+            token = match.group(0)
+
+            if len(token) > chunk_size:  # oversized token: hard-split it into fixed slices
+                yield from _flush_pending()
+                for idx in range(0, len(token), chunk_size):
+                    yield token[idx : idx + chunk_size]
+                continue
+
+            if pending and len(pending) + len(token) > chunk_size:
+                yield from _flush_pending()  # token would overflow the buffer: emit it first
+
+            pending += token
+
+    yield from _flush_pending()
+
+
+def text_from_message(message: Message) -> str:
+    """Return a message's text parts plus any tool-call arguments, for token estimation."""
+    base_text = ""
+    if isinstance(message.content, str):
+        base_text = message.content
+    elif isinstance(message.content, list):
+        base_text = "\n".join(  # only items whose type is "text" contribute
+            item.text or "" for item in message.content if getattr(item, "type", "") == "text"
+        )
+    elif message.content is None:
+        base_text = ""
+
+    if message.tool_calls:
+        tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls)
+        base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text
+
+    return base_text
+
+
+def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
+    """Return (width, height) parsed from a PNG or JPEG header, or (None, None) if not found."""
+    # PNG: width and height are two big-endian uint32 values at byte offsets 16..23
+    if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"):
+        try:
+            width, height = struct.unpack(">II", data[16:24])
+            return int(width), int(height)
+        except struct.error:
+            return None, None
+
+    # JPEG: dimensions stored in SOF segment; iterate through markers to locate it
+    if len(data) >= 4 and data[0:2] == b"\xff\xd8":
+        idx = 2
+        length = len(data)
+        sof_markers = {  # 0xC4, 0xC8 and 0xCC are not in this set
+            0xC0,
+            0xC1,
+            0xC2,
+            0xC3,
+            0xC5,
+            0xC6,
+            0xC7,
+            0xC9,
+            0xCA,
+            0xCB,
+            0xCD,
+            0xCE,
+            0xCF,
+        }
+        while idx < length:
+            # Find marker alignment (markers are prefixed with 0xFF bytes)
+            if data[idx] != 0xFF:
+                idx += 1
+                continue
+            while idx < length and data[idx] == 0xFF:
+                idx += 1
+            if idx >= length:
+                break
+            marker = data[idx]
+            idx += 1
+
+            if marker in (0xD8, 0xD9, 0x01) or 0xD0 <= marker <= 0xD7:  # no length field read
+                continue
+
+            if idx + 1 >= length:
+                break
+            segment_length = (data[idx] << 8) + data[idx + 1]  # big-endian 16-bit length
+            idx += 2
+            if segment_length < 2:
+                break
+
+            if marker in sof_markers:
+                if idx + 4 < length:
+                    # Skip precision byte at idx, then read height/width (big-endian)
+                    height = (data[idx + 1] << 8) + data[idx + 2]
+                    width = (data[idx + 3] << 8) + data[idx + 4]
+                    return int(width), int(height)
+                break
+
+            idx += segment_length - 2  # the two length bytes were already consumed above
+
+    return None, None

From a1bc8e289ee797a761eb506dc4d01e486c919aef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 31 Dec 2025 10:01:17 +0700
Subject: [PATCH 030/236] fix: Handle None input in `estimate_tokens` and
 return 0 for empty text

---
 app/utils/helper.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 2627faa..28be240 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -47,8 +47,10 @@ def add_tag(role: str, content: str, unclose: bool = False) -> str:
     return f"<|im_start|>{role}\n{content}" + ("\n<|im_end|>" if not unclose else "")
 
 
-def estimate_tokens(text: str) -> int:
+def estimate_tokens(text: str | None) -> int:
     """Estimate the number of tokens heuristically based on character count"""
+    if not text:
+        return 0
     return int(len(text) / 3)
 
 

From a7e15d96bd2a4f62094bea02be7e86c8d305e59e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 31 Dec 2025 13:32:49 +0700
Subject: [PATCH 031/236] refactor: Simplify model configuration and add JSON
 parsing validators

- Replaced unused model placeholder in `config.yaml` with an empty list.
- Added JSON parsing validators for `model_header` and `models` to enhance flexibility and error handling.
- Improved validation to filter out incomplete model configurations.
---
 app/utils/config.py | 24 +++++++++++++++++++++++-
 config/config.yaml  |  5 +----
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/app/utils/config.py b/app/utils/config.py
index 5782c66..69a4fac 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -1,3 +1,4 @@
+import json
 import os
 import sys
 from typing import Any, Literal, Optional
@@ -58,6 +59,17 @@ class GeminiModelConfig(BaseModel):
         default=None, description="Header for the model"
     )
 
+    @field_validator("model_header", mode="before")
+    @classmethod
+    def _parse_json_string(cls, v: Any) -> Any:
+        if isinstance(v, str) and v.strip().startswith("{"):
+            try:
+                return json.loads(v)
+            except json.JSONDecodeError:
+                # Return the original value to let Pydantic handle the error or type mismatch
+                return v
+        return v
+
 
 class GeminiConfig(BaseModel):
     """Gemini API configuration"""
@@ -82,11 +94,21 @@ class GeminiConfig(BaseModel):
         description="Maximum characters Gemini Web can accept per request",
     )
 
+    @field_validator("models", mode="before")
+    @classmethod
+    def _parse_models_json(cls, v: Any) -> Any:
+        if isinstance(v, str) and v.strip().startswith("["):
+            try:
+                return json.loads(v)
+            except json.JSONDecodeError as e:
+                logger.warning(f"Failed to parse models JSON string: {e}")
+                return v
+        return v
+
     @field_validator("models")
     @classmethod
     def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]:
         """Filter out models that don't have a name set (placeholders)."""
-
         return [model for model in v if model.model_name]
 
 
diff --git a/config/config.yaml b/config/config.yaml
index 2fbc061..f2b17fb 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -28,10 +28,7 @@ gemini:
   verbose: false           # Enable verbose logging for Gemini requests
   max_chars_per_request: 1000000     # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit
   model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only)
-  models:
-    - model_name: null
-      model_header:
-        x-goog-ext-xxxxxxxxx-jspb: null
+  models: []
 
 storage:
   path: "data/lmdb"        # Database storage path

From 61c5f3b7af4ef6b78d5dc7e3d5ba9e6009b7d3cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 31 Dec 2025 13:46:58 +0700
Subject: [PATCH 032/236] refactor: Simplify Gemini model environment variable
 parsing with JSON support

- Replaced prefix-based parsing with a root key approach.
- Added JSON parsing to handle list-based model configurations.
- Improved handling of errors and cleanup of environment variables.
---
 app/utils/config.py | 44 +++++++++++++++++---------------------------
 1 file changed, 17 insertions(+), 27 deletions(-)

diff --git a/app/utils/config.py b/app/utils/config.py
index 69a4fac..6cb5664 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -256,36 +256,26 @@ def _merge_clients_with_env(
 
 def extract_gemini_models_env() -> dict[int, dict[str, Any]]:
     """Extract and remove all Gemini models related environment variables, supporting nested fields."""
-    prefix = "CONFIG_GEMINI__MODELS__"
-    env_overrides: dict[int, dict[str, Any]] = {}
-    to_delete = []
-    for k, v in os.environ.items():
-        if k.startswith(prefix):
-            parts = k.split("__")
-            if len(parts) < 4:
-                continue
-            index_str = parts[2]
-            if not index_str.isdigit():
-                continue
-            idx = int(index_str)
+    import json
 
-            # Navigate to the correct nested dict
-            current = env_overrides.setdefault(idx, {})
-            for i in range(3, len(parts) - 1):
-                field_name = parts[i].lower()
-                current = current.setdefault(field_name, {})
+    root_key = "CONFIG_GEMINI__MODELS"
+    env_overrides: dict[int, dict[str, Any]] = {}
 
-            # Set the value (lowercase root field names, preserve sub-key casing)
-            last_part = parts[-1]
-            if len(parts) == 4:
-                current[last_part.lower()] = v
-            else:
-                current[last_part] = v
+    if root_key in os.environ:
+        try:
+            val = os.environ[root_key]
+            if val.strip().startswith("["):
+                models_list = json.loads(val)
+                if isinstance(models_list, list):
+                    for idx, model_data in enumerate(models_list):
+                        if isinstance(model_data, dict):
+                            env_overrides[idx] = model_data
+
+            # Remove the environment variable to avoid Pydantic parsing errors
+            del os.environ[root_key]
+        except Exception as e:
+            logger.warning(f"Failed to parse {root_key} as JSON: {e}")
 
-            to_delete.append(k)
-    # Remove these environment variables to avoid Pydantic parsing errors
-    for k in to_delete:
-        del os.environ[k]
     return env_overrides
 
 

From efd056c270db5130c59b4e66c2543be7f5e8c6e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 31 Dec 2025 14:09:41 +0700
Subject: [PATCH 033/236] fix: Enhance Gemini model environment variable
 parsing with fallback to Python literals

- Added `ast.literal_eval` as a fallback for parsing environment variables when JSON decoding fails.
- Improved error handling and logging for invalid configurations.
- Removed the environment variable only after it parses successfully into a list; it is left in place when parsing fails.
---
 app/utils/config.py | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/app/utils/config.py b/app/utils/config.py
index 6cb5664..74a5294 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -1,3 +1,4 @@
+import ast
 import json
 import os
 import sys
@@ -256,25 +257,31 @@ def _merge_clients_with_env(
 
 def extract_gemini_models_env() -> dict[int, dict[str, Any]]:
     """Extract and remove all Gemini models related environment variables, supporting nested fields."""
-    import json
-
     root_key = "CONFIG_GEMINI__MODELS"
     env_overrides: dict[int, dict[str, Any]] = {}
 
     if root_key in os.environ:
+        val = os.environ[root_key]
+        models_list = None
+        parsed_successfully = False
+
         try:
-            val = os.environ[root_key]
-            if val.strip().startswith("["):
-                models_list = json.loads(val)
-                if isinstance(models_list, list):
-                    for idx, model_data in enumerate(models_list):
-                        if isinstance(model_data, dict):
-                            env_overrides[idx] = model_data
+            models_list = json.loads(val)
+            parsed_successfully = True
+        except json.JSONDecodeError:
+            try:
+                models_list = ast.literal_eval(val)
+                parsed_successfully = True
+            except (ValueError, SyntaxError) as e:
+                logger.warning(f"Failed to parse {root_key} as JSON or Python literal: {e}")
+
+        if parsed_successfully and isinstance(models_list, list):
+            for idx, model_data in enumerate(models_list):
+                if isinstance(model_data, dict):
+                    env_overrides[idx] = model_data
 
             # Remove the environment variable to avoid Pydantic parsing errors
             del os.environ[root_key]
-        except Exception as e:
-            logger.warning(f"Failed to parse {root_key} as JSON: {e}")
 
     return env_overrides
 
@@ -298,7 +305,7 @@ def _merge_models_with_env(
             model_dict.update(overrides)
             result_models[idx] = GeminiModelConfig(**model_dict)
         elif idx == len(result_models):
-            # Append new model
+            # Append new models
             new_model = GeminiModelConfig(**overrides)
             result_models.append(new_model)
         else:

From 476b9dd228aa99501638987d1f44fe3c5eb23067 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 31 Dec 2025 17:53:38 +0700
Subject: [PATCH 034/236] fix: Improve regex patterns in helper module

- Tightened `TOOL_CALL_RE` so the `name` attribute must be double-quoted (single-quoted names no longer match).
---
 app/utils/helper.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 28be240..99e6d7a 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -30,7 +30,7 @@
 )
 TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)
 TOOL_CALL_RE = re.compile(
-    r"<tool_call\s+name=[\"']([^\"']+)[\"']\s*>(.*?)</tool_call>", re.DOTALL | re.IGNORECASE
+    r"<tool_call\s+name=\"([^\"]+)\"\s*>(.*?)</tool_call>", re.DOTALL | re.IGNORECASE
 )
 JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE)
 CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>")
@@ -140,7 +140,7 @@ def strip_tagged_blocks(text: str) -> str:
         role_start = start + len(start_marker)
         newline = text.find("\n", role_start)
         if newline == -1:
-            # malformed block; keep remainder as-is (safe behavior)
+            # malformed block; keep the remainder as-is (safe behavior)
             result.append(text[start:])
             break
 
@@ -150,7 +150,7 @@ def strip_tagged_blocks(text: str) -> str:
         if end == -1:
             # missing end marker
             if role == "tool":
-                # drop from start marker to EOF (skip remainder)
+                # drop from the start marker to EOF (skip the remainder)
                 break
             else:
                 # keep inner content from after the role newline to EOF
@@ -160,7 +160,7 @@ def strip_tagged_blocks(text: str) -> str:
         block_end = end + len(end_marker)
 
         if role == "tool":
-            # drop whole block
+            # drop the whole block
             idx = block_end
             continue
 
@@ -217,7 +217,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
     tool_calls: list[ToolCall] = []
 
     def _create_tool_call(name: str, raw_args: str) -> None:
-        """Helper to parse args and append to tool_calls list."""
+        """Helper to parse args and append to the tool_calls list."""
         if not name:
             logger.warning("Encountered tool_call without a function name.")
             return

From 35c1e99993d11033ae9047e85f645ce5def7f09b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 13 Jan 2026 09:02:10 +0700
Subject: [PATCH 035/236] docs: Update README files to include custom model
 configuration and environment variable setup

---
 README.md    | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 README.zh.md | 51 ++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 2df3a73..5d6de40 100644
--- a/README.md
+++ b/README.md
@@ -118,7 +118,7 @@ services:
       - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSID=${SECURE_1PSID}
       - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS=${SECURE_1PSIDTS}
       - GEMINI_COOKIE_PATH=/app/cache # must match the cache volume mount above
-    restart: on-failure:3             # Avoid retrying too many times
+    restart: on-failure:3 # Avoid retrying too many times
 ```
 
 Then run:
@@ -187,6 +187,51 @@ To use Gemini-FastAPI, you need to extract your Gemini session cookies:
 
 Each client entry can be configured with a different proxy to work around rate limits. Omit the `proxy` field or set it to `null` or an empty string to keep a direct connection.
 
+### Custom Models
+
+You can define custom models in `config/config.yaml` or via environment variables.
+
+#### YAML Configuration
+
+```yaml
+gemini:
+  model_strategy: "append" # "append" (default + custom) or "overwrite" (custom only)
+  models:
+    - model_name: "gemini-3.0-pro"
+      model_header:
+        x-goog-ext-525001261-jspb: '[1,null,null,null,"9d8ca3786ebdfbea",null,null,0,[4],null,null,1]'
+```
+
+#### Environment Variables
+
+You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`.
+
+##### Bash
+
+```bash
+export CONFIG_GEMINI__MODEL_STRATEGY="overwrite"
+export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]'
+```
+
+##### Docker Compose
+
+```yaml
+services:
+  gemini-fastapi:
+    environment:
+      - CONFIG_GEMINI__MODEL_STRATEGY=overwrite
+      - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]
+```
+
+##### Docker CLI
+
+```bash
+docker run -d \
+  -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \
+  -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \
+  ghcr.io/nativu5/gemini-fastapi
+```
+
 ## Acknowledgments
 
 - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - The underlying Gemini web API client
diff --git a/README.zh.md b/README.zh.md
index 6b7dd74..791afd8 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -4,7 +4,6 @@
 [![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/)
 [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
 
-
 [ [English](README.md) | 中文 ]
 
 将 Gemini 网页端模型封装为兼容 OpenAI API 的 API Server。基于 [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) 实现。
@@ -50,6 +49,7 @@ pip install -e .
 ### 配置
 
 编辑 `config/config.yaml` 并提供至少一组凭证：
+
 ```yaml
 gemini:
   clients:
@@ -118,7 +118,7 @@ services:
       - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSID=${SECURE_1PSID}
       - CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS=${SECURE_1PSIDTS}
       - GEMINI_COOKIE_PATH=/app/cache # must match the cache volume mount above
-    restart: on-failure:3             # Avoid retrying too many times
+    restart: on-failure:3 # Avoid retrying too many times
 ```
 
 然后运行：
@@ -186,6 +186,51 @@ export CONFIG_STORAGE__MAX_SIZE=268435456  # 256 MB
 
 每个客户端条目可以配置不同的代理，从而规避速率限制。省略 `proxy` 字段或将其设置为 `null` 或空字符串以保持直连。
 
+### 自定义模型
+
+你可以在 `config/config.yaml` 中或通过环境变量定义自定义模型。
+
+#### YAML 配置
+
+```yaml
+gemini:
+  model_strategy: "append" # "append" (默认 + 自定义) 或 "overwrite" (仅限自定义)
+  models:
+    - model_name: "gemini-3.0-pro"
+      model_header:
+        x-goog-ext-525001261-jspb: '[1,null,null,null,"9d8ca3786ebdfbea",null,null,0,[4],null,null,1]'
+```
+
+#### 环境变量
+
+你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。
+
+##### Bash
+
+```bash
+export CONFIG_GEMINI__MODEL_STRATEGY="overwrite"
+export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]'
+```
+
+##### Docker Compose
+
+```yaml
+services:
+  gemini-fastapi:
+    environment:
+      - CONFIG_GEMINI__MODEL_STRATEGY=overwrite
+      - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]
+```
+
+##### Docker CLI
+
+```bash
+docker run -d \
+  -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \
+  -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \
+  ghcr.io/nativu5/gemini-fastapi
+```
+
 ## 鸣谢
 
 - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - 底层 Gemini Web API 客户端
@@ -193,4 +238,4 @@ export CONFIG_STORAGE__MAX_SIZE=268435456  # 256 MB
 
 ## 免责声明
 
-本项目与 Google 或 OpenAI 无关，仅供学习和研究使用。本项目使用了逆向工程 API，可能不符合 Google 服务条款。使用风险自负。
\ No newline at end of file
+本项目与 Google 或 OpenAI 无关，仅供学习和研究使用。本项目使用了逆向工程 API，可能不符合 Google 服务条款。使用风险自负。

From 9b8162133e86a323400e7e2fb36ed651b31c795f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 13 Jan 2026 09:23:28 +0700
Subject: [PATCH 036/236] fix: Remove unused headers from HTTP client in helper
 module

---
 app/utils/helper.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 99e6d7a..51a6ccf 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -14,9 +14,6 @@
 
 from ..models import FunctionCall, Message, ToolCall
 
-HEADERS = {
-    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
-}
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 XML_WRAP_HINT = (
     "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n"
@@ -82,7 +79,7 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
         if not suffix:
             suffix = f".{mime_type.split('/')[1]}"
     else:
-        async with httpx.AsyncClient(headers=HEADERS, follow_redirects=True) as client:
+        async with httpx.AsyncClient(follow_redirects=True) as client:
             resp = await client.get(url)
             resp.raise_for_status()
             data = resp.content

From 32a48dcdc98d9e96e791ae6f914e6b3f12804c97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 15 Jan 2026 10:18:58 +0700
Subject: [PATCH 037/236] fix: Update README and README.zh to clarify model
 configuration via environment variables; enhance error logging in config
 validation

---
 README.md           | 23 +----------------------
 README.zh.md        | 23 +----------------------
 app/server/chat.py  |  6 ++++--
 app/utils/config.py | 27 +++++++++++++++++++++++----
 4 files changed, 29 insertions(+), 50 deletions(-)

diff --git a/README.md b/README.md
index 5d6de40..d7a7214 100644
--- a/README.md
+++ b/README.md
@@ -204,34 +204,13 @@ gemini:
 
 #### Environment Variables
 
-You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`.
-
-##### Bash
+You can supply models as a JSON string via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments without modifying the configuration file.
 
 ```bash
 export CONFIG_GEMINI__MODEL_STRATEGY="overwrite"
 export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]'
 ```
 
-##### Docker Compose
-
-```yaml
-services:
-  gemini-fastapi:
-    environment:
-      - CONFIG_GEMINI__MODEL_STRATEGY=overwrite
-      - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]
-```
-
-##### Docker CLI
-
-```bash
-docker run -d \
-  -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \
-  -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \
-  ghcr.io/nativu5/gemini-fastapi
-```
-
 ## Acknowledgments
 
 - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - The underlying Gemini web API client
diff --git a/README.zh.md b/README.zh.md
index 791afd8..09d80a4 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -203,34 +203,13 @@ gemini:
 
 #### 环境变量
 
-你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。
-
-##### Bash
+你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串的形式提供模型。这为通过 shell 或在自动化环境中覆盖设置提供了一种灵活的方式，无需修改配置文件。
 
 ```bash
 export CONFIG_GEMINI__MODEL_STRATEGY="overwrite"
 export CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]'
 ```
 
-##### Docker Compose
-
-```yaml
-services:
-  gemini-fastapi:
-    environment:
-      - CONFIG_GEMINI__MODEL_STRATEGY=overwrite
-      - CONFIG_GEMINI__MODELS=[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]
-```
-
-##### Docker CLI
-
-```bash
-docker run -d \
-  -e CONFIG_GEMINI__MODEL_STRATEGY="overwrite" \
-  -e CONFIG_GEMINI__MODELS='[{"model_name": "gemini-3.0-pro", "model_header": {"x-goog-ext-525001261-jspb": "[1,null,null,null,\"9d8ca3786ebdfbea\",null,null,0,[4],null,null,1]"}}]' \
-  ghcr.io/nativu5/gemini-fastapi
-```
-
 ## 鸣谢
 
 - [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) - 底层 Gemini Web API 客户端
diff --git a/app/server/chat.py b/app/server/chat.py
index 9485f7a..6e517ea 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -924,7 +924,7 @@ async def _build_payload(
 
         image_call_items.append(
             ResponseImageGenerationCall(
-                id=filename.split(".")[0],
+                id=filename.rsplit(".", 1)[0],
                 status="completed",
                 result=image_base64,
                 output_format=img_format,
@@ -1350,7 +1350,9 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non
         try:
             saved_path = await image.save(path=str(temp_dir), full_size=True)
         except Exception as e:
-            logger.warning(f"Failed to download full-size image, retrying with default size: {e}")
+            logger.warning(
+                f"Failed to download full-size GeneratedImage, retrying with default size: {e}"
+            )
             saved_path = await image.save(path=str(temp_dir), full_size=False)
     else:
         saved_path = await image.save(path=str(temp_dir))
diff --git a/app/utils/config.py b/app/utils/config.py
index 74a5294..a9c5d44 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -109,8 +109,21 @@ def _parse_models_json(cls, v: Any) -> Any:
     @field_validator("models")
     @classmethod
     def _filter_valid_models(cls, v: list[GeminiModelConfig]) -> list[GeminiModelConfig]:
-        """Filter out models that don't have a name set (placeholders)."""
-        return [model for model in v if model.model_name]
+        """Filter out models that don't have all required fields set."""
+        valid_models = []
+        for model in v:
+            if model.model_name and model.model_header:
+                valid_models.append(model)
+            else:
+                missing = []
+                if not model.model_name:
+                    missing.append("model_name")
+                if not model.model_header:
+                    missing.append("model_header")
+                logger.warning(
+                    f"Discarding custom model due to missing {', '.join(missing)}: {model}"
+                )
+        return valid_models
 
 
 class CORSConfig(BaseModel):
@@ -251,7 +264,10 @@ def _merge_clients_with_env(
             new_client = GeminiClientSettings(**overrides)
             result_clients.append(new_client)
         else:
-            raise IndexError(f"Client index {idx} in env is out of range.")
+            raise IndexError(
+                f"Client index {idx} in env is out of range (current count: {len(result_clients)}). "
+                "Client indices must be contiguous starting from 0."
+            )
     return result_clients if result_clients else base_clients
 
 
@@ -309,7 +325,10 @@ def _merge_models_with_env(
             new_model = GeminiModelConfig(**overrides)
             result_models.append(new_model)
         else:
-            raise IndexError(f"Model index {idx} in env is out of range (must be contiguous).")
+            raise IndexError(
+                f"Model index {idx} in env is out of range (current count: {len(result_models)}). "
+                "Model indices must be contiguous starting from 0."
+            )
     return result_models
 
 

From 0c00b089d5b33e394abaac6a1d36ae08cede166c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 15 Jan 2026 11:24:08 +0700
Subject: [PATCH 038/236] Update README and README.zh to clarify model
 configuration via JSON string or list structure for enhanced flexibility in
 automated environments

---
 README.md    | 2 +-
 README.zh.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index d7a7214..330e9c8 100644
--- a/README.md
+++ b/README.md
@@ -204,7 +204,7 @@ gemini:
 
 #### Environment Variables
 
-You can supply models as a JSON string via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments without modifying the configuration file.
+You can supply models as a JSON string or list structure via `CONFIG_GEMINI__MODELS`. This provides a flexible way to override settings via the shell or in automated environments (e.g. Docker) without modifying the configuration file.
 
 ```bash
 export CONFIG_GEMINI__MODEL_STRATEGY="overwrite"
diff --git a/README.zh.md b/README.zh.md
index 09d80a4..2f9e1b5 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -203,7 +203,7 @@ gemini:
 
 #### 环境变量
 
-你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串的形式提供模型。这为通过 shell 或在自动化环境中覆盖设置提供了一种灵活的方式，无需修改配置文件。
+你可以通过 `CONFIG_GEMINI__MODELS` 以 JSON 字符串或列表结构的形式提供模型。这为通过 shell 或在自动化环境（例如 Docker）中覆盖设置提供了一种灵活的方式，而无需修改配置文件。
 
 ```bash
 export CONFIG_GEMINI__MODEL_STRATEGY="overwrite"

From b599d99f9967188bb8a277fd09951ddf32006f20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 23 Jan 2026 12:14:40 +0700
Subject: [PATCH 039/236] Refactor: compress JSON content to save tokens and
 streamline sending multiple chunks

---
 app/server/chat.py     | 50 +++++++++++++++++++++++++++++-------------
 app/services/client.py |  4 ++--
 app/utils/helper.py    |  2 +-
 3 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 6e517ea..1e7d786 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1,5 +1,7 @@
+import asyncio
 import base64
 import json
+import random
 import re
 import uuid
 from dataclasses import dataclass
@@ -95,7 +97,7 @@ def _build_structured_requirement(
     schema_name = json_schema.get("name") or "response"
     strict = json_schema.get("strict", True)
 
-    pretty_schema = json.dumps(schema, ensure_ascii=False, indent=2, sort_keys=True)
+    pretty_schema = json.dumps(schema, ensure_ascii=False, separators=(",", ":"), sort_keys=True)
     instruction_parts = [
         "You must respond with a single valid JSON document that conforms to the schema shown below.",
         "Do not include explanations, comments, or any text before or after the JSON.",
@@ -135,7 +137,7 @@ def _build_tool_prompt(
         description = function.description or "No description provided."
         lines.append(f"Tool `{function.name}`: {description}")
         if function.parameters:
-            schema_text = json.dumps(function.parameters, ensure_ascii=False, indent=2)
+            schema_text = json.dumps(function.parameters, ensure_ascii=False, separators=(",", ":"))
             lines.append("Arguments JSON schema:")
             lines.append(schema_text)
         else:
@@ -635,7 +637,7 @@ async def create_chat_completion(
                 detail="LLM returned invalid JSON for the requested response_format.",
             ) from exc
 
-        canonical_output = json.dumps(structured_payload, ensure_ascii=False)
+        canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":"))
         visible_output = canonical_output
         storage_output = canonical_output
 
@@ -875,7 +877,7 @@ async def _build_payload(
                 detail="LLM returned invalid JSON for the requested response_format.",
             ) from exc
 
-        canonical_output = json.dumps(structured_payload, ensure_ascii=False)
+        canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":"))
         assistant_text = canonical_output
         storage_output = canonical_output
         logger.debug(
@@ -1081,38 +1083,56 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s
     that Gemini can produce the actual answer.
     """
     if len(text) <= MAX_CHARS_PER_REQUEST:
-        # No need to split - a single request is fine.
         try:
             return await session.send_message(text, files=files)
         except Exception as e:
             logger.exception(f"Error sending message to Gemini: {e}")
             raise
+
     hint_len = len(CONTINUATION_HINT)
-    chunk_size = MAX_CHARS_PER_REQUEST - hint_len
+    safe_chunk_size = MAX_CHARS_PER_REQUEST - hint_len
 
     chunks: list[str] = []
     pos = 0
     total = len(text)
+
     while pos < total:
-        end = min(pos + chunk_size, total)
-        chunk = text[pos:end]
-        pos = end
+        remaining = total - pos
+        if remaining <= MAX_CHARS_PER_REQUEST:
+            chunks.append(text[pos:])
+            break
+
+        end = pos + safe_chunk_size
+        slice_candidate = text[pos:end]
+        # Try to find a safe split point
+        split_idx = -1
+        idx = slice_candidate.rfind("\n")
+        if idx != -1:
+            split_idx = idx
+
+        if split_idx != -1:
+            split_at = pos + split_idx + 1
+        else:
+            split_at = end
 
-        # If this is NOT the last chunk, add the continuation hint.
-        if end < total:
-            chunk += CONTINUATION_HINT
+        chunk = text[pos:split_at] + CONTINUATION_HINT
         chunks.append(chunk)
+        pos = split_at
 
-    # Fire off all but the last chunk, discarding the interim "ok" replies.
-    for chk in chunks[:-1]:
+    chunks_size = len(chunks)
+    for i, chk in enumerate(chunks[:-1]):
         try:
+            logger.debug(f"Sending chunk {i + 1}/{chunks_size}...")
             await session.send_message(chk)
+            delay = random.uniform(1.0, 3.0)
+            logger.debug(f"Sleeping for {delay:.2f}s...")
+            await asyncio.sleep(delay)
         except Exception as e:
             logger.exception(f"Error sending chunk to Gemini: {e}")
             raise
 
-    # The last chunk carries the files (if any) and we return its response.
     try:
+        logger.debug(f"Sending final chunk {chunks_size}/{chunks_size}...")
         return await session.send_message(chunks[-1], files=files)
     except Exception as e:
         logger.exception(f"Error sending final chunk to Gemini: {e}")
diff --git a/app/services/client.py b/app/services/client.py
index 87c0ca7..1f23271 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -123,7 +123,7 @@ async def process_message(
                 args_text = call.function.arguments.strip()
                 try:
                     parsed_args = json.loads(args_text)
-                    args_text = json.dumps(parsed_args, ensure_ascii=False)
+                    args_text = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":"))
                 except (json.JSONDecodeError, TypeError):
                     # Leave args_text as is if it is not valid JSON
                     pass
@@ -132,7 +132,7 @@ async def process_message(
                 )
 
             if tool_blocks:
-                tool_section = "```xml\n" + "\n".join(tool_blocks) + "\n```"
+                tool_section = "```xml\n" + "".join(tool_blocks) + "\n```"
                 text_fragments.append(tool_section)
 
         model_input = "\n".join(fragment for fragment in text_fragments if fragment)
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 51a6ccf..578b666 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -222,7 +222,7 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         arguments = raw_args
         try:
             parsed_args = json.loads(raw_args)
-            arguments = json.dumps(parsed_args, ensure_ascii=False)
+            arguments = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":"))
         except json.JSONDecodeError:
             logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.")
 

From 186b8448d7f088df621b627ca7b28c5a7acaf341 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 23 Jan 2026 23:08:32 +0700
Subject: [PATCH 040/236] Refactor: Modify the LMDB store to fix issues where
 no conversation is found in either the raw or cleaned history.

---
 app/services/lmdb.py | 46 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 8ccb0d4..d671663 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -9,7 +9,7 @@
 import orjson
 from loguru import logger
 
-from ..models import ConversationInStore, Message
+from ..models import ContentItem, ConversationInStore, Message
 from ..utils import g_config
 from ..utils.singleton import Singleton
 
@@ -18,6 +18,19 @@ def _hash_message(message: Message) -> str:
     """Generate a hash for a single message."""
     # Convert message to dict and sort keys for consistent hashing
     message_dict = message.model_dump(mode="json")
+    content = message_dict.get("content")
+    if isinstance(content, list):
+        is_pure_text = True
+        text_parts = []
+        for item in content:
+            if not isinstance(item, dict) or item.get("type") != "text":
+                is_pure_text = False
+                break
+            text_parts.append(item.get("text") or "")
+
+        if is_pure_text:
+            message_dict["content"] = "".join(text_parts)
+
     message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS)
     return hashlib.sha256(message_bytes).hexdigest()
 
@@ -435,12 +448,31 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
         """
         cleaned_messages = []
         for msg in messages:
-            if msg.role == "assistant" and isinstance(msg.content, str):
-                normalized_content = LMDBConversationStore.remove_think_tags(msg.content)
-                # Only create a new object if content actually changed
-                if normalized_content != msg.content:
-                    cleaned_msg = Message(role=msg.role, content=normalized_content, name=msg.name)
-                    cleaned_messages.append(cleaned_msg)
+            if msg.role == "assistant":
+                if isinstance(msg.content, str):
+                    normalized_content = LMDBConversationStore.remove_think_tags(msg.content)
+                    if normalized_content != msg.content:
+                        cleaned_msg = Message(
+                            role=msg.role, content=normalized_content, name=msg.name
+                        )
+                        cleaned_messages.append(cleaned_msg)
+                    else:
+                        cleaned_messages.append(msg)
+                elif isinstance(msg.content, list):
+                    new_content = []
+                    changed = False
+                    for item in msg.content:
+                        if isinstance(item, ContentItem) and item.type == "text" and item.text:
+                            cleaned_text = LMDBConversationStore.remove_think_tags(item.text)
+                            if cleaned_text != item.text:
+                                changed = True
+                                item = item.model_copy(update={"text": cleaned_text})
+                        new_content.append(item)
+
+                    if changed:
+                        cleaned_messages.append(msg.model_copy(update={"content": new_content}))
+                    else:
+                        cleaned_messages.append(msg)
                 else:
                     cleaned_messages.append(msg)
             else:

From 6dd1fecdced932c537f579a3c5dd3db87847d475 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 24 Jan 2026 10:03:24 +0700
Subject: [PATCH 041/236] Refactor: Modify the LMDB store to fix issues where
 no conversation is found.

---
 app/services/lmdb.py | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index d671663..93c7723 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -18,8 +18,12 @@ def _hash_message(message: Message) -> str:
     """Generate a hash for a single message."""
     # Convert message to dict and sort keys for consistent hashing
     message_dict = message.model_dump(mode="json")
+
+    # Normalize content: empty string -> None
     content = message_dict.get("content")
-    if isinstance(content, list):
+    if content == "":
+        message_dict["content"] = None
+    elif isinstance(content, list):
         is_pure_text = True
         text_parts = []
         for item in content:
@@ -29,7 +33,27 @@ def _hash_message(message: Message) -> str:
             text_parts.append(item.get("text") or "")
 
         if is_pure_text:
-            message_dict["content"] = "".join(text_parts)
+            text_content = "".join(text_parts)
+            message_dict["content"] = text_content if text_content else None
+
+    # Normalize tool_calls: empty list -> None, and canonicalize arguments
+    tool_calls = message_dict.get("tool_calls")
+    if not tool_calls:
+        message_dict["tool_calls"] = None
+    elif isinstance(tool_calls, list):
+        for tool_call in tool_calls:
+            if isinstance(tool_call, dict) and "function" in tool_call:
+                func = tool_call["function"]
+                args = func.get("arguments")
+                if isinstance(args, str):
+                    try:
+                        # Parse and re-dump to canonicalize (remove extra whitespace, sort keys)
+                        parsed = orjson.loads(args)
+                        func["arguments"] = orjson.dumps(
+                            parsed, option=orjson.OPT_SORT_KEYS
+                        ).decode("utf-8")
+                    except Exception:
+                        pass
 
     message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS)
     return hashlib.sha256(message_bytes).hexdigest()

From 20ed2456d2324501bbe4ba6392870cd612c9083c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 24 Jan 2026 10:46:27 +0700
Subject: [PATCH 042/236] Refactor: Update all functions to use orjson for
 better performance

---
 app/main.py            |  2 ++
 app/server/chat.py     | 17 ++++++++---------
 app/services/client.py |  8 ++++----
 app/utils/config.py    | 14 +++++++-------
 app/utils/helper.py    |  8 ++++----
 5 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/app/main.py b/app/main.py
index c215e2a..307eb36 100644
--- a/app/main.py
+++ b/app/main.py
@@ -2,6 +2,7 @@
 from contextlib import asynccontextmanager
 
 from fastapi import FastAPI
+from fastapi.responses import ORJSONResponse
 from loguru import logger
 
 from .server.chat import router as chat_router
@@ -92,6 +93,7 @@ def create_app() -> FastAPI:
         description="OpenAI-compatible API for Gemini Web",
         version="1.0.0",
         lifespan=lifespan,
+        default_response_class=ORJSONResponse,
     )
 
     add_cors_middleware(app)
diff --git a/app/server/chat.py b/app/server/chat.py
index 1e7d786..a9d9dec 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1,6 +1,5 @@
 import asyncio
 import base64
-import json
 import random
 import re
 import uuid
@@ -97,7 +96,7 @@ def _build_structured_requirement(
     schema_name = json_schema.get("name") or "response"
     strict = json_schema.get("strict", True)
 
-    pretty_schema = json.dumps(schema, ensure_ascii=False, separators=(",", ":"), sort_keys=True)
+    pretty_schema = orjson.dumps(schema, option=orjson.OPT_SORT_KEYS).decode("utf-8")
     instruction_parts = [
         "You must respond with a single valid JSON document that conforms to the schema shown below.",
         "Do not include explanations, comments, or any text before or after the JSON.",
@@ -137,7 +136,7 @@ def _build_tool_prompt(
         description = function.description or "No description provided."
         lines.append(f"Tool `{function.name}`: {description}")
         if function.parameters:
-            schema_text = json.dumps(function.parameters, ensure_ascii=False, separators=(",", ":"))
+            schema_text = orjson.dumps(function.parameters).decode("utf-8")
             lines.append("Arguments JSON schema:")
             lines.append(schema_text)
         else:
@@ -626,8 +625,8 @@ async def create_chat_completion(
                 detail="LLM returned an empty response while JSON schema output was requested.",
             )
         try:
-            structured_payload = json.loads(cleaned_visible)
-        except json.JSONDecodeError as exc:
+            structured_payload = orjson.loads(cleaned_visible)
+        except orjson.JSONDecodeError as exc:
             logger.warning(
                 f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): "
                 f"{cleaned_visible}"
@@ -637,7 +636,7 @@ async def create_chat_completion(
                 detail="LLM returned invalid JSON for the requested response_format.",
             ) from exc
 
-        canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":"))
+        canonical_output = orjson.dumps(structured_payload).decode("utf-8")
         visible_output = canonical_output
         storage_output = canonical_output
 
@@ -866,8 +865,8 @@ async def _build_payload(
                 detail="LLM returned an empty response while JSON schema output was requested.",
             )
         try:
-            structured_payload = json.loads(cleaned_visible)
-        except json.JSONDecodeError as exc:
+            structured_payload = orjson.loads(cleaned_visible)
+        except orjson.JSONDecodeError as exc:
             logger.warning(
                 f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): "
                 f"{cleaned_visible}"
@@ -877,7 +876,7 @@ async def _build_payload(
                 detail="LLM returned invalid JSON for the requested response_format.",
             ) from exc
 
-        canonical_output = json.dumps(structured_payload, ensure_ascii=False, separators=(",", ":"))
+        canonical_output = orjson.dumps(structured_payload).decode("utf-8")
         assistant_text = canonical_output
         storage_output = canonical_output
         logger.debug(
diff --git a/app/services/client.py b/app/services/client.py
index 1f23271..55be11a 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -1,9 +1,9 @@
 import html
-import json
 import re
 from pathlib import Path
 from typing import Any, cast
 
+import orjson
 from gemini_webapi import GeminiClient, ModelOutput
 from loguru import logger
 
@@ -122,9 +122,9 @@ async def process_message(
             for call in message.tool_calls:
                 args_text = call.function.arguments.strip()
                 try:
-                    parsed_args = json.loads(args_text)
-                    args_text = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":"))
-                except (json.JSONDecodeError, TypeError):
+                    parsed_args = orjson.loads(args_text)
+                    args_text = orjson.dumps(parsed_args).decode("utf-8")
+                except orjson.JSONDecodeError:
                     # Leave args_text as is if it is not valid JSON
                     pass
                 tool_blocks.append(
diff --git a/app/utils/config.py b/app/utils/config.py
index a9c5d44..708462d 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -1,9 +1,9 @@
 import ast
-import json
 import os
 import sys
 from typing import Any, Literal, Optional
 
+import orjson
 from loguru import logger
 from pydantic import BaseModel, Field, ValidationError, field_validator
 from pydantic_settings import (
@@ -65,8 +65,8 @@ class GeminiModelConfig(BaseModel):
     def _parse_json_string(cls, v: Any) -> Any:
         if isinstance(v, str) and v.strip().startswith("{"):
             try:
-                return json.loads(v)
-            except json.JSONDecodeError:
+                return orjson.loads(v)
+            except orjson.JSONDecodeError:
                 # Return the original value to let Pydantic handle the error or type mismatch
                 return v
         return v
@@ -100,8 +100,8 @@ class GeminiConfig(BaseModel):
     def _parse_models_json(cls, v: Any) -> Any:
         if isinstance(v, str) and v.strip().startswith("["):
             try:
-                return json.loads(v)
-            except json.JSONDecodeError as e:
+                return orjson.loads(v)
+            except orjson.JSONDecodeError as e:
                 logger.warning(f"Failed to parse models JSON string: {e}")
                 return v
         return v
@@ -282,9 +282,9 @@ def extract_gemini_models_env() -> dict[int, dict[str, Any]]:
         parsed_successfully = False
 
         try:
-            models_list = json.loads(val)
+            models_list = orjson.loads(val)
             parsed_successfully = True
-        except json.JSONDecodeError:
+        except orjson.JSONDecodeError:
             try:
                 models_list = ast.literal_eval(val)
                 parsed_successfully = True
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 578b666..1dc518f 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -1,5 +1,4 @@
 import base64
-import json
 import mimetypes
 import re
 import struct
@@ -10,6 +9,7 @@
 from urllib.parse import urlparse
 
 import httpx
+import orjson
 from loguru import logger
 
 from ..models import FunctionCall, Message, ToolCall
@@ -221,9 +221,9 @@ def _create_tool_call(name: str, raw_args: str) -> None:
 
         arguments = raw_args
         try:
-            parsed_args = json.loads(raw_args)
-            arguments = json.dumps(parsed_args, ensure_ascii=False, separators=(",", ":"))
-        except json.JSONDecodeError:
+            parsed_args = orjson.loads(raw_args)
+            arguments = orjson.dumps(parsed_args).decode("utf-8")
+        except orjson.JSONDecodeError:
             logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.")
 
         tool_calls.append(

From f67fe63b3b654d3a28cc5ca0363a4ad894831d84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 24 Jan 2026 10:47:26 +0700
Subject: [PATCH 043/236] Update project dependencies

---
 pyproject.toml |  21 ++++-----
 uv.lock        | 118 +++++++++++++++++++++++++------------------------
 2 files changed, 71 insertions(+), 68 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 32a42b4..1c30f8e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,24 +5,25 @@ description = "FastAPI Server built on Gemini Web API"
 readme = "README.md"
 requires-python = "==3.12.*"
 dependencies = [
-    "fastapi>=0.115.12",
-    "gemini-webapi>=1.17.0",
-    "lmdb>=1.6.2",
-    "loguru>=0.7.0",
-    "pydantic-settings[yaml]>=2.9.1",
-    "uvicorn>=0.34.1",
-    "uvloop>=0.21.0; sys_platform != 'win32'",
+    "fastapi>=0.128.0",
+    "gemini-webapi>=1.17.3",
+    "lmdb>=1.7.5",
+    "loguru>=0.7.3",
+    "orjson>=3.11.5",
+    "pydantic-settings[yaml]>=2.12.0",
+    "uvicorn>=0.40.0",
+    "uvloop>=0.22.1; sys_platform != 'win32'",
 ]
 
 [project.optional-dependencies]
 dev = [
-    "ruff>=0.11.7",
+    "ruff>=0.14.14",
 ]
 
 [tool.ruff]
 line-length = 100
 lint.select = ["E", "F", "W", "I", "RUF"]
-lint.ignore = ["E501"] 
+lint.ignore = ["E501"]
 
 [tool.ruff.format]
 quote-style = "double"
@@ -30,5 +31,5 @@ indent-style = "space"
 
 [dependency-groups]
 dev = [
-    "ruff>=0.11.13",
+    "ruff>=0.14.14",
 ]
diff --git a/uv.lock b/uv.lock
index 923e6d3..50a73be 100644
--- a/uv.lock
+++ b/uv.lock
@@ -22,24 +22,24 @@ wheels = [
 
 [[package]]
 name = "anyio"
-version = "4.12.0"
+version = "4.12.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "idna" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/16/ce/8a777047513153587e5434fd752e89334ac33e379aa3497db860eeb60377/anyio-4.12.0.tar.gz", hash = "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0", size = 228266, upload-time = "2025-11-28T23:37:38.911Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/7f/9c/36c5c37947ebfb8c7f22e0eb6e4d188ee2d53aa3880f3f2744fb894f0cb1/anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb", size = 113362, upload-time = "2025-11-28T23:36:57.897Z" },
+    { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
 ]
 
 [[package]]
 name = "certifi"
-version = "2025.11.12"
+version = "2026.1.4"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
 ]
 
 [[package]]
@@ -65,7 +65,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.123.10"
+version = "0.128.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -73,9 +73,9 @@ dependencies = [
     { name = "starlette" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/22/ff/e01087de891010089f1620c916c0c13130f3898177955c13e2b02d22ec4a/fastapi-0.123.10.tar.gz", hash = "sha256:624d384d7cda7c096449c889fc776a0571948ba14c3c929fa8e9a78cd0b0a6a8", size = 356360, upload-time = "2025-12-05T21:27:46.237Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/52/08/8c8508db6c7b9aae8f7175046af41baad690771c9bcde676419965e338c7/fastapi-0.128.0.tar.gz", hash = "sha256:1cc179e1cef10a6be60ffe429f79b829dce99d8de32d7acb7e6c8dfdf7f2645a", size = 365682, upload-time = "2025-12-27T15:21:13.714Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d7/f0/7cb92c4a720def85240fd63fbbcf147ce19e7a731c8e1032376bb5a486ac/fastapi-0.123.10-py3-none-any.whl", hash = "sha256:0503b7b7bc71bc98f7c90c9117d21fdf6147c0d74703011b87936becc86985c1", size = 111774, upload-time = "2025-12-05T21:27:44.78Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/05/5cbb59154b093548acd0f4c7c474a118eda06da25aa75c616b72d8fcd92a/fastapi-0.128.0-py3-none-any.whl", hash = "sha256:aebd93f9716ee3b4f4fcfe13ffb7cf308d99c9f3ab5622d8877441072561582d", size = 103094, upload-time = "2025-12-27T15:21:12.154Z" },
 ]
 
 [[package]]
@@ -87,6 +87,7 @@ dependencies = [
     { name = "gemini-webapi" },
     { name = "lmdb" },
     { name = "loguru" },
+    { name = "orjson" },
     { name = "pydantic-settings", extra = ["yaml"] },
     { name = "uvicorn" },
     { name = "uvloop", marker = "sys_platform != 'win32'" },
@@ -104,19 +105,20 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "fastapi", specifier = ">=0.115.12" },
-    { name = "gemini-webapi", specifier = ">=1.17.0" },
-    { name = "lmdb", specifier = ">=1.6.2" },
-    { name = "loguru", specifier = ">=0.7.0" },
-    { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.9.1" },
-    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.11.7" },
-    { name = "uvicorn", specifier = ">=0.34.1" },
-    { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.21.0" },
+    { name = "fastapi", specifier = ">=0.128.0" },
+    { name = "gemini-webapi", specifier = ">=1.17.3" },
+    { name = "lmdb", specifier = ">=1.7.5" },
+    { name = "loguru", specifier = ">=0.7.3" },
+    { name = "orjson", specifier = ">=3.11.5" },
+    { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" },
+    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.14.14" },
+    { name = "uvicorn", specifier = ">=0.40.0" },
+    { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" },
 ]
 provides-extras = ["dev"]
 
 [package.metadata.requires-dev]
-dev = [{ name = "ruff", specifier = ">=0.11.13" }]
+dev = [{ name = "ruff", specifier = ">=0.14.14" }]
 
 [[package]]
 name = "gemini-webapi"
@@ -209,25 +211,25 @@ wheels = [
 
 [[package]]
 name = "orjson"
-version = "3.11.4"
+version = "3.11.5"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/c6/fe/ed708782d6709cc60eb4c2d8a361a440661f74134675c72990f2c48c785f/orjson-3.11.4.tar.gz", hash = "sha256:39485f4ab4c9b30a3943cfe99e1a213c4776fb69e8abd68f66b83d5a0b0fdc6d", size = 5945188, upload-time = "2025-10-24T15:50:38.027Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/04/b8/333fdb27840f3bf04022d21b654a35f58e15407183aeb16f3b41aa053446/orjson-3.11.5.tar.gz", hash = "sha256:82393ab47b4fe44ffd0a7659fa9cfaacc717eb617c93cde83795f14af5c2e9d5", size = 5972347, upload-time = "2025-12-06T15:55:39.458Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/63/51/6b556192a04595b93e277a9ff71cd0cc06c21a7df98bcce5963fa0f5e36f/orjson-3.11.4-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:d4371de39319d05d3f482f372720b841c841b52f5385bd99c61ed69d55d9ab50", size = 243571, upload-time = "2025-10-24T15:49:10.008Z" },
-    { url = "https://files.pythonhosted.org/packages/1c/2c/2602392ddf2601d538ff11848b98621cd465d1a1ceb9db9e8043181f2f7b/orjson-3.11.4-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:e41fd3b3cac850eaae78232f37325ed7d7436e11c471246b87b2cd294ec94853", size = 128891, upload-time = "2025-10-24T15:49:11.297Z" },
-    { url = "https://files.pythonhosted.org/packages/4e/47/bf85dcf95f7a3a12bf223394a4f849430acd82633848d52def09fa3f46ad/orjson-3.11.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:600e0e9ca042878c7fdf189cf1b028fe2c1418cc9195f6cb9824eb6ed99cb938", size = 130137, upload-time = "2025-10-24T15:49:12.544Z" },
-    { url = "https://files.pythonhosted.org/packages/b4/4d/a0cb31007f3ab6f1fd2a1b17057c7c349bc2baf8921a85c0180cc7be8011/orjson-3.11.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7bbf9b333f1568ef5da42bc96e18bf30fd7f8d54e9ae066d711056add508e415", size = 129152, upload-time = "2025-10-24T15:49:13.754Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/ef/2811def7ce3d8576b19e3929fff8f8f0d44bc5eb2e0fdecb2e6e6cc6c720/orjson-3.11.4-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4806363144bb6e7297b8e95870e78d30a649fdc4e23fc84daa80c8ebd366ce44", size = 136834, upload-time = "2025-10-24T15:49:15.307Z" },
-    { url = "https://files.pythonhosted.org/packages/00/d4/9aee9e54f1809cec8ed5abd9bc31e8a9631d19460e3b8470145d25140106/orjson-3.11.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad355e8308493f527d41154e9053b86a5be892b3b359a5c6d5d95cda23601cb2", size = 137519, upload-time = "2025-10-24T15:49:16.557Z" },
-    { url = "https://files.pythonhosted.org/packages/db/ea/67bfdb5465d5679e8ae8d68c11753aaf4f47e3e7264bad66dc2f2249e643/orjson-3.11.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8a7517482667fb9f0ff1b2f16fe5829296ed7a655d04d68cd9711a4d8a4e708", size = 136749, upload-time = "2025-10-24T15:49:17.796Z" },
-    { url = "https://files.pythonhosted.org/packages/01/7e/62517dddcfce6d53a39543cd74d0dccfcbdf53967017c58af68822100272/orjson-3.11.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97eb5942c7395a171cbfecc4ef6701fc3c403e762194683772df4c54cfbb2210", size = 136325, upload-time = "2025-10-24T15:49:19.347Z" },
-    { url = "https://files.pythonhosted.org/packages/18/ae/40516739f99ab4c7ec3aaa5cc242d341fcb03a45d89edeeaabc5f69cb2cf/orjson-3.11.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:149d95d5e018bdd822e3f38c103b1a7c91f88d38a88aada5c4e9b3a73a244241", size = 140204, upload-time = "2025-10-24T15:49:20.545Z" },
-    { url = "https://files.pythonhosted.org/packages/82/18/ff5734365623a8916e3a4037fcef1cd1782bfc14cf0992afe7940c5320bf/orjson-3.11.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:624f3951181eb46fc47dea3d221554e98784c823e7069edb5dbd0dc826ac909b", size = 406242, upload-time = "2025-10-24T15:49:21.884Z" },
-    { url = "https://files.pythonhosted.org/packages/e1/43/96436041f0a0c8c8deca6a05ebeaf529bf1de04839f93ac5e7c479807aec/orjson-3.11.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:03bfa548cf35e3f8b3a96c4e8e41f753c686ff3d8e182ce275b1751deddab58c", size = 150013, upload-time = "2025-10-24T15:49:23.185Z" },
-    { url = "https://files.pythonhosted.org/packages/1b/48/78302d98423ed8780479a1e682b9aecb869e8404545d999d34fa486e573e/orjson-3.11.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:525021896afef44a68148f6ed8a8bf8375553d6066c7f48537657f64823565b9", size = 139951, upload-time = "2025-10-24T15:49:24.428Z" },
-    { url = "https://files.pythonhosted.org/packages/4a/7b/ad613fdcdaa812f075ec0875143c3d37f8654457d2af17703905425981bf/orjson-3.11.4-cp312-cp312-win32.whl", hash = "sha256:b58430396687ce0f7d9eeb3dd47761ca7d8fda8e9eb92b3077a7a353a75efefa", size = 136049, upload-time = "2025-10-24T15:49:25.973Z" },
-    { url = "https://files.pythonhosted.org/packages/b9/3c/9cf47c3ff5f39b8350fb21ba65d789b6a1129d4cbb3033ba36c8a9023520/orjson-3.11.4-cp312-cp312-win_amd64.whl", hash = "sha256:c6dbf422894e1e3c80a177133c0dda260f81428f9de16d61041949f6a2e5c140", size = 131461, upload-time = "2025-10-24T15:49:27.259Z" },
-    { url = "https://files.pythonhosted.org/packages/c6/3b/e2425f61e5825dc5b08c2a5a2b3af387eaaca22a12b9c8c01504f8614c36/orjson-3.11.4-cp312-cp312-win_arm64.whl", hash = "sha256:d38d2bc06d6415852224fcc9c0bfa834c25431e466dc319f0edd56cca81aa96e", size = 126167, upload-time = "2025-10-24T15:49:28.511Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/a4/8052a029029b096a78955eadd68ab594ce2197e24ec50e6b6d2ab3f4e33b/orjson-3.11.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:334e5b4bff9ad101237c2d799d9fd45737752929753bf4faf4b207335a416b7d", size = 245347, upload-time = "2025-12-06T15:54:22.061Z" },
+    { url = "https://files.pythonhosted.org/packages/64/67/574a7732bd9d9d79ac620c8790b4cfe0717a3d5a6eb2b539e6e8995e24a0/orjson-3.11.5-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:ff770589960a86eae279f5d8aa536196ebda8273a2a07db2a54e82b93bc86626", size = 129435, upload-time = "2025-12-06T15:54:23.615Z" },
+    { url = "https://files.pythonhosted.org/packages/52/8d/544e77d7a29d90cf4d9eecd0ae801c688e7f3d1adfa2ebae5e1e94d38ab9/orjson-3.11.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed24250e55efbcb0b35bed7caaec8cedf858ab2f9f2201f17b8938c618c8ca6f", size = 132074, upload-time = "2025-12-06T15:54:24.694Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/57/b9f5b5b6fbff9c26f77e785baf56ae8460ef74acdb3eae4931c25b8f5ba9/orjson-3.11.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a66d7769e98a08a12a139049aac2f0ca3adae989817f8c43337455fbc7669b85", size = 130520, upload-time = "2025-12-06T15:54:26.185Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/6d/d34970bf9eb33f9ec7c979a262cad86076814859e54eb9a059a52f6dc13d/orjson-3.11.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86cfc555bfd5794d24c6a1903e558b50644e5e68e6471d66502ce5cb5fdef3f9", size = 136209, upload-time = "2025-12-06T15:54:27.264Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/39/bc373b63cc0e117a105ea12e57280f83ae52fdee426890d57412432d63b3/orjson-3.11.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a230065027bc2a025e944f9d4714976a81e7ecfa940923283bca7bbc1f10f626", size = 139837, upload-time = "2025-12-06T15:54:28.75Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/aa/7c4818c8d7d324da220f4f1af55c343956003aa4d1ce1857bdc1d396ba69/orjson-3.11.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b29d36b60e606df01959c4b982729c8845c69d1963f88686608be9ced96dbfaa", size = 137307, upload-time = "2025-12-06T15:54:29.856Z" },
+    { url = "https://files.pythonhosted.org/packages/46/bf/0993b5a056759ba65145effe3a79dd5a939d4a070eaa5da2ee3180fbb13f/orjson-3.11.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74099c6b230d4261fdc3169d50efc09abf38ace1a42ea2f9994b1d79153d477", size = 139020, upload-time = "2025-12-06T15:54:31.024Z" },
+    { url = "https://files.pythonhosted.org/packages/65/e8/83a6c95db3039e504eda60fc388f9faedbb4f6472f5aba7084e06552d9aa/orjson-3.11.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e697d06ad57dd0c7a737771d470eedc18e68dfdefcdd3b7de7f33dfda5b6212e", size = 141099, upload-time = "2025-12-06T15:54:32.196Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/b4/24fdc024abfce31c2f6812973b0a693688037ece5dc64b7a60c1ce69e2f2/orjson-3.11.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e08ca8a6c851e95aaecc32bc44a5aa75d0ad26af8cdac7c77e4ed93acf3d5b69", size = 413540, upload-time = "2025-12-06T15:54:33.361Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/37/01c0ec95d55ed0c11e4cae3e10427e479bba40c77312b63e1f9665e0737d/orjson-3.11.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e8b5f96c05fce7d0218df3fdfeb962d6b8cfff7e3e20264306b46dd8b217c0f3", size = 151530, upload-time = "2025-12-06T15:54:34.6Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/d4/f9ebc57182705bb4bbe63f5bbe14af43722a2533135e1d2fb7affa0c355d/orjson-3.11.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ddbfdb5099b3e6ba6d6ea818f61997bb66de14b411357d24c4612cf1ebad08ca", size = 141863, upload-time = "2025-12-06T15:54:35.801Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/04/02102b8d19fdcb009d72d622bb5781e8f3fae1646bf3e18c53d1bc8115b5/orjson-3.11.5-cp312-cp312-win32.whl", hash = "sha256:9172578c4eb09dbfcf1657d43198de59b6cef4054de385365060ed50c458ac98", size = 135255, upload-time = "2025-12-06T15:54:37.209Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/fb/f05646c43d5450492cb387de5549f6de90a71001682c17882d9f66476af5/orjson-3.11.5-cp312-cp312-win_amd64.whl", hash = "sha256:2b91126e7b470ff2e75746f6f6ee32b9ab67b7a93c8ba1d15d3a0caaf16ec875", size = 133252, upload-time = "2025-12-06T15:54:38.401Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/a6/7b8c0b26ba18c793533ac1cd145e131e46fcf43952aa94c109b5b913c1f0/orjson-3.11.5-cp312-cp312-win_arm64.whl", hash = "sha256:acbc5fac7e06777555b0722b8ad5f574739e99ffe99467ed63da98f97f9ca0fe", size = 126777, upload-time = "2025-12-06T15:54:39.515Z" },
 ]
 
 [[package]]
@@ -322,28 +324,28 @@ wheels = [
 
 [[package]]
 name = "ruff"
-version = "0.14.8"
+version = "0.14.14"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ed/d9/f7a0c4b3a2bf2556cd5d99b05372c29980249ef71e8e32669ba77428c82c/ruff-0.14.8.tar.gz", hash = "sha256:774ed0dd87d6ce925e3b8496feb3a00ac564bea52b9feb551ecd17e0a23d1eed", size = 5765385, upload-time = "2025-12-04T15:06:17.669Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732, upload-time = "2026-01-22T22:30:17.527Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/48/b8/9537b52010134b1d2b72870cc3f92d5fb759394094741b09ceccae183fbe/ruff-0.14.8-py3-none-linux_armv6l.whl", hash = "sha256:ec071e9c82eca417f6111fd39f7043acb53cd3fde9b1f95bbed745962e345afb", size = 13441540, upload-time = "2025-12-04T15:06:14.896Z" },
-    { url = "https://files.pythonhosted.org/packages/24/00/99031684efb025829713682012b6dd37279b1f695ed1b01725f85fd94b38/ruff-0.14.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:8cdb162a7159f4ca36ce980a18c43d8f036966e7f73f866ac8f493b75e0c27e9", size = 13669384, upload-time = "2025-12-04T15:06:51.809Z" },
-    { url = "https://files.pythonhosted.org/packages/72/64/3eb5949169fc19c50c04f28ece2c189d3b6edd57e5b533649dae6ca484fe/ruff-0.14.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e2fcbefe91f9fad0916850edf0854530c15bd1926b6b779de47e9ab619ea38f", size = 12806917, upload-time = "2025-12-04T15:06:08.925Z" },
-    { url = "https://files.pythonhosted.org/packages/c4/08/5250babb0b1b11910f470370ec0cbc67470231f7cdc033cee57d4976f941/ruff-0.14.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d70721066a296f45786ec31916dc287b44040f553da21564de0ab4d45a869b", size = 13256112, upload-time = "2025-12-04T15:06:23.498Z" },
-    { url = "https://files.pythonhosted.org/packages/78/4c/6c588e97a8e8c2d4b522c31a579e1df2b4d003eddfbe23d1f262b1a431ff/ruff-0.14.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2c87e09b3cd9d126fc67a9ecd3b5b1d3ded2b9c7fce3f16e315346b9d05cfb52", size = 13227559, upload-time = "2025-12-04T15:06:33.432Z" },
-    { url = "https://files.pythonhosted.org/packages/23/ce/5f78cea13eda8eceac71b5f6fa6e9223df9b87bb2c1891c166d1f0dce9f1/ruff-0.14.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d62cb310c4fbcb9ee4ac023fe17f984ae1e12b8a4a02e3d21489f9a2a5f730c", size = 13896379, upload-time = "2025-12-04T15:06:02.687Z" },
-    { url = "https://files.pythonhosted.org/packages/cf/79/13de4517c4dadce9218a20035b21212a4c180e009507731f0d3b3f5df85a/ruff-0.14.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1af35c2d62633d4da0521178e8a2641c636d2a7153da0bac1b30cfd4ccd91344", size = 15372786, upload-time = "2025-12-04T15:06:29.828Z" },
-    { url = "https://files.pythonhosted.org/packages/00/06/33df72b3bb42be8a1c3815fd4fae83fa2945fc725a25d87ba3e42d1cc108/ruff-0.14.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:25add4575ffecc53d60eed3f24b1e934493631b48ebbc6ebaf9d8517924aca4b", size = 14990029, upload-time = "2025-12-04T15:06:36.812Z" },
-    { url = "https://files.pythonhosted.org/packages/64/61/0f34927bd90925880394de0e081ce1afab66d7b3525336f5771dcf0cb46c/ruff-0.14.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c943d847b7f02f7db4201a0600ea7d244d8a404fbb639b439e987edcf2baf9a", size = 14407037, upload-time = "2025-12-04T15:06:39.979Z" },
-    { url = "https://files.pythonhosted.org/packages/96/bc/058fe0aefc0fbf0d19614cb6d1a3e2c048f7dc77ca64957f33b12cfdc5ef/ruff-0.14.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb6e8bf7b4f627548daa1b69283dac5a296bfe9ce856703b03130732e20ddfe2", size = 14102390, upload-time = "2025-12-04T15:06:46.372Z" },
-    { url = "https://files.pythonhosted.org/packages/af/a4/e4f77b02b804546f4c17e8b37a524c27012dd6ff05855d2243b49a7d3cb9/ruff-0.14.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:7aaf2974f378e6b01d1e257c6948207aec6a9b5ba53fab23d0182efb887a0e4a", size = 14230793, upload-time = "2025-12-04T15:06:20.497Z" },
-    { url = "https://files.pythonhosted.org/packages/3f/52/bb8c02373f79552e8d087cedaffad76b8892033d2876c2498a2582f09dcf/ruff-0.14.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e5758ca513c43ad8a4ef13f0f081f80f08008f410790f3611a21a92421ab045b", size = 13160039, upload-time = "2025-12-04T15:06:49.06Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/ad/b69d6962e477842e25c0b11622548df746290cc6d76f9e0f4ed7456c2c31/ruff-0.14.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f74f7ba163b6e85a8d81a590363bf71618847e5078d90827749bfda1d88c9cdf", size = 13205158, upload-time = "2025-12-04T15:06:54.574Z" },
-    { url = "https://files.pythonhosted.org/packages/06/63/54f23da1315c0b3dfc1bc03fbc34e10378918a20c0b0f086418734e57e74/ruff-0.14.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:eed28f6fafcc9591994c42254f5a5c5ca40e69a30721d2ab18bb0bb3baac3ab6", size = 13469550, upload-time = "2025-12-04T15:05:59.209Z" },
-    { url = "https://files.pythonhosted.org/packages/70/7d/a4d7b1961e4903bc37fffb7ddcfaa7beb250f67d97cfd1ee1d5cddb1ec90/ruff-0.14.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:21d48fa744c9d1cb8d71eb0a740c4dd02751a5de9db9a730a8ef75ca34cf138e", size = 14211332, upload-time = "2025-12-04T15:06:06.027Z" },
-    { url = "https://files.pythonhosted.org/packages/5d/93/2a5063341fa17054e5c86582136e9895db773e3c2ffb770dde50a09f35f0/ruff-0.14.8-py3-none-win32.whl", hash = "sha256:15f04cb45c051159baebb0f0037f404f1dc2f15a927418f29730f411a79bc4e7", size = 13151890, upload-time = "2025-12-04T15:06:11.668Z" },
-    { url = "https://files.pythonhosted.org/packages/02/1c/65c61a0859c0add13a3e1cbb6024b42de587456a43006ca2d4fd3d1618fe/ruff-0.14.8-py3-none-win_amd64.whl", hash = "sha256:9eeb0b24242b5bbff3011409a739929f497f3fb5fe3b5698aba5e77e8c833097", size = 14537826, upload-time = "2025-12-04T15:06:26.409Z" },
-    { url = "https://files.pythonhosted.org/packages/6d/63/8b41cea3afd7f58eb64ac9251668ee0073789a3bc9ac6f816c8c6fef986d/ruff-0.14.8-py3-none-win_arm64.whl", hash = "sha256:965a582c93c63fe715fd3e3f8aa37c4b776777203d8e1d8aa3cc0c14424a4b99", size = 13634522, upload-time = "2025-12-04T15:06:43.212Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650, upload-time = "2026-01-22T22:30:08.578Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245, upload-time = "2026-01-22T22:30:04.155Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273, upload-time = "2026-01-22T22:30:34.642Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753, upload-time = "2026-01-22T22:30:31.781Z" },
+    { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052, upload-time = "2026-01-22T22:30:24.827Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637, upload-time = "2026-01-22T22:30:13.239Z" },
+    { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761, upload-time = "2026-01-22T22:30:22.542Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701, upload-time = "2026-01-22T22:30:20.499Z" },
+    { url = "https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455, upload-time = "2026-01-22T22:29:59.679Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882, upload-time = "2026-01-22T22:29:57.032Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549, upload-time = "2026-01-22T22:30:27.175Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416, upload-time = "2026-01-22T22:30:01.964Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491, upload-time = "2026-01-22T22:30:29.51Z" },
+    { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525, upload-time = "2026-01-22T22:30:06.499Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626, upload-time = "2026-01-22T22:30:36.848Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442, upload-time = "2026-01-22T22:30:38.93Z" },
+    { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486, upload-time = "2026-01-22T22:30:10.852Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" },
 ]
 
 [[package]]
@@ -382,15 +384,15 @@ wheels = [
 
 [[package]]
 name = "uvicorn"
-version = "0.38.0"
+version = "0.40.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "click" },
     { name = "h11" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/cb/ce/f06b84e2697fef4688ca63bdb2fdf113ca0a3be33f94488f2cadb690b0cf/uvicorn-0.38.0.tar.gz", hash = "sha256:fd97093bdd120a2609fc0d3afe931d4d4ad688b6e75f0f929fde1bc36fe0e91d", size = 80605, upload-time = "2025-10-18T13:46:44.63Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109, upload-time = "2025-10-18T13:46:42.958Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" },
 ]
 
 [[package]]

From 889f2d257ba15a61339de924fb6a67a6fefe6516 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 24 Jan 2026 11:15:41 +0700
Subject: [PATCH 044/236] Fix IDE warnings

---
 app/services/lmdb.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 93c7723..dec148b 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -52,7 +52,7 @@ def _hash_message(message: Message) -> str:
                         func["arguments"] = orjson.dumps(
                             parsed, option=orjson.OPT_SORT_KEYS
                         ).decode("utf-8")
-                    except Exception:
+                    except orjson.JSONDecodeError:
                         pass
 
     message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS)
@@ -175,7 +175,7 @@ def store(
         value = orjson.dumps(conv.model_dump(mode="json"))
 
         try:
-            with self._get_transaction(write=True) as txn:
+            with self._get_transaction(self, write=True) as txn:
                 # Store main data
                 txn.put(storage_key.encode("utf-8"), value, overwrite=True)
 
@@ -203,7 +203,7 @@ def get(self, key: str) -> Optional[ConversationInStore]:
             Conversation or None if not found
         """
         try:
-            with self._get_transaction(write=False) as txn:
+            with self._get_transaction(self, write=False) as txn:
                 data = txn.get(key.encode("utf-8"), default=None)
                 if not data:
                     return None
@@ -255,7 +255,7 @@ def _find_by_message_list(
 
             key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}"
             try:
-                with self._get_transaction(write=False) as txn:
+                with self._get_transaction(self, write=False) as txn:
                     if mapped := txn.get(key.encode("utf-8")):  # type: ignore
                         return self.get(mapped.decode("utf-8"))  # type: ignore
             except Exception as e:
@@ -279,7 +279,7 @@ def exists(self, key: str) -> bool:
             bool: True if key exists, False otherwise
         """
         try:
-            with self._get_transaction(write=False) as txn:
+            with self._get_transaction(self, write=False) as txn:
                 return txn.get(key.encode("utf-8")) is not None
         except Exception as e:
             logger.error(f"Failed to check existence of key {key}: {e}")
@@ -296,7 +296,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]:
             ConversationInStore: The deleted conversation data, or None if not found
         """
         try:
-            with self._get_transaction(write=True) as txn:
+            with self._get_transaction(self, write=True) as txn:
                 # Get data first to clean up hash mapping
                 data = txn.get(key.encode("utf-8"))
                 if not data:
@@ -333,7 +333,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]:
         """
         keys = []
         try:
-            with self._get_transaction(write=False) as txn:
+            with self._get_transaction(self, write=False) as txn:
                 cursor = txn.cursor()
                 cursor.first()
 
@@ -377,7 +377,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
         expired_entries: list[tuple[str, ConversationInStore]] = []
 
         try:
-            with self._get_transaction(write=False) as txn:
+            with self._get_transaction(self, write=False) as txn:
                 cursor = txn.cursor()
 
                 for key_bytes, value_bytes in cursor:
@@ -407,7 +407,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
 
         removed = 0
         try:
-            with self._get_transaction(write=True) as txn:
+            with self._get_transaction(self, write=True) as txn:
                 for key_str, conv in expired_entries:
                     key_bytes = key_str.encode("utf-8")
                     if not txn.delete(key_bytes):

From 66b62020330e690499ef386e81cee52dc0f97cce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 24 Jan 2026 11:26:16 +0700
Subject: [PATCH 045/236] Revert fixes for incorrect IDE warnings

---
 app/services/lmdb.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index dec148b..c8e78a9 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -175,7 +175,7 @@ def store(
         value = orjson.dumps(conv.model_dump(mode="json"))
 
         try:
-            with self._get_transaction(self, write=True) as txn:
+            with self._get_transaction(write=True) as txn:
                 # Store main data
                 txn.put(storage_key.encode("utf-8"), value, overwrite=True)
 
@@ -203,7 +203,7 @@ def get(self, key: str) -> Optional[ConversationInStore]:
             Conversation or None if not found
         """
         try:
-            with self._get_transaction(self, write=False) as txn:
+            with self._get_transaction(write=False) as txn:
                 data = txn.get(key.encode("utf-8"), default=None)
                 if not data:
                     return None
@@ -255,7 +255,7 @@ def _find_by_message_list(
 
             key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}"
             try:
-                with self._get_transaction(self, write=False) as txn:
+                with self._get_transaction(write=False) as txn:
                     if mapped := txn.get(key.encode("utf-8")):  # type: ignore
                         return self.get(mapped.decode("utf-8"))  # type: ignore
             except Exception as e:
@@ -279,7 +279,7 @@ def exists(self, key: str) -> bool:
             bool: True if key exists, False otherwise
         """
         try:
-            with self._get_transaction(self, write=False) as txn:
+            with self._get_transaction(write=False) as txn:
                 return txn.get(key.encode("utf-8")) is not None
         except Exception as e:
             logger.error(f"Failed to check existence of key {key}: {e}")
@@ -296,7 +296,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]:
             ConversationInStore: The deleted conversation data, or None if not found
         """
         try:
-            with self._get_transaction(self, write=True) as txn:
+            with self._get_transaction(write=True) as txn:
                 # Get data first to clean up hash mapping
                 data = txn.get(key.encode("utf-8"))
                 if not data:
@@ -333,7 +333,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]:
         """
         keys = []
         try:
-            with self._get_transaction(self, write=False) as txn:
+            with self._get_transaction(write=False) as txn:
                 cursor = txn.cursor()
                 cursor.first()
 
@@ -377,7 +377,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
         expired_entries: list[tuple[str, ConversationInStore]] = []
 
         try:
-            with self._get_transaction(self, write=False) as txn:
+            with self._get_transaction(write=False) as txn:
                 cursor = txn.cursor()
 
                 for key_bytes, value_bytes in cursor:
@@ -407,7 +407,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
 
         removed = 0
         try:
-            with self._get_transaction(self, write=True) as txn:
+            with self._get_transaction(write=True) as txn:
                 for key_str, conv in expired_entries:
                     key_bytes = key_str.encode("utf-8")
                     if not txn.delete(key_bytes):

From 3297f534f035f869bd7e4a867618b39bc7256f06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 24 Jan 2026 12:05:26 +0700
Subject: [PATCH 046/236] Refactor: Modify the LMDB store to fix issues where
 no conversation is found.

---
 app/services/lmdb.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index c8e78a9..a55d3a9 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -476,9 +476,7 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
                 if isinstance(msg.content, str):
                     normalized_content = LMDBConversationStore.remove_think_tags(msg.content)
                     if normalized_content != msg.content:
-                        cleaned_msg = Message(
-                            role=msg.role, content=normalized_content, name=msg.name
-                        )
+                        cleaned_msg = msg.model_copy(update={"content": normalized_content})
                         cleaned_messages.append(cleaned_msg)
                     else:
                         cleaned_messages.append(msg)

From 5399b260595e77d6c1f0a8d24a880c59d165a57b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 24 Jan 2026 12:06:52 +0700
Subject: [PATCH 047/236] Refactor: Centralize the mapping of the 'developer'
 role to 'system' for better Gemini compatibility.

---
 app/models/models.py | 7 +++++++
 app/server/chat.py   | 6 +-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/app/models/models.py b/app/models/models.py
index c27e024..63ddb94 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -29,6 +29,13 @@ class Message(BaseModel):
     audio: Optional[Dict[str, Any]] = None
     annotations: List[Dict[str, Any]] = Field(default_factory=list)
 
+    @model_validator(mode="after")
+    def normalize_role(self) -> "Message":
+        """Normalize 'developer' role to 'system' for Gemini compatibility."""
+        if self.role == "developer":
+            self.role = "system"
+        return self
+
 
 class Choice(BaseModel):
     """Choice model"""
diff --git a/app/server/chat.py b/app/server/chat.py
index a9d9dec..66a2720 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -319,8 +319,6 @@ def _response_items_to_messages(
     normalized_input: list[ResponseInputItem] = []
     for item in items:
         role = item.role
-        if role == "developer":
-            role = "system"
 
         content = item.content
         normalized_contents: list[ResponseInputContent] = []
@@ -394,8 +392,6 @@ def _instructions_to_messages(
             continue
 
         role = item.role
-        if role == "developer":
-            role = "system"
 
         content = item.content
         if isinstance(content, str):
@@ -1054,7 +1050,7 @@ async def _find_reusable_session(
     while search_end >= 2:
         search_history = messages[:search_end]
 
-        # Only try to match if the last stored message would be assistant/system.
+        # Only try to match if the last stored message would be assistant/system before querying LMDB.
         if search_history[-1].role in {"assistant", "system"}:
             try:
                 if conv := db.find(model.model_name, search_history):

From de01c7850fa44f4dcbd8f31c47bccaf301861a56 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 24 Jan 2026 13:04:31 +0700
Subject: [PATCH 048/236] Refactor: Modify the LMDB store to fix issues where
 no conversation is found.

---
 app/models/models.py |  1 +
 app/services/lmdb.py | 95 +++++++++++++++++++++++++-------------------
 app/utils/helper.py  | 10 +++--
 3 files changed, 63 insertions(+), 43 deletions(-)

diff --git a/app/models/models.py b/app/models/models.py
index 63ddb94..4072b29 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -24,6 +24,7 @@ class Message(BaseModel):
     content: Union[str, List[ContentItem], None] = None
     name: Optional[str] = None
     tool_calls: Optional[List["ToolCall"]] = None
+    tool_call_id: Optional[str] = None
     refusal: Optional[str] = None
     reasoning_content: Optional[str] = None
     audio: Optional[Dict[str, Any]] = None
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index a55d3a9..594acf0 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -15,53 +15,69 @@
 
 
 def _hash_message(message: Message) -> str:
-    """Generate a hash for a single message."""
-    # Convert message to dict and sort keys for consistent hashing
-    message_dict = message.model_dump(mode="json")
-
-    # Normalize content: empty string -> None
-    content = message_dict.get("content")
-    if content == "":
-        message_dict["content"] = None
+    """Generate a consistent hash for a single message focusing only on core identity fields."""
+    # Pick only fields that define the message in a conversation history
+    core_data = {
+        "role": message.role,
+        "name": message.name,
+        "tool_call_id": message.tool_call_id,
+    }
+
+    # Normalize content: strip, handle empty/None, and list-of-text items
+    content = message.content
+    if not content:
+        core_data["content"] = None
+    elif isinstance(content, str):
+        stripped = content.strip()
+        core_data["content"] = stripped if stripped else None
     elif isinstance(content, list):
-        is_pure_text = True
         text_parts = []
         for item in content:
-            if not isinstance(item, dict) or item.get("type") != "text":
-                is_pure_text = False
+            if isinstance(item, ContentItem) and item.type == "text":
+                text_parts.append(item.text or "")
+            elif isinstance(item, dict) and item.get("type") == "text":
+                text_parts.append(item.get("text") or "")
+            else:
+                # If it contains non-text (images/files), keep the full list for hashing
+                text_parts = None
                 break
-            text_parts.append(item.get("text") or "")
-
-        if is_pure_text:
-            text_content = "".join(text_parts)
-            message_dict["content"] = text_content if text_content else None
-
-    # Normalize tool_calls: empty list -> None, and canonicalize arguments
-    tool_calls = message_dict.get("tool_calls")
-    if not tool_calls:
-        message_dict["tool_calls"] = None
-    elif isinstance(tool_calls, list):
-        for tool_call in tool_calls:
-            if isinstance(tool_call, dict) and "function" in tool_call:
-                func = tool_call["function"]
-                args = func.get("arguments")
-                if isinstance(args, str):
-                    try:
-                        # Parse and re-dump to canonicalize (remove extra whitespace, sort keys)
-                        parsed = orjson.loads(args)
-                        func["arguments"] = orjson.dumps(
-                            parsed, option=orjson.OPT_SORT_KEYS
-                        ).decode("utf-8")
-                    except orjson.JSONDecodeError:
-                        pass
-
-    message_bytes = orjson.dumps(message_dict, option=orjson.OPT_SORT_KEYS)
+
+        if text_parts is not None:
+            text_content = "".join(text_parts).strip()
+            core_data["content"] = text_content if text_content else None
+        else:
+            core_data["content"] = message.model_dump(mode="json")["content"]
+
+    # Normalize tool_calls: canonicalize arguments and sort by name if multiple calls exist
+    if message.tool_calls:
+        calls_data = []
+        for tc in message.tool_calls:
+            args = tc.function.arguments or "{}"
+            try:
+                parsed = orjson.loads(args)
+                canon_args = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8")
+            except orjson.JSONDecodeError:
+                canon_args = args
+
+            calls_data.append(
+                {
+                    "id": tc.id,  # Deterministic IDs ensure this is stable
+                    "name": tc.function.name,
+                    "arguments": canon_args,
+                }
+            )
+        # Sort calls to be order-independent
+        calls_data.sort(key=lambda x: (x["name"], x["arguments"]))
+        core_data["tool_calls"] = calls_data
+    else:
+        core_data["tool_calls"] = None
+
+    message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS)
     return hashlib.sha256(message_bytes).hexdigest()
 
 
 def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str:
-    """Generate a hash for a list of messages and client id."""
-    # Create a combined hash from all individual message hashes
+    """Generate a hash for a list of messages and model name, tied to a specific client_id."""
     combined_hash = hashlib.sha256()
     combined_hash.update(client_id.encode("utf-8"))
     combined_hash.update(model.encode("utf-8"))
@@ -252,7 +268,6 @@ def _find_by_message_list(
         """Internal find implementation based on a message list."""
         for c in g_config.gemini.clients:
             message_hash = _hash_conversation(c.id, model, messages)
-
             key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}"
             try:
                 with self._get_transaction(write=False) as txn:
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 1dc518f..239b7f4 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -1,9 +1,9 @@
 import base64
+import hashlib
 import mimetypes
 import re
 import struct
 import tempfile
-import uuid
 from pathlib import Path
 from typing import Iterator
 from urllib.parse import urlparse
@@ -222,13 +222,17 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         arguments = raw_args
         try:
             parsed_args = orjson.loads(raw_args)
-            arguments = orjson.dumps(parsed_args).decode("utf-8")
+            arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8")
         except orjson.JSONDecodeError:
             logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.")
 
+        # Generate a deterministic ID based on name and arguments to avoid hash mismatch in LMDB
+        seed = f"{name}:{arguments}".encode("utf-8")
+        call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}"
+
         tool_calls.append(
             ToolCall(
-                id=f"call_{uuid.uuid4().hex}",
+                id=call_id,
                 type="function",
                 function=FunctionCall(name=name, arguments=arguments),
             )

From 196414755e860f1f6d9c840954eb45c53225a864 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 24 Jan 2026 13:26:58 +0700
Subject: [PATCH 049/236] Refactor: Modify the LMDB store to fix issues where
 no conversation is found.

---
 app/server/chat.py   | 10 +++++++++-
 app/services/lmdb.py |  7 ++-----
 app/utils/helper.py  | 13 +++++++------
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 66a2720..7c683cd 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1047,6 +1047,8 @@ async def _find_reusable_session(
 
     # Start with the full history and iteratively trim from the end.
     search_end = len(messages)
+    logger.debug(f"Searching for reusable session in history of length {search_end}...")
+
     while search_end >= 2:
         search_history = messages[:search_end]
 
@@ -1057,14 +1059,20 @@ async def _find_reusable_session(
                     client = await pool.acquire(conv.client_id)
                     session = client.start_chat(metadata=conv.metadata, model=model)
                     remain = messages[search_end:]
+                    logger.debug(
+                        f"Match found at prefix length {search_end}. Client: {conv.client_id}"
+                    )
                     return session, client, remain
             except Exception as e:
-                logger.warning(f"Error checking LMDB for reusable session: {e}")
+                logger.warning(
+                    f"Error checking LMDB for reusable session at length {search_end}: {e}"
+                )
                 break
 
         # Trim one message and try again.
         search_end -= 1
 
+    logger.debug("No reusable session found after checking all possible prefixes.")
     return None, None, messages
 
 
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 594acf0..5aefa4b 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -15,12 +15,10 @@
 
 
 def _hash_message(message: Message) -> str:
-    """Generate a consistent hash for a single message focusing only on core identity fields."""
-    # Pick only fields that define the message in a conversation history
+    """Generate a consistent hash for a single message focusing ONLY on logic/content, ignoring technical IDs."""
     core_data = {
         "role": message.role,
         "name": message.name,
-        "tool_call_id": message.tool_call_id,
     }
 
     # Normalize content: strip, handle empty/None, and list-of-text items
@@ -48,7 +46,7 @@ def _hash_message(message: Message) -> str:
         else:
             core_data["content"] = message.model_dump(mode="json")["content"]
 
-    # Normalize tool_calls: canonicalize arguments and sort by name if multiple calls exist
+    # Normalize tool_calls: Focus ONLY on function name and arguments
     if message.tool_calls:
         calls_data = []
         for tc in message.tool_calls:
@@ -61,7 +59,6 @@ def _hash_message(message: Message) -> str:
 
             calls_data.append(
                 {
-                    "id": tc.id,  # Deterministic IDs ensure this is stable
                     "name": tc.function.name,
                     "arguments": canon_args,
                 }
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 239b7f4..ecf4a47 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -213,7 +213,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
 
     tool_calls: list[ToolCall] = []
 
-    def _create_tool_call(name: str, raw_args: str) -> None:
+    def _create_tool_call(name: str, raw_args: str, index: int) -> None:
         """Helper to parse args and append to the tool_calls list."""
         if not name:
             logger.warning("Encountered tool_call without a function name.")
@@ -226,8 +226,8 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         except orjson.JSONDecodeError:
             logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.")
 
-        # Generate a deterministic ID based on name and arguments to avoid hash mismatch in LMDB
-        seed = f"{name}:{arguments}".encode("utf-8")
+        # Generate a deterministic ID based on name, arguments, and index to avoid collisions
+        seed = f"{name}:{arguments}:{index}".encode("utf-8")
         call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}"
 
         tool_calls.append(
@@ -244,11 +244,11 @@ def _replace_block(match: re.Match[str]) -> str:
             return match.group(0)
 
         found_in_block = False
-        for call_match in TOOL_CALL_RE.finditer(block_content):
+        for i, call_match in enumerate(TOOL_CALL_RE.finditer(block_content)):
             found_in_block = True
             name = (call_match.group(1) or "").strip()
             raw_args = (call_match.group(2) or "").strip()
-            _create_tool_call(name, raw_args)
+            _create_tool_call(name, raw_args, i)
 
         if found_in_block:
             return ""
@@ -258,9 +258,10 @@ def _replace_block(match: re.Match[str]) -> str:
     cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
 
     def _replace_orphan(match: re.Match[str]) -> str:
+        # Note: orphan calls are handled with a fallback index if they appear outside blocks
         name = (match.group(1) or "").strip()
         raw_args = (match.group(2) or "").strip()
-        _create_tool_call(name, raw_args)
+        _create_tool_call(name, raw_args, len(tool_calls))
         return ""
 
     cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned)

From 8c5c7498230bc680bf50464dacf0b6f001888981 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 24 Jan 2026 13:42:09 +0700
Subject: [PATCH 050/236] Refactor: Modify the LMDB store to fix issues where
 no conversation is found.

---
 app/server/chat.py   | 4 ++--
 app/services/lmdb.py | 6 +++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 7c683cd..0d64b71 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1052,8 +1052,8 @@ async def _find_reusable_session(
     while search_end >= 2:
         search_history = messages[:search_end]
 
-        # Only try to match if the last stored message would be assistant/system before querying LMDB.
-        if search_history[-1].role in {"assistant", "system"}:
+        # Only try to match if the last stored message would be assistant/system/tool before querying LMDB.
+        if search_history[-1].role in {"assistant", "system", "tool"}:
             try:
                 if conv := db.find(model.model_name, search_history):
                     client = await pool.acquire(conv.client_id)
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 5aefa4b..c612d9e 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -70,7 +70,11 @@ def _hash_message(message: Message) -> str:
         core_data["tool_calls"] = None
 
     message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS)
-    return hashlib.sha256(message_bytes).hexdigest()
+    msg_hash = hashlib.sha256(message_bytes).hexdigest()
+    logger.debug(
+        f"Hashing message (role={message.role}): {message_bytes.decode('utf-8')} -> {msg_hash}"
+    )
+    return msg_hash
 
 
 def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str:

From ce67d664b5443726fe518aee1cc9ef550ae640fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 24 Jan 2026 14:41:55 +0700
Subject: [PATCH 051/236] Refactor: Avoid reusing an existing chat session if
 its idle time exceeds METADATA_TTL_MINUTES.

---
 app/server/chat.py   | 14 ++++++++++++--
 app/services/lmdb.py |  9 ++-------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 0d64b71..6fbb818 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -58,6 +58,7 @@
 # Maximum characters Gemini Web can accept in a single request (configurable)
 MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9)
 CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')"
+METADATA_TTL_MINUTES = 20
 
 router = APIRouter()
 
@@ -1047,7 +1048,6 @@ async def _find_reusable_session(
 
     # Start with the full history and iteratively trim from the end.
     search_end = len(messages)
-    logger.debug(f"Searching for reusable session in history of length {search_end}...")
 
     while search_end >= 2:
         search_history = messages[:search_end]
@@ -1056,6 +1056,17 @@ async def _find_reusable_session(
         if search_history[-1].role in {"assistant", "system", "tool"}:
             try:
                 if conv := db.find(model.model_name, search_history):
+                    # Check if metadata is too old
+                    now = datetime.now()
+                    updated_at = conv.updated_at or conv.created_at or now
+                    age_minutes = (now - updated_at).total_seconds() / 60
+
+                    if age_minutes > METADATA_TTL_MINUTES:
+                        logger.debug(
+                            f"Matched conversation is too old ({age_minutes:.1f}m), skipping reuse."
+                        )
+                        break
+
                     client = await pool.acquire(conv.client_id)
                     session = client.start_chat(metadata=conv.metadata, model=model)
                     remain = messages[search_end:]
@@ -1072,7 +1083,6 @@ async def _find_reusable_session(
         # Trim one message and try again.
         search_end -= 1
 
-    logger.debug("No reusable session found after checking all possible prefixes.")
     return None, None, messages
 
 
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index c612d9e..424b357 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -68,13 +68,8 @@ def _hash_message(message: Message) -> str:
         core_data["tool_calls"] = calls_data
     else:
         core_data["tool_calls"] = None
-
-    message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS)
-    msg_hash = hashlib.sha256(message_bytes).hexdigest()
-    logger.debug(
-        f"Hashing message (role={message.role}): {message_bytes.decode('utf-8')} -> {msg_hash}"
-    )
-    return msg_hash
+        message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS)
+        return hashlib.sha256(message_bytes).hexdigest()
 
 
 def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str:

From 3d32d1226b1399f4286aadd95b2c4a52228fac45 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 24 Jan 2026 14:58:58 +0700
Subject: [PATCH 052/236] Refactor: Update the LMDB store to resolve issues
 preventing conversations from being saved

---
 app/services/lmdb.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 424b357..2dbe7b2 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -68,15 +68,16 @@ def _hash_message(message: Message) -> str:
         core_data["tool_calls"] = calls_data
     else:
         core_data["tool_calls"] = None
-        message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS)
-        return hashlib.sha256(message_bytes).hexdigest()
+
+    message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS)
+    return hashlib.sha256(message_bytes).hexdigest()
 
 
 def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str:
     """Generate a hash for a list of messages and model name, tied to a specific client_id."""
     combined_hash = hashlib.sha256()
-    combined_hash.update(client_id.encode("utf-8"))
-    combined_hash.update(model.encode("utf-8"))
+    combined_hash.update((client_id or "").encode("utf-8"))
+    combined_hash.update((model or "").encode("utf-8"))
     for message in messages:
         message_hash = _hash_message(message)
         combined_hash.update(message_hash.encode("utf-8"))

From 2eb9f05142ddfa1cb665b248f3faf2e278b619c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 24 Jan 2026 17:57:04 +0700
Subject: [PATCH 053/236] Refactor: Update the _prepare_messages_for_model
 helper to omit the system instruction when reusing a session to save tokens.

---
 app/server/chat.py   | 66 +++++++++++++++++++++++++----------------
 app/services/lmdb.py | 70 ++++++++++++++++++++++++++++++++++----------
 2 files changed, 96 insertions(+), 40 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 6fbb818..646f4fa 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -58,7 +58,7 @@
 # Maximum characters Gemini Web can accept in a single request (configurable)
 MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9)
 CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')"
-METADATA_TTL_MINUTES = 20
+METADATA_TTL_MINUTES = 15
 
 router = APIRouter()
 
@@ -268,31 +268,35 @@ def _prepare_messages_for_model(
     tools: list[Tool] | None,
     tool_choice: str | ToolChoiceFunction | None,
     extra_instructions: list[str] | None = None,
+    inject_system_defaults: bool = True,
 ) -> list[Message]:
     """Return a copy of messages enriched with tool instructions when needed."""
     prepared = [msg.model_copy(deep=True) for msg in source_messages]
 
     instructions: list[str] = []
-    if tools:
-        tool_prompt = _build_tool_prompt(tools, tool_choice)
-        if tool_prompt:
-            instructions.append(tool_prompt)
-
-    if extra_instructions:
-        instructions.extend(instr for instr in extra_instructions if instr)
-        logger.debug(
-            f"Applied {len(extra_instructions)} extra instructions for tool/structured output."
-        )
+    if inject_system_defaults:
+        if tools:
+            tool_prompt = _build_tool_prompt(tools, tool_choice)
+            if tool_prompt:
+                instructions.append(tool_prompt)
+
+        if extra_instructions:
+            instructions.extend(instr for instr in extra_instructions if instr)
+            logger.debug(
+                f"Applied {len(extra_instructions)} extra instructions for tool/structured output."
+            )
 
-    if not _conversation_has_code_hint(prepared):
-        instructions.append(CODE_BLOCK_HINT)
-        logger.debug("Injected default code block hint for Gemini conversation.")
+        if not _conversation_has_code_hint(prepared):
+            instructions.append(CODE_BLOCK_HINT)
+            logger.debug("Injected default code block hint for Gemini conversation.")
 
     if not instructions:
+        # Still need to ensure XML hint for the last user message if tools are present
+        if tools and tool_choice != "none":
+            _append_xml_hint_to_last_user_message(prepared)
         return prepared
 
     combined_instructions = "\n\n".join(instructions)
-
     if prepared and prepared[0].role == "system" and isinstance(prepared[0].content, str):
         existing = prepared[0].content or ""
         separator = "\n\n" if existing else ""
@@ -530,8 +534,14 @@ async def create_chat_completion(
     )
 
     if session:
+        # Optimization: When reusing a session, we don't need to resend the heavy tool definitions
+        # or structured output instructions as they are already in the Gemini session history.
         messages_to_send = _prepare_messages_for_model(
-            remaining_messages, request.tools, request.tool_choice, extra_instructions
+            remaining_messages,
+            request.tools,
+            request.tool_choice,
+            extra_instructions,
+            inject_system_defaults=False,
         )
         if not messages_to_send:
             raise HTTPException(
@@ -642,17 +652,20 @@ async def create_chat_completion(
 
     # After formatting, persist the conversation to LMDB
     try:
-        last_message = Message(
+        current_assistant_message = Message(
             role="assistant",
             content=storage_output or None,
             tool_calls=tool_calls or None,
         )
-        cleaned_history = db.sanitize_assistant_messages(request.messages)
+        # Sanitize the entire history including the new message to ensure consistency
+        full_history = [*request.messages, current_assistant_message]
+        cleaned_history = db.sanitize_assistant_messages(full_history)
+
         conv = ConversationInStore(
             model=model.model_name,
             client_id=client.id,
             metadata=session.metadata,
-            messages=[*cleaned_history, last_message],
+            messages=cleaned_history,
         )
         key = db.store(conv)
         logger.debug(f"Conversation saved to LMDB with key: {key}")
@@ -780,9 +793,10 @@ async def _build_payload(
     if reuse_session:
         messages_to_send = _prepare_messages_for_model(
             remaining_messages,
-            tools=None,
-            tool_choice=None,
-            extra_instructions=extra_instructions or None,
+            tools=request_data.tools,  # Keep for XML hint logic
+            tool_choice=request_data.tool_choice,
+            extra_instructions=None,  # Already in session history
+            inject_system_defaults=False,
         )
         if not messages_to_send:
             raise HTTPException(
@@ -994,17 +1008,19 @@ async def _build_payload(
     )
 
     try:
-        last_message = Message(
+        current_assistant_message = Message(
             role="assistant",
             content=storage_output or None,
             tool_calls=detected_tool_calls or None,
         )
-        cleaned_history = db.sanitize_assistant_messages(messages)
+        full_history = [*messages, current_assistant_message]
+        cleaned_history = db.sanitize_assistant_messages(full_history)
+
         conv = ConversationInStore(
             model=model.model_name,
             client_id=client.id,
             metadata=session.metadata,
-            messages=[*cleaned_history, last_message],
+            messages=cleaned_history,
         )
         key = db.store(conv)
         logger.debug(f"Conversation saved to LMDB with key: {key}")
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 2dbe7b2..f4c9938 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -11,6 +11,7 @@
 
 from ..models import ContentItem, ConversationInStore, Message
 from ..utils import g_config
+from ..utils.helper import extract_tool_calls, remove_tool_call_blocks
 from ..utils.singleton import Singleton
 
 
@@ -26,8 +27,9 @@ def _hash_message(message: Message) -> str:
     if not content:
         core_data["content"] = None
     elif isinstance(content, str):
-        stripped = content.strip()
-        core_data["content"] = stripped if stripped else None
+        # Normalize line endings and strip whitespace
+        normalized = content.replace("\r\n", "\n").strip()
+        core_data["content"] = normalized if normalized else None
     elif isinstance(content, list):
         text_parts = []
         for item in content:
@@ -41,7 +43,7 @@ def _hash_message(message: Message) -> str:
                 break
 
         if text_parts is not None:
-            text_content = "".join(text_parts).strip()
+            text_content = "".join(text_parts).replace("\r\n", "\n").strip()
             core_data["content"] = text_content if text_content else None
         else:
             core_data["content"] = message.model_dump(mode="json")["content"]
@@ -260,7 +262,9 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt
         return None
 
     def _find_by_message_list(
-        self, model: str, messages: List[Message]
+        self,
+        model: str,
+        messages: List[Message],
     ) -> Optional[ConversationInStore]:
         """Internal find implementation based on a message list."""
         for c in g_config.gemini.clients:
@@ -471,40 +475,76 @@ def __del__(self):
     @staticmethod
     def remove_think_tags(text: str) -> str:
         """
-        Remove <think>...</think> tags at the start of text and strip whitespace.
+        Remove all <think>...</think> tags and strip whitespace.
         """
-        cleaned_content = re.sub(r"^(\s*<think>.*?</think>\n?)", "", text, flags=re.DOTALL)
+        # Remove all think blocks anywhere in the text
+        cleaned_content = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
         return cleaned_content.strip()
 
     @staticmethod
     def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
         """
-        Create a new list of messages with assistant content cleaned of <think> tags.
-        This is useful for store the chat history.
+        Create a new list of messages with assistant content cleaned of <think> tags
+        and system hints/tool call blocks. This is used for both storing and
+        searching chat history to ensure consistency.
+
+        If a message has no tool_calls but contains tool call XML blocks in its
+        content, they will be extracted and moved to the tool_calls field.
         """
         cleaned_messages = []
         for msg in messages:
             if msg.role == "assistant":
                 if isinstance(msg.content, str):
-                    normalized_content = LMDBConversationStore.remove_think_tags(msg.content)
-                    if normalized_content != msg.content:
-                        cleaned_msg = msg.model_copy(update={"content": normalized_content})
+                    text = LMDBConversationStore.remove_think_tags(msg.content)
+                    tool_calls = msg.tool_calls
+                    if not tool_calls:
+                        text, tool_calls = extract_tool_calls(text)
+                    else:
+                        text = remove_tool_call_blocks(text).strip()
+
+                    normalized_content = text.strip()
+
+                    if normalized_content != msg.content or tool_calls != msg.tool_calls:
+                        cleaned_msg = msg.model_copy(
+                            update={
+                                "content": normalized_content or None,
+                                "tool_calls": tool_calls or None,
+                            }
+                        )
                         cleaned_messages.append(cleaned_msg)
                     else:
                         cleaned_messages.append(msg)
                 elif isinstance(msg.content, list):
                     new_content = []
+                    all_extracted_calls = list(msg.tool_calls or [])
                     changed = False
+
                     for item in msg.content:
                         if isinstance(item, ContentItem) and item.type == "text" and item.text:
-                            cleaned_text = LMDBConversationStore.remove_think_tags(item.text)
-                            if cleaned_text != item.text:
+                            text = LMDBConversationStore.remove_think_tags(item.text)
+
+                            if not msg.tool_calls:
+                                text, extracted = extract_tool_calls(text)
+                                if extracted:
+                                    all_extracted_calls.extend(extracted)
+                                    changed = True
+                            else:
+                                text = remove_tool_call_blocks(text).strip()
+
+                            if text != item.text:
                                 changed = True
-                                item = item.model_copy(update={"text": cleaned_text})
+                                item = item.model_copy(update={"text": text.strip() or None})
                         new_content.append(item)
 
                     if changed:
-                        cleaned_messages.append(msg.model_copy(update={"content": new_content}))
+                        cleaned_messages.append(
+                            msg.model_copy(
+                                update={
+                                    "content": new_content,
+                                    "tool_calls": all_extracted_calls or None,
+                                }
+                            )
+                        )
                     else:
                         cleaned_messages.append(msg)
                 else:

From ade61d6826af1f256e7141ab6c1815b047cf8744 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 26 Jan 2026 11:01:41 +0700
Subject: [PATCH 054/236] Refactor: Modify the logic to convert a large prompt
 into a temporary text file attachment

- When multiple chunks are sent simultaneously, Google will immediately invalidate the access token and reject the request
- When a prompt contains a structured format like JSON, splitting it can break the format and may cause the model to misunderstand the context
- Apply another minor tweak suggested by Copilot
---
 app/server/chat.py   | 104 ++++++++++++++++---------------------------
 app/services/lmdb.py |   5 ++-
 app/utils/helper.py  |  13 +++---
 3 files changed, 49 insertions(+), 73 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 646f4fa..063d4d4 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1,7 +1,6 @@
-import asyncio
 import base64
-import random
 import re
+import tempfile
 import uuid
 from dataclasses import dataclass
 from datetime import datetime, timezone
@@ -375,9 +374,7 @@ def _response_items_to_messages(
             ResponseInputItem(type="message", role=item.role, content=normalized_contents or [])
         )
 
-    logger.debug(
-        f"Normalized Responses input: {len(normalized_input)} message items (developer roles mapped to system)."
-    )
+    logger.debug(f"Normalized Responses input: {len(normalized_input)} message items.")
     return messages, normalized_input
 
 
@@ -1077,19 +1074,18 @@ async def _find_reusable_session(
                     updated_at = conv.updated_at or conv.created_at or now
                     age_minutes = (now - updated_at).total_seconds() / 60
 
-                    if age_minutes > METADATA_TTL_MINUTES:
+                    if age_minutes <= METADATA_TTL_MINUTES:
+                        client = await pool.acquire(conv.client_id)
+                        session = client.start_chat(metadata=conv.metadata, model=model)
+                        remain = messages[search_end:]
+                        logger.debug(
+                            f"Match found at prefix length {search_end}. Client: {conv.client_id}"
+                        )
+                        return session, client, remain
+                    else:
                         logger.debug(
                             f"Matched conversation is too old ({age_minutes:.1f}m), skipping reuse."
                         )
-                        break
-
-                    client = await pool.acquire(conv.client_id)
-                    session = client.start_chat(metadata=conv.metadata, model=model)
-                    remain = messages[search_end:]
-                    logger.debug(
-                        f"Match found at prefix length {search_end}. Client: {conv.client_id}"
-                    )
-                    return session, client, remain
             except Exception as e:
                 logger.warning(
                     f"Error checking LMDB for reusable session at length {search_end}: {e}"
@@ -1103,13 +1099,9 @@ async def _find_reusable_session(
 
 
 async def _send_with_split(session: ChatSession, text: str, files: list[Path | str] | None = None):
-    """Send text to Gemini, automatically splitting into multiple batches if it is
-    longer than ``MAX_CHARS_PER_REQUEST``.
-
-    Every intermediate batch (that is **not** the last one) is suffixed with a hint
-    telling Gemini that more content will come, and it should simply reply with
-    "ok". The final batch carries any file uploads and the real user prompt so
-    that Gemini can produce the actual answer.
+    """
+    Send text to Gemini. If text is longer than ``MAX_CHARS_PER_REQUEST``,
+    it is converted into a temporary text file attachment to avoid splitting issues.
     """
     if len(text) <= MAX_CHARS_PER_REQUEST:
         try:
@@ -1118,55 +1110,37 @@ async def _send_with_split(session: ChatSession, text: str, files: list[Path | s
             logger.exception(f"Error sending message to Gemini: {e}")
             raise
 
-    hint_len = len(CONTINUATION_HINT)
-    safe_chunk_size = MAX_CHARS_PER_REQUEST - hint_len
-
-    chunks: list[str] = []
-    pos = 0
-    total = len(text)
-
-    while pos < total:
-        remaining = total - pos
-        if remaining <= MAX_CHARS_PER_REQUEST:
-            chunks.append(text[pos:])
-            break
-
-        end = pos + safe_chunk_size
-        slice_candidate = text[pos:end]
-        # Try to find a safe split point
-        split_idx = -1
-        idx = slice_candidate.rfind("\n")
-        if idx != -1:
-            split_idx = idx
-
-        if split_idx != -1:
-            split_at = pos + split_idx + 1
-        else:
-            split_at = end
+    logger.info(
+        f"Message length ({len(text)}) exceeds limit ({MAX_CHARS_PER_REQUEST}). Converting text to file attachment."
+    )
 
-        chunk = text[pos:split_at] + CONTINUATION_HINT
-        chunks.append(chunk)
-        pos = split_at
+    # Create a temporary directory to hold the message.txt file
+    # This ensures the filename is exactly 'message.txt' as expected by the instruction.
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        temp_file_path = Path(tmpdirname) / "message.txt"
+        temp_file_path.write_text(text, encoding="utf-8")
 
-    chunks_size = len(chunks)
-    for i, chk in enumerate(chunks[:-1]):
         try:
-            logger.debug(f"Sending chunk {i + 1}/{chunks_size}...")
-            await session.send_message(chk)
-            delay = random.uniform(1.0, 3.0)
-            logger.debug(f"Sleeping for {delay:.2f}s...")
-            await asyncio.sleep(delay)
+            # Prepare the files list
+            final_files = list(files) if files else []
+            final_files.append(temp_file_path)
+
+            instruction = (
+                "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n"
+                "**System Instruction:**\n"
+                "1. Read the content of `message.txt`.\n"
+                "2. Treat that content as the **primary** user prompt for this turn.\n"
+                "3. Execute the instructions or answer the questions found *inside* that file immediately.\n"
+            )
+
+            logger.debug(f"Sending prompt as temporary file: {temp_file_path}")
+
+            return await session.send_message(instruction, files=final_files)
+
         except Exception as e:
-            logger.exception(f"Error sending chunk to Gemini: {e}")
+            logger.exception(f"Error sending large text as file to Gemini: {e}")
             raise
 
-    try:
-        logger.debug(f"Sending final chunk {chunks_size}/{chunks_size}...")
-        return await session.send_message(chunks[-1], files=files)
-    except Exception as e:
-        logger.exception(f"Error sending final chunk to Gemini: {e}")
-        raise
-
 
 def _create_streaming_response(
     model_output: str,
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index f4c9938..c9d42cd 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -43,8 +43,9 @@ def _hash_message(message: Message) -> str:
                 break
 
         if text_parts is not None:
-            text_content = "".join(text_parts).replace("\r\n", "\n").strip()
-            core_data["content"] = text_content if text_content else None
+            # Normalize each part but keep them as a list to preserve boundaries and avoid collisions
+            normalized_parts = [p.replace("\r\n", "\n") for p in text_parts]
+            core_data["content"] = normalized_parts if normalized_parts else None
         else:
             core_data["content"] = message.model_dump(mode="json")["content"]
 
diff --git a/app/utils/helper.py b/app/utils/helper.py
index ecf4a47..190b5ce 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -213,7 +213,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
 
     tool_calls: list[ToolCall] = []
 
-    def _create_tool_call(name: str, raw_args: str, index: int) -> None:
+    def _create_tool_call(name: str, raw_args: str) -> None:
         """Helper to parse args and append to the tool_calls list."""
         if not name:
             logger.warning("Encountered tool_call without a function name.")
@@ -226,7 +226,9 @@ def _create_tool_call(name: str, raw_args: str, index: int) -> None:
         except orjson.JSONDecodeError:
             logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.")
 
-        # Generate a deterministic ID based on name, arguments, and index to avoid collisions
+        # Generate a deterministic ID based on name, arguments, and its global sequence index
+        # to ensure uniqueness across multiple fenced blocks while remaining stable for storage.
+        index = len(tool_calls)
         seed = f"{name}:{arguments}:{index}".encode("utf-8")
         call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}"
 
@@ -244,11 +246,11 @@ def _replace_block(match: re.Match[str]) -> str:
             return match.group(0)
 
         found_in_block = False
-        for i, call_match in enumerate(TOOL_CALL_RE.finditer(block_content)):
+        for call_match in TOOL_CALL_RE.finditer(block_content):
             found_in_block = True
             name = (call_match.group(1) or "").strip()
             raw_args = (call_match.group(2) or "").strip()
-            _create_tool_call(name, raw_args, i)
+            _create_tool_call(name, raw_args)
 
         if found_in_block:
             return ""
@@ -258,10 +260,9 @@ def _replace_block(match: re.Match[str]) -> str:
     cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
 
     def _replace_orphan(match: re.Match[str]) -> str:
-        # Note: orphan calls are handled with a fallback index if they appear outside blocks
         name = (match.group(1) or "").strip()
         raw_args = (match.group(2) or "").strip()
-        _create_tool_call(name, raw_args, len(tool_calls))
+        _create_tool_call(name, raw_args)
         return ""
 
     cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned)

From bdd893ff9a2d2c58fcbc3eb0c01aab337177edd3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 28 Jan 2026 13:37:47 +0700
Subject: [PATCH 055/236] Enable streaming responses and fully resolve the
 problem with reusable sessions.

- Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR.
---
 app/main.py            |    2 +-
 app/models/models.py   |    4 +-
 app/server/chat.py     | 1867 ++++++++++++++++++++++------------------
 app/services/client.py |   11 +-
 app/services/lmdb.py   |  152 ++--
 app/services/pool.py   |    4 +-
 app/utils/helper.py    |  113 +--
 7 files changed, 1162 insertions(+), 991 deletions(-)

diff --git a/app/main.py b/app/main.py
index 307eb36..f4e6711 100644
--- a/app/main.py
+++ b/app/main.py
@@ -15,7 +15,7 @@
 )
 from .services import GeminiClientPool, LMDBConversationStore
 
-RETENTION_CLEANUP_INTERVAL_SECONDS = 6 * 60 * 60  # 6 hours
+RETENTION_CLEANUP_INTERVAL_SECONDS = 6 * 60 * 60  # Check every 6 hours
 
 
 async def _run_retention_cleanup(stop_event: asyncio.Event) -> None:
diff --git a/app/models/models.py b/app/models/models.py
index 4072b29..64ceaa9 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -7,7 +7,7 @@
 
 
 class ContentItem(BaseModel):
-    """Content item model"""
+    """Individual content item (text, image, or file) within a message."""
 
     type: Literal["text", "image_url", "file", "input_audio"]
     text: Optional[str] = None
@@ -159,7 +159,7 @@ class ConversationInStore(BaseModel):
     created_at: Optional[datetime] = Field(default=None)
     updated_at: Optional[datetime] = Field(default=None)
 
-    # NOTE: Gemini Web API do not support changing models once a conversation is created.
+    # Gemini Web API does not support changing models once a conversation is created.
     model: str = Field(..., description="Model used for the conversation")
     client_id: str = Field(..., description="Identifier of the Gemini client")
     metadata: list[str | None] = Field(
diff --git a/app/server/chat.py b/app/server/chat.py
index 063d4d4..37d3c70 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1,18 +1,18 @@
 import base64
-import re
-import tempfile
+import io
+import reprlib
 import uuid
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any
+from typing import Any, AsyncGenerator
 
 import orjson
 from fastapi import APIRouter, Depends, HTTPException, Request, status
 from fastapi.responses import StreamingResponse
+from gemini_webapi import ModelOutput
 from gemini_webapi.client import ChatSession
 from gemini_webapi.constants import Model
-from gemini_webapi.exceptions import APIError
 from gemini_webapi.types.image import GeneratedImage, Image
 from loguru import logger
 
@@ -42,21 +42,18 @@
 from ..utils.helper import (
     CODE_BLOCK_HINT,
     CODE_HINT_STRIPPED,
+    CONTROL_TOKEN_RE,
     XML_HINT_STRIPPED,
     XML_WRAP_HINT,
     estimate_tokens,
     extract_image_dimensions,
     extract_tool_calls,
-    iter_stream_segments,
-    remove_tool_call_blocks,
     strip_code_fence,
     text_from_message,
 )
 from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key
 
-# Maximum characters Gemini Web can accept in a single request (configurable)
 MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9)
-CONTINUATION_HINT = "\n(More messages to come, please reply with just 'ok.')"
 METADATA_TTL_MINUTES = 15
 
 router = APIRouter()
@@ -72,6 +69,210 @@ class StructuredOutputRequirement:
     raw_format: dict[str, Any]
 
 
+# --- Helper Functions ---
+
+
+async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]:
+    """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename."""
+    if isinstance(image, GeneratedImage):
+        try:
+            saved_path = await image.save(path=str(temp_dir), full_size=True)
+        except Exception as e:
+            logger.warning(
+                f"Failed to download full-size GeneratedImage, retrying with default size: {e}"
+            )
+            saved_path = await image.save(path=str(temp_dir), full_size=False)
+    else:
+        saved_path = await image.save(path=str(temp_dir))
+
+    if not saved_path:
+        raise ValueError("Failed to save generated image")
+
+    original_path = Path(saved_path)
+    random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}"
+    new_path = temp_dir / random_name
+    original_path.rename(new_path)
+
+    data = new_path.read_bytes()
+    width, height = extract_image_dimensions(data)
+    filename = random_name
+    return base64.b64encode(data).decode("ascii"), width, height, filename
+
+
+def _calculate_usage(
+    messages: list[Message],
+    assistant_text: str | None,
+    tool_calls: list[Any] | None,
+) -> tuple[int, int, int]:
+    """Calculate prompt, completion and total tokens consistently."""
+    prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages)
+    tool_args_text = ""
+    if tool_calls:
+        for call in tool_calls:
+            if hasattr(call, "function"):
+                tool_args_text += call.function.arguments or ""
+            elif isinstance(call, dict):
+                tool_args_text += call.get("function", {}).get("arguments", "")
+
+    completion_basis = assistant_text or ""
+    if tool_args_text:
+        completion_basis = (
+            f"{completion_basis}\n{tool_args_text}" if completion_basis else tool_args_text
+        )
+
+    completion_tokens = estimate_tokens(completion_basis)
+    return prompt_tokens, completion_tokens, prompt_tokens + completion_tokens
+
+
+def _create_responses_standard_payload(
+    response_id: str,
+    created_time: int,
+    model_name: str,
+    assistant_text: str | None,
+    detected_tool_calls: list[Any] | None,
+    image_call_items: list[ResponseImageGenerationCall],
+    response_contents: list[ResponseOutputContent],
+    usage: ResponseUsage,
+    request_data: ResponseCreateRequest,
+    normalized_input: Any,
+) -> ResponseCreateResponse:
+    """Unified factory for building ResponseCreateResponse objects."""
+    message_id = f"msg_{uuid.uuid4().hex}"
+    tool_call_items: list[ResponseToolCall] = []
+    if detected_tool_calls:
+        tool_call_items = [
+            ResponseToolCall(
+                id=call.id if hasattr(call, "id") else call["id"],
+                status="completed",
+                function=call.function if hasattr(call, "function") else call["function"],
+            )
+            for call in detected_tool_calls
+        ]
+
+    return ResponseCreateResponse(
+        id=response_id,
+        created_at=created_time,
+        model=model_name,
+        output=[
+            ResponseOutputMessage(
+                id=message_id,
+                type="message",
+                role="assistant",
+                content=response_contents,
+            ),
+            *tool_call_items,
+            *image_call_items,
+        ],
+        status="completed",
+        usage=usage,
+        input=normalized_input or None,
+        metadata=request_data.metadata or None,
+        tools=request_data.tools,
+        tool_choice=request_data.tool_choice,
+    )
+
+
+def _create_chat_completion_standard_payload(
+    completion_id: str,
+    created_time: int,
+    model_name: str,
+    visible_output: str | None,
+    tool_calls_payload: list[dict] | None,
+    finish_reason: str,
+    usage: dict,
+) -> dict:
+    """Unified factory for building Chat Completion response dictionaries."""
+    return {
+        "id": completion_id,
+        "object": "chat.completion",
+        "created": created_time,
+        "model": model_name,
+        "choices": [
+            {
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": visible_output or None,
+                    "tool_calls": tool_calls_payload or None,
+                },
+                "finish_reason": finish_reason,
+            }
+        ],
+        "usage": usage,
+    }
+
+
+def _process_llm_output(
+    raw_output_with_think: str,
+    raw_output_clean: str,
+    structured_requirement: StructuredOutputRequirement | None,
+) -> tuple[str, str, list[Any]]:
+    """
+    Common post-processing logic for Gemini output.
+    Returns: (visible_text, storage_output, tool_calls)
+    """
+    visible_with_think, tool_calls = extract_tool_calls(raw_output_with_think)
+    if tool_calls:
+        logger.debug(f"Detected {len(tool_calls)} tool call(s) in model output.")
+
+    visible_output = visible_with_think.strip()
+
+    storage_output, _ = extract_tool_calls(raw_output_clean)
+    storage_output = storage_output.strip()
+
+    if structured_requirement:
+        cleaned_for_json = LMDBConversationStore.remove_think_tags(visible_output)
+        json_text = strip_code_fence(cleaned_for_json or "")
+        if json_text:
+            try:
+                structured_payload = orjson.loads(json_text)
+                canonical_output = orjson.dumps(structured_payload).decode("utf-8")
+                visible_output = canonical_output
+                storage_output = canonical_output
+                logger.debug(
+                    f"Structured response fulfilled (schema={structured_requirement.schema_name})."
+                )
+            except orjson.JSONDecodeError:
+                logger.warning(
+                    f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name})."
+                )
+
+    return visible_output, storage_output, tool_calls
+
+
+def _persist_conversation(
+    db: LMDBConversationStore,
+    model_name: str,
+    client_id: str,
+    metadata: list[str | None],
+    messages: list[Message],
+    storage_output: str | None,
+    tool_calls: list[Any] | None,
+) -> str | None:
+    """Unified logic to save conversation history to LMDB."""
+    try:
+        current_assistant_message = Message(
+            role="assistant",
+            content=storage_output or None,
+            tool_calls=tool_calls or None,
+        )
+        full_history = [*messages, current_assistant_message]
+        cleaned_history = db.sanitize_assistant_messages(full_history)
+
+        conv = ConversationInStore(
+            model=model_name,
+            client_id=client_id,
+            metadata=metadata,
+            messages=cleaned_history,
+        )
+        key = db.store(conv)
+        logger.debug(f"Conversation saved to LMDB with key: {key[:12]}")
+        return key
+    except Exception as e:
+        logger.warning(f"Failed to save {len(messages) + 1} messages to LMDB: {e}")
+        return None
+
+
 def _build_structured_requirement(
     response_format: dict[str, Any] | None,
 ) -> StructuredOutputRequirement | None:
@@ -80,17 +281,23 @@ def _build_structured_requirement(
         return None
 
     if response_format.get("type") != "json_schema":
-        logger.warning(f"Unsupported response_format type requested: {response_format}")
+        logger.warning(
+            f"Unsupported response_format type requested: {reprlib.repr(response_format)}"
+        )
         return None
 
     json_schema = response_format.get("json_schema")
     if not isinstance(json_schema, dict):
-        logger.warning(f"Invalid json_schema payload in response_format: {response_format}")
+        logger.warning(
+            f"Invalid json_schema payload in response_format: {reprlib.repr(response_format)}"
+        )
         return None
 
     schema = json_schema.get("schema")
     if not isinstance(schema, dict):
-        logger.warning(f"Missing `schema` object in response_format payload: {response_format}")
+        logger.warning(
+            f"Missing `schema` object in response_format payload: {reprlib.repr(response_format)}"
+        )
         return None
 
     schema_name = json_schema.get("name") or "response"
@@ -136,7 +343,9 @@ def _build_tool_prompt(
         description = function.description or "No description provided."
         lines.append(f"Tool `{function.name}`: {description}")
         if function.parameters:
-            schema_text = orjson.dumps(function.parameters).decode("utf-8")
+            schema_text = orjson.dumps(function.parameters, option=orjson.OPT_SORT_KEYS).decode(
+                "utf-8"
+            )
             lines.append("Arguments JSON schema:")
             lines.append(schema_text)
         else:
@@ -155,7 +364,6 @@ def _build_tool_prompt(
         lines.append(
             f"You are required to call the tool named `{target}`. Do not call any other tool."
         )
-    # `auto` or None fall back to default instructions.
 
     lines.append(
         "When you decide to call a tool you MUST respond with nothing except a single fenced block exactly like the template below."
@@ -221,7 +429,7 @@ def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None:
 
         if isinstance(msg.content, str):
             if XML_HINT_STRIPPED not in msg.content:
-                msg.content = f"{msg.content}{XML_WRAP_HINT}"
+                msg.content = f"{msg.content}\n{XML_WRAP_HINT}"
             return
 
         if isinstance(msg.content, list):
@@ -231,15 +439,13 @@ def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None:
                 text_value = part.text or ""
                 if XML_HINT_STRIPPED in text_value:
                     return
-                part.text = f"{text_value}{XML_WRAP_HINT}"
+                part.text = f"{text_value}\n{XML_WRAP_HINT}"
                 return
 
             messages_text = XML_WRAP_HINT.strip()
             msg.content.append(ContentItem(type="text", text=messages_text))
             return
 
-    # No user message to annotate; nothing to do.
-
 
 def _conversation_has_code_hint(messages: list[Message]) -> bool:
     """Return True if any system message already includes the code block hint."""
@@ -290,7 +496,6 @@ def _prepare_messages_for_model(
             logger.debug("Injected default code block hint for Gemini conversation.")
 
     if not instructions:
-        # Still need to ensure XML hint for the last user message if tools are present
         if tools and tool_choice != "none":
             _append_xml_hint_to_last_user_message(prepared)
         return prepared
@@ -323,7 +528,6 @@ def _response_items_to_messages(
     normalized_input: list[ResponseInputItem] = []
     for item in items:
         role = item.role
-
         content = item.content
         normalized_contents: list[ResponseInputContent] = []
         if isinstance(content, str):
@@ -394,7 +598,6 @@ def _instructions_to_messages(
             continue
 
         role = item.role
-
         content = item.content
         if isinstance(content, str):
             instruction_messages.append(Message(role=role, content=content))
@@ -432,10 +635,7 @@ def _instructions_to_messages(
 
 
 def _get_model_by_name(name: str) -> Model:
-    """
-    Retrieve a Model instance by name, considering custom models from config
-    and the update strategy (append or overwrite).
-    """
+    """Retrieve a Model instance by name."""
     strategy = g_config.gemini.model_strategy
     custom_models = {m.model_name: m for m in g_config.gemini.models if m.model_name}
 
@@ -449,9 +649,7 @@ def _get_model_by_name(name: str) -> Model:
 
 
 def _get_available_models() -> list[ModelData]:
-    """
-    Return a list of available models based on configuration strategy.
-    """
+    """Return a list of available models based on configuration strategy."""
     now = int(datetime.now(tz=timezone.utc).timestamp())
     strategy = g_config.gemini.model_strategy
     models_data = []
@@ -486,910 +684,897 @@ def _get_available_models() -> list[ModelData]:
     return models_data
 
 
-@router.get("/v1/models", response_model=ModelListResponse)
-async def list_models(api_key: str = Depends(verify_api_key)):
-    models = _get_available_models()
-    return ModelListResponse(data=models)
-
-
-@router.post("/v1/chat/completions")
-async def create_chat_completion(
-    request: ChatCompletionRequest,
-    api_key: str = Depends(verify_api_key),
-    tmp_dir: Path = Depends(get_temp_dir),
-    image_store: Path = Depends(get_image_store_dir),
-):
-    pool = GeminiClientPool()
-    db = LMDBConversationStore()
-
-    try:
-        model = _get_model_by_name(request.model)
-    except ValueError as exc:
-        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
-
-    if len(request.messages) == 0:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="At least one message is required in the conversation.",
-        )
+async def _find_reusable_session(
+    db: LMDBConversationStore,
+    pool: GeminiClientPool,
+    model: Model,
+    messages: list[Message],
+) -> tuple[ChatSession | None, GeminiClientWrapper | None, list[Message]]:
+    """Find an existing chat session matching the longest suitable history prefix."""
+    if len(messages) < 2:
+        return None, None, messages
 
-    structured_requirement = _build_structured_requirement(request.response_format)
-    if structured_requirement and request.stream:
-        logger.debug(
-            "Structured response requested with streaming enabled; will stream canonical JSON once ready."
-        )
-    if structured_requirement:
-        logger.debug(
-            f"Structured response requested for /v1/chat/completions (schema={structured_requirement.schema_name})."
-        )
+    search_end = len(messages)
+    while search_end >= 2:
+        search_history = messages[:search_end]
+        if search_history[-1].role in {"assistant", "system", "tool"}:
+            try:
+                if conv := db.find(model.model_name, search_history):
+                    now = datetime.now()
+                    updated_at = conv.updated_at or conv.created_at or now
+                    age_minutes = (now - updated_at).total_seconds() / 60
+                    if age_minutes <= METADATA_TTL_MINUTES:
+                        client = await pool.acquire(conv.client_id)
+                        session = client.start_chat(metadata=conv.metadata, model=model)
+                        remain = messages[search_end:]
+                        logger.debug(
+                            f"Match found at prefix length {search_end}/{len(messages)}. Client: {conv.client_id}"
+                        )
+                        return session, client, remain
+                    else:
+                        logger.debug(
+                            f"Matched conversation at length {search_end} is too old ({age_minutes:.1f}m), skipping reuse."
+                        )
+                else:
+                    # No stored conversation matches this prefix; fall through to a shorter one.
+                    pass
+            except Exception as e:
+                logger.warning(
+                    f"Error checking LMDB for reusable session at length {search_end}: {e}"
+                )
+                break
+        search_end -= 1
 
-    extra_instructions = [structured_requirement.instruction] if structured_requirement else None
+    logger.debug(f"No reusable session found for {len(messages)} messages.")
+    return None, None, messages
 
-    # Check if conversation is reusable
-    session, client, remaining_messages = await _find_reusable_session(
-        db, pool, model, request.messages
-    )
 
-    if session:
-        # Optimization: When reusing a session, we don't need to resend the heavy tool definitions
-        # or structured output instructions as they are already in the Gemini session history.
-        messages_to_send = _prepare_messages_for_model(
-            remaining_messages,
-            request.tools,
-            request.tool_choice,
-            extra_instructions,
-            inject_system_defaults=False,
-        )
-        if not messages_to_send:
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail="No new messages to send for the existing session.",
-            )
-        if len(messages_to_send) == 1:
-            model_input, files = await GeminiClientWrapper.process_message(
-                messages_to_send[0], tmp_dir, tagged=False
-            )
-        else:
-            model_input, files = await GeminiClientWrapper.process_conversation(
-                messages_to_send, tmp_dir
-            )
-        logger.debug(
-            f"Reused session {session.metadata} - sending {len(messages_to_send)} prepared messages."
-        )
-    else:
-        # Start a new session and concat messages into a single string
+async def _send_with_split(
+    session: ChatSession,
+    text: str,
+    files: list[Path | str | io.BytesIO] | None = None,
+    stream: bool = False,
+) -> AsyncGenerator[ModelOutput, None] | ModelOutput:
+    """Send text to Gemini, converting it to a `message.txt` attachment if it is too long."""
+    if len(text) <= MAX_CHARS_PER_REQUEST:
         try:
-            client = await pool.acquire()
-            session = client.start_chat(model=model)
-            messages_to_send = _prepare_messages_for_model(
-                request.messages, request.tools, request.tool_choice, extra_instructions
-            )
-            model_input, files = await GeminiClientWrapper.process_conversation(
-                messages_to_send, tmp_dir
-            )
-        except ValueError as e:
-            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
-        except RuntimeError as e:
-            raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e))
+            if stream:
+                return session.send_message_stream(text, files=files)
+            return await session.send_message(text, files=files)
         except Exception as e:
-            logger.exception(f"Error in preparing conversation: {e}")
+            logger.exception(f"Error sending message to Gemini: {e}")
             raise
-        logger.debug("New session started.")
 
-    # Generate response
+    logger.info(
+        f"Message length ({len(text)}) exceeds limit ({MAX_CHARS_PER_REQUEST}). Converting text to file attachment."
+    )
+    file_obj = io.BytesIO(text.encode("utf-8"))
+    file_obj.name = "message.txt"
     try:
-        assert session and client, "Session and client not available"
-        client_id = client.id
-        logger.debug(
-            f"Client ID: {client_id}, Input length: {len(model_input)}, files count: {len(files)}"
+        final_files = list(files) if files else []
+        final_files.append(file_obj)
+        instruction = (
+            "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n"
+            "**System Instruction:**\n"
+            "1. Read the content of `message.txt`.\n"
+            "2. Treat that content as the **primary** user prompt for this turn.\n"
+            "3. Execute the instructions or answer the questions found *inside* that file immediately.\n"
         )
-        response = await _send_with_split(session, model_input, files=files)
-    except APIError as exc:
-        client_id = client.id if client else "unknown"
-        logger.warning(f"Gemini API returned invalid response for client {client_id}: {exc}")
-        raise HTTPException(
-            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
-            detail="Gemini temporarily returned an invalid response. Please retry.",
-        ) from exc
-    except HTTPException:
-        raise
+        if stream:
+            return session.send_message_stream(instruction, files=final_files)
+        return await session.send_message(instruction, files=final_files)
     except Exception as e:
-        logger.exception(f"Unexpected error generating content from Gemini API: {e}")
-        raise HTTPException(
-            status_code=status.HTTP_502_BAD_GATEWAY,
-            detail="Gemini returned an unexpected error.",
-        ) from e
+        logger.exception(f"Error sending large text as file to Gemini: {e}")
+        raise
 
-    # Format the response from API
-    try:
-        raw_output_with_think = GeminiClientWrapper.extract_output(response, include_thoughts=True)
-        raw_output_clean = GeminiClientWrapper.extract_output(response, include_thoughts=False)
-    except IndexError as exc:
-        logger.exception("Gemini output parsing failed (IndexError).")
-        raise HTTPException(
-            status_code=status.HTTP_502_BAD_GATEWAY,
-            detail="Gemini returned malformed response content.",
-        ) from exc
-    except Exception as exc:
-        logger.exception("Gemini output parsing failed unexpectedly.")
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Gemini output parsing failed unexpectedly.",
-        ) from exc
 
-    visible_output, tool_calls = extract_tool_calls(raw_output_with_think)
-    storage_output = remove_tool_call_blocks(raw_output_clean).strip()
-    tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls]
+class StreamingOutputFilter:
+    """
+    Enhanced streaming filter that suppresses:
+    1. XML tool call blocks: ```xml ... ```
+    2. ChatML tool blocks: <|im_start|>tool\n...<|im_end|>
+    3. ChatML role headers: <|im_start|>role\n (only suppresses the header, keeps content)
+    4. Control tokens: <|im_start|>, <|im_end|>
+    5. System instructions/hints: XML_WRAP_HINT, CODE_BLOCK_HINT, etc.
+    """
 
-    if structured_requirement:
-        cleaned_visible = strip_code_fence(visible_output or "")
-        if not cleaned_visible:
-            raise HTTPException(
-                status_code=status.HTTP_502_BAD_GATEWAY,
-                detail="LLM returned an empty response while JSON schema output was requested.",
-            )
-        try:
-            structured_payload = orjson.loads(cleaned_visible)
-        except orjson.JSONDecodeError as exc:
-            logger.warning(
-                f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): "
-                f"{cleaned_visible}"
-            )
-            raise HTTPException(
-                status_code=status.HTTP_502_BAD_GATEWAY,
-                detail="LLM returned invalid JSON for the requested response_format.",
-            ) from exc
+    def __init__(self):
+        self.buffer = ""
+        self.in_xml_tool = False
+        self.in_tagged_block = False
+        self.in_role_header = False
+        self.current_role = ""
+
+        self.XML_START = "```xml"
+        self.XML_END = "```"
+        self.TAG_START = "<|im_start|>"
+        self.TAG_END = "<|im_end|>"
+        self.SYSTEM_HINTS = [
+            XML_WRAP_HINT,
+            XML_HINT_STRIPPED,
+            CODE_BLOCK_HINT,
+            CODE_HINT_STRIPPED,
+        ]
 
-        canonical_output = orjson.dumps(structured_payload).decode("utf-8")
-        visible_output = canonical_output
-        storage_output = canonical_output
+    def process(self, chunk: str) -> str:
+        self.buffer += chunk
+        to_yield = ""
+
+        while self.buffer:
+            if self.in_xml_tool:
+                end_idx = self.buffer.find(self.XML_END)
+                if end_idx != -1:
+                    self.buffer = self.buffer[end_idx + len(self.XML_END) :]
+                    self.in_xml_tool = False
+                else:
+                    break
+            elif self.in_role_header:
+                nl_idx = self.buffer.find("\n")
+                if nl_idx != -1:
+                    role_text = self.buffer[:nl_idx].strip().lower()
+                    self.current_role = role_text
+                    self.buffer = self.buffer[nl_idx + 1 :]
+                    self.in_role_header = False
+                    self.in_tagged_block = True
+                else:
+                    break
+            elif self.in_tagged_block:
+                end_idx = self.buffer.find(self.TAG_END)
+                if end_idx != -1:
+                    content = self.buffer[:end_idx]
+                    if self.current_role != "tool":
+                        to_yield += content
+                    self.buffer = self.buffer[end_idx + len(self.TAG_END) :]
+                    self.in_tagged_block = False
+                    self.current_role = ""
+                else:
+                    if self.current_role == "tool":
+                        break
+                    else:
+                        yield_len = len(self.buffer) - (len(self.TAG_END) - 1)
+                        if yield_len > 0:
+                            to_yield += self.buffer[:yield_len]
+                            self.buffer = self.buffer[yield_len:]
+                        break
+            else:
+                # Outside any special block. Look for starts.
+                earliest_idx = -1
+                match_type = ""
+
+                xml_idx = self.buffer.find(self.XML_START)
+                if xml_idx != -1:
+                    earliest_idx = xml_idx
+                    match_type = "xml"
+
+                tag_s_idx = self.buffer.find(self.TAG_START)
+                if tag_s_idx != -1:
+                    if earliest_idx == -1 or tag_s_idx < earliest_idx:
+                        earliest_idx = tag_s_idx
+                        match_type = "tag_start"
+
+                tag_e_idx = self.buffer.find(self.TAG_END)
+                if tag_e_idx != -1:
+                    if earliest_idx == -1 or tag_e_idx < earliest_idx:
+                        earliest_idx = tag_e_idx
+                        match_type = "tag_end"
+
+                if earliest_idx != -1:
+                    # Yield text before the match
+                    to_yield += self.buffer[:earliest_idx]
+                    self.buffer = self.buffer[earliest_idx:]
+
+                    if match_type == "xml":
+                        self.in_xml_tool = True
+                        self.buffer = self.buffer[len(self.XML_START) :]
+                    elif match_type == "tag_start":
+                        self.in_role_header = True
+                        self.buffer = self.buffer[len(self.TAG_START) :]
+                    elif match_type == "tag_end":
+                        # Orphaned end tag, just skip it
+                        self.buffer = self.buffer[len(self.TAG_END) :]
+                    continue
+                else:
+                    # Check for prefixes
+                    prefixes = [self.XML_START, self.TAG_START, self.TAG_END]
+                    max_keep = 0
+                    for p in prefixes:
+                        for i in range(len(p) - 1, 0, -1):
+                            if self.buffer.endswith(p[:i]):
+                                max_keep = max(max_keep, i)
+                                break
 
-    if tool_calls_payload:
-        logger.debug(f"Detected tool calls: {tool_calls_payload}")
+                    yield_len = len(self.buffer) - max_keep
+                    if yield_len > 0:
+                        to_yield += self.buffer[:yield_len]
+                        self.buffer = self.buffer[yield_len:]
+                    break
 
-    # After formatting, persist the conversation to LMDB
-    try:
-        current_assistant_message = Message(
-            role="assistant",
-            content=storage_output or None,
-            tool_calls=tool_calls or None,
-        )
-        # Sanitize the entire history including the new message to ensure consistency
-        full_history = [*request.messages, current_assistant_message]
-        cleaned_history = db.sanitize_assistant_messages(full_history)
+        # Final pass: filter out system hints from the text to be yielded
+        for hint in self.SYSTEM_HINTS:
+            if hint in to_yield:
+                to_yield = to_yield.replace(hint, "")
 
-        conv = ConversationInStore(
-            model=model.model_name,
-            client_id=client.id,
-            metadata=session.metadata,
-            messages=cleaned_history,
-        )
-        key = db.store(conv)
-        logger.debug(f"Conversation saved to LMDB with key: {key}")
-    except Exception as e:
-        # We can still return the response even if saving fails
-        logger.warning(f"Failed to save conversation to LMDB: {e}")
+        return to_yield
 
-    # Return with streaming or standard response
-    completion_id = f"chatcmpl-{uuid.uuid4()}"
-    timestamp = int(datetime.now(tz=timezone.utc).timestamp())
-    if request.stream:
-        return _create_streaming_response(
-            visible_output,
-            tool_calls_payload,
-            completion_id,
-            timestamp,
-            request.model,
-            request.messages,
-        )
-    else:
-        return _create_standard_response(
-            visible_output,
-            tool_calls_payload,
-            completion_id,
-            timestamp,
-            request.model,
-            request.messages,
-        )
-
-
-@router.post("/v1/responses")
-async def create_response(
-    request_data: ResponseCreateRequest,
-    request: Request,
-    api_key: str = Depends(verify_api_key),
-    tmp_dir: Path = Depends(get_temp_dir),
-    image_store: Path = Depends(get_image_store_dir),
-):
-    base_messages, normalized_input = _response_items_to_messages(request_data.input)
-    structured_requirement = _build_structured_requirement(request_data.response_format)
-    if structured_requirement and request_data.stream:
-        logger.debug(
-            "Structured response requested with streaming enabled; streaming not supported for Responses."
-        )
-
-    extra_instructions: list[str] = []
-    if structured_requirement:
-        extra_instructions.append(structured_requirement.instruction)
-        logger.debug(
-            f"Structured response requested for /v1/responses (schema={structured_requirement.schema_name})."
-        )
-
-    # Separate standard tools from image generation tools
-    standard_tools: list[Tool] = []
-    image_tools: list[ResponseImageTool] = []
-
-    if request_data.tools:
-        for t in request_data.tools:
-            if isinstance(t, Tool):
-                standard_tools.append(t)
-            elif isinstance(t, ResponseImageTool):
-                image_tools.append(t)
-            # Handle dicts if Pydantic didn't convert them fully (fallback)
-            elif isinstance(t, dict):
-                t_type = t.get("type")
-                if t_type == "function":
-                    standard_tools.append(Tool.model_validate(t))
-                elif t_type == "image_generation":
-                    image_tools.append(ResponseImageTool.model_validate(t))
-
-    image_instruction = _build_image_generation_instruction(
-        image_tools,
-        request_data.tool_choice
-        if isinstance(request_data.tool_choice, ResponseToolChoice)
-        else None,
-    )
-    if image_instruction:
-        extra_instructions.append(image_instruction)
-        logger.debug("Image generation support enabled for /v1/responses request.")
-
-    preface_messages = _instructions_to_messages(request_data.instructions)
-    conversation_messages = base_messages
-    if preface_messages:
-        conversation_messages = [*preface_messages, *base_messages]
-        logger.debug(
-            f"Injected {len(preface_messages)} instruction messages before sending to Gemini."
-        )
-
-    # Pass standard tools to the prompt builder
-    # Determine tool_choice for standard tools (ignore image_generation choice here as it is handled via instruction)
-    model_tool_choice = None
-    if isinstance(request_data.tool_choice, str):
-        model_tool_choice = request_data.tool_choice
-    elif isinstance(request_data.tool_choice, ToolChoiceFunction):
-        model_tool_choice = request_data.tool_choice
-    # If tool_choice is ResponseToolChoice (image_generation), we don't pass it as a function tool choice.
-
-    messages = _prepare_messages_for_model(
-        conversation_messages,
-        tools=standard_tools or None,
-        tool_choice=model_tool_choice,
-        extra_instructions=extra_instructions or None,
-    )
-
-    pool = GeminiClientPool()
-    db = LMDBConversationStore()
-
-    try:
-        model = _get_model_by_name(request_data.model)
-    except ValueError as exc:
-        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
-
-    session, client, remaining_messages = await _find_reusable_session(db, pool, model, messages)
-
-    async def _build_payload(
-        _payload_messages: list[Message], _reuse_session: bool
-    ) -> tuple[str, list[Path | str]]:
-        if _reuse_session and len(_payload_messages) == 1:
-            return await GeminiClientWrapper.process_message(
-                _payload_messages[0], tmp_dir, tagged=False
-            )
-        return await GeminiClientWrapper.process_conversation(_payload_messages, tmp_dir)
-
-    reuse_session = session is not None
-    if reuse_session:
-        messages_to_send = _prepare_messages_for_model(
-            remaining_messages,
-            tools=request_data.tools,  # Keep for XML hint logic
-            tool_choice=request_data.tool_choice,
-            extra_instructions=None,  # Already in session history
-            inject_system_defaults=False,
-        )
-        if not messages_to_send:
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail="No new messages to send for the existing session.",
-            )
-        payload_messages = messages_to_send
-        model_input, files = await _build_payload(payload_messages, _reuse_session=True)
-        logger.debug(
-            f"Reused session {session.metadata} - sending {len(payload_messages)} prepared messages."
-        )
-    else:
-        try:
-            client = await pool.acquire()
-            session = client.start_chat(model=model)
-            payload_messages = messages
-            model_input, files = await _build_payload(payload_messages, _reuse_session=False)
-        except ValueError as e:
-            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
-        except RuntimeError as e:
-            raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e))
-        except Exception as e:
-            logger.exception(f"Error in preparing conversation for responses API: {e}")
-            raise
-        logger.debug("New session started for /v1/responses request.")
+    def flush(self) -> str:
+        # If we are stuck in a tool block or role header at the end,
+        # it usually means malformed output.
+        if self.in_xml_tool or (self.in_tagged_block and self.current_role == "tool"):
+            return ""
 
-    try:
-        assert session and client, "Session and client not available"
-        client_id = client.id
-        logger.debug(
-            f"Client ID: {client_id}, Input length: {len(model_input)}, files count: {len(files)}"
-        )
-        model_output = await _send_with_split(session, model_input, files=files)
-    except APIError as exc:
-        client_id = client.id if client else "unknown"
-        logger.warning(f"Gemini API returned invalid response for client {client_id}: {exc}")
-        raise HTTPException(
-            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
-            detail="Gemini temporarily returned an invalid response. Please retry.",
-        ) from exc
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.exception(f"Unexpected error generating content from Gemini API for responses: {e}")
-        raise HTTPException(
-            status_code=status.HTTP_502_BAD_GATEWAY,
-            detail="Gemini returned an unexpected error.",
-        ) from e
+        final_text = self.buffer
+        self.buffer = ""
 
-    try:
-        text_with_think = GeminiClientWrapper.extract_output(model_output, include_thoughts=True)
-        text_without_think = GeminiClientWrapper.extract_output(
-            model_output, include_thoughts=False
-        )
-    except IndexError as exc:
-        logger.exception("Gemini output parsing failed (IndexError).")
-        raise HTTPException(
-            status_code=status.HTTP_502_BAD_GATEWAY,
-            detail="Gemini returned malformed response content.",
-        ) from exc
-    except Exception as exc:
-        logger.exception("Gemini output parsing failed unexpectedly.")
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Gemini output parsing failed unexpectedly.",
-        ) from exc
+        # Filter out any orphaned/partial control tokens or hints
+        final_text = CONTROL_TOKEN_RE.sub("", final_text)
+        for hint in self.SYSTEM_HINTS:
+            final_text = final_text.replace(hint, "")
 
-    visible_text, detected_tool_calls = extract_tool_calls(text_with_think)
-    storage_output = remove_tool_call_blocks(text_without_think).strip()
-    assistant_text = LMDBConversationStore.remove_think_tags(visible_text.strip())
+        return final_text.strip()
 
-    if structured_requirement:
-        cleaned_visible = strip_code_fence(assistant_text or "")
-        if not cleaned_visible:
-            raise HTTPException(
-                status_code=status.HTTP_502_BAD_GATEWAY,
-                detail="LLM returned an empty response while JSON schema output was requested.",
-            )
-        try:
-            structured_payload = orjson.loads(cleaned_visible)
-        except orjson.JSONDecodeError as exc:
-            logger.warning(
-                f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name}): "
-                f"{cleaned_visible}"
-            )
-            raise HTTPException(
-                status_code=status.HTTP_502_BAD_GATEWAY,
-                detail="LLM returned invalid JSON for the requested response_format.",
-            ) from exc
-
-        canonical_output = orjson.dumps(structured_payload).decode("utf-8")
-        assistant_text = canonical_output
-        storage_output = canonical_output
-        logger.debug(
-            f"Structured response fulfilled for /v1/responses (schema={structured_requirement.schema_name})."
-        )
 
-    expects_image = (
-        request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation"
-    )
-    images = model_output.images or []
-    logger.debug(
-        f"Gemini returned {len(images)} image(s) for /v1/responses "
-        f"(expects_image={expects_image}, instruction_applied={bool(image_instruction)})."
-    )
-    if expects_image and not images:
-        summary = assistant_text.strip() if assistant_text else ""
-        if summary:
-            summary = re.sub(r"\s+", " ", summary)
-            if len(summary) > 200:
-                summary = f"{summary[:197]}..."
-        logger.warning(
-            "Image generation requested but Gemini produced no images. "
-            f"client_id={client_id}, forced_tool_choice={request_data.tool_choice is not None}, "
-            f"instruction_applied={bool(image_instruction)}, assistant_preview='{summary}'"
-        )
-        detail = "LLM returned no images for the requested image_generation tool."
-        if summary:
-            detail = f"{detail} Assistant response: {summary}"
-        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail)
+# --- Response Builders & Streaming ---
 
-    response_contents: list[ResponseOutputContent] = []
-    image_call_items: list[ResponseImageGenerationCall] = []
-    for image in images:
-        try:
-            image_base64, width, height, filename = await _image_to_base64(image, image_store)
-        except Exception as exc:
-            logger.warning(f"Failed to download generated image: {exc}")
-            continue
-
-        img_format = "png" if isinstance(image, GeneratedImage) else "jpeg"
-
-        # Use static URL for compatibility
-        image_url = (
-            f"![{filename}]({request.base_url}images/{filename}?token={get_image_token(filename)})"
-        )
-
-        image_call_items.append(
-            ResponseImageGenerationCall(
-                id=filename.rsplit(".", 1)[0],
-                status="completed",
-                result=image_base64,
-                output_format=img_format,
-                size=f"{width}x{height}" if width and height else None,
-            )
-        )
-        # Add as output_text content for compatibility
-        response_contents.append(
-            ResponseOutputContent(type="output_text", text=image_url, annotations=[])
-        )
-
-    tool_call_items: list[ResponseToolCall] = []
-    if detected_tool_calls:
-        tool_call_items = [
-            ResponseToolCall(
-                id=call.id,
-                status="completed",
-                function=call.function,
-            )
-            for call in detected_tool_calls
-        ]
-
-    if assistant_text:
-        response_contents.append(
-            ResponseOutputContent(type="output_text", text=assistant_text, annotations=[])
-        )
 
-    if not response_contents:
-        response_contents.append(ResponseOutputContent(type="output_text", text="", annotations=[]))
-
-    created_time = int(datetime.now(tz=timezone.utc).timestamp())
-    response_id = f"resp_{uuid.uuid4().hex}"
-    message_id = f"msg_{uuid.uuid4().hex}"
-
-    input_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages)
-    tool_arg_text = "".join(call.function.arguments or "" for call in detected_tool_calls)
-    completion_basis = assistant_text or ""
-    if tool_arg_text:
-        completion_basis = (
-            f"{completion_basis}\n{tool_arg_text}" if completion_basis else tool_arg_text
-        )
-    output_tokens = estimate_tokens(completion_basis)
-    usage = ResponseUsage(
-        input_tokens=input_tokens,
-        output_tokens=output_tokens,
-        total_tokens=input_tokens + output_tokens,
-    )
-
-    response_payload = ResponseCreateResponse(
-        id=response_id,
-        created_at=created_time,
-        model=request_data.model,
-        output=[
-            ResponseOutputMessage(
-                id=message_id,
-                type="message",
-                role="assistant",
-                content=response_contents,
-            ),
-            *tool_call_items,
-            *image_call_items,
-        ],
-        status="completed",
-        usage=usage,
-        input=normalized_input or None,
-        metadata=request_data.metadata or None,
-        tools=request_data.tools,
-        tool_choice=request_data.tool_choice,
-    )
-
-    try:
-        current_assistant_message = Message(
-            role="assistant",
-            content=storage_output or None,
-            tool_calls=detected_tool_calls or None,
-        )
-        full_history = [*messages, current_assistant_message]
-        cleaned_history = db.sanitize_assistant_messages(full_history)
-
-        conv = ConversationInStore(
-            model=model.model_name,
-            client_id=client.id,
-            metadata=session.metadata,
-            messages=cleaned_history,
-        )
-        key = db.store(conv)
-        logger.debug(f"Conversation saved to LMDB with key: {key}")
-    except Exception as exc:
-        logger.warning(f"Failed to save Responses conversation to LMDB: {exc}")
-
-    if request_data.stream:
-        logger.debug(
-            f"Streaming Responses API payload (response_id={response_payload.id}, text_chunks={bool(assistant_text)})."
-        )
-        return _create_responses_streaming_response(response_payload, assistant_text or "")
-
-    return response_payload
-
-
-async def _find_reusable_session(
+def _create_real_streaming_response(
+    generator: AsyncGenerator[ModelOutput, None],
+    completion_id: str,
+    created_time: int,
+    model_name: str,
+    messages: list[Message],
     db: LMDBConversationStore,
-    pool: GeminiClientPool,
     model: Model,
-    messages: list[Message],
-) -> tuple[ChatSession | None, GeminiClientWrapper | None, list[Message]]:
-    """Find an existing chat session that matches the *longest* prefix of
-    ``messages`` **whose last element is an assistant/system reply**.
-
-    Rationale
-    ---------
-    When a reply was generated by *another* server instance, the local LMDB may
-    only contain an older part of the conversation.  However, as long as we can
-    line up **any** earlier assistant/system response, we can restore the
-    corresponding Gemini session and replay the *remaining* turns locally
-    (including that missing assistant reply and the subsequent user prompts).
-
-    The algorithm therefore walks backwards through the history **one message at
-    a time**, each time requiring the current tail to be assistant/system before
-    querying LMDB.  As soon as a match is found we recreate the session and
-    return the untouched suffix as ``remaining_messages``.
-    """
-
-    if len(messages) < 2:
-        return None, None, messages
-
-    # Start with the full history and iteratively trim from the end.
-    search_end = len(messages)
-
-    while search_end >= 2:
-        search_history = messages[:search_end]
-
-        # Only try to match if the last stored message would be assistant/system/tool before querying LMDB.
-        if search_history[-1].role in {"assistant", "system", "tool"}:
-            try:
-                if conv := db.find(model.model_name, search_history):
-                    # Check if metadata is too old
-                    now = datetime.now()
-                    updated_at = conv.updated_at or conv.created_at or now
-                    age_minutes = (now - updated_at).total_seconds() / 60
-
-                    if age_minutes <= METADATA_TTL_MINUTES:
-                        client = await pool.acquire(conv.client_id)
-                        session = client.start_chat(metadata=conv.metadata, model=model)
-                        remain = messages[search_end:]
-                        logger.debug(
-                            f"Match found at prefix length {search_end}. Client: {conv.client_id}"
-                        )
-                        return session, client, remain
-                    else:
-                        logger.debug(
-                            f"Matched conversation is too old ({age_minutes:.1f}m), skipping reuse."
-                        )
-            except Exception as e:
-                logger.warning(
-                    f"Error checking LMDB for reusable session at length {search_end}: {e}"
-                )
-                break
-
-        # Trim one message and try again.
-        search_end -= 1
-
-    return None, None, messages
-
-
-async def _send_with_split(session: ChatSession, text: str, files: list[Path | str] | None = None):
+    client_wrapper: GeminiClientWrapper,
+    session: ChatSession,
+    structured_requirement: StructuredOutputRequirement | None = None,
+) -> StreamingResponse:
     """
-    Send text to Gemini. If text is longer than ``MAX_CHARS_PER_REQUEST``,
-    it is converted into a temporary text file attachment to avoid splitting issues.
+    Create a real-time streaming response.
+    Reconciles manual delta accumulation with the model's final authoritative state.
     """
-    if len(text) <= MAX_CHARS_PER_REQUEST:
-        try:
-            return await session.send_message(text, files=files)
-        except Exception as e:
-            logger.exception(f"Error sending message to Gemini: {e}")
-            raise
-
-    logger.info(
-        f"Message length ({len(text)}) exceeds limit ({MAX_CHARS_PER_REQUEST}). Converting text to file attachment."
-    )
-
-    # Create a temporary directory to hold the message.txt file
-    # This ensures the filename is exactly 'message.txt' as expected by the instruction.
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        temp_file_path = Path(tmpdirname) / "message.txt"
-        temp_file_path.write_text(text, encoding="utf-8")
 
+    async def generate_stream():
+        full_thoughts, full_text = "", ""
+        has_started = False
+        last_chunk_was_thought = False
+        all_outputs: list[ModelOutput] = []
+        suppressor = StreamingOutputFilter()
         try:
-            # Prepare the files list
-            final_files = list(files) if files else []
-            final_files.append(temp_file_path)
-
-            instruction = (
-                "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n"
-                "**System Instruction:**\n"
-                "1. Read the content of `message.txt`.\n"
-                "2. Treat that content as the **primary** user prompt for this turn.\n"
-                "3. Execute the instructions or answer the questions found *inside* that file immediately.\n"
-            )
-
-            logger.debug(f"Sending prompt as temporary file: {temp_file_path}")
-
-            return await session.send_message(instruction, files=final_files)
-
+            async for chunk in generator:
+                all_outputs.append(chunk)
+                if not has_started:
+                    data = {
+                        "id": completion_id,
+                        "object": "chat.completion.chunk",
+                        "created": created_time,
+                        "model": model_name,
+                        "choices": [
+                            {"index": 0, "delta": {"role": "assistant"}, "finish_reason": None}
+                        ],
+                    }
+                    yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
+                    has_started = True
+
+                if t_delta := chunk.thoughts_delta:
+                    if not last_chunk_was_thought and not full_thoughts:
+                        yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]}).decode('utf-8')}\n\n"
+                    full_thoughts += t_delta
+                    data = {
+                        "id": completion_id,
+                        "object": "chat.completion.chunk",
+                        "created": created_time,
+                        "model": model_name,
+                        "choices": [
+                            {"index": 0, "delta": {"content": t_delta}, "finish_reason": None}
+                        ],
+                    }
+                    yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
+                    last_chunk_was_thought = True
+
+                if text_delta := chunk.text_delta:
+                    if last_chunk_was_thought:
+                        yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '</think>\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n"
+                        last_chunk_was_thought = False
+                    full_text += text_delta
+                    if visible_delta := suppressor.process(text_delta):
+                        data = {
+                            "id": completion_id,
+                            "object": "chat.completion.chunk",
+                            "created": created_time,
+                            "model": model_name,
+                            "choices": [
+                                {
+                                    "index": 0,
+                                    "delta": {"content": visible_delta},
+                                    "finish_reason": None,
+                                }
+                            ],
+                        }
+                        yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
         except Exception as e:
-            logger.exception(f"Error sending large text as file to Gemini: {e}")
-            raise
+            logger.exception(f"Error during OpenAI streaming: {e}")
+            yield f"data: {orjson.dumps({'error': {'message': 'Streaming error occurred.', 'type': 'server_error', 'param': None, 'code': None}}).decode('utf-8')}\n\n"
+            return
 
+        if all_outputs:
+            final_chunk = all_outputs[-1]
+            if final_chunk.text:
+                full_text = final_chunk.text
+            if final_chunk.thoughts:
+                full_thoughts = final_chunk.thoughts
 
-def _create_streaming_response(
-    model_output: str,
-    tool_calls: list[dict],
-    completion_id: str,
-    created_time: int,
-    model: str,
-    messages: list[Message],
-) -> StreamingResponse:
-    """Create streaming response with `usage` calculation included in the final chunk."""
+        if last_chunk_was_thought:
+            yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '</think>\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n"
 
-    # Calculate token usage
-    prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages)
-    tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or [])
-    completion_tokens = estimate_tokens(model_output + tool_args)
-    total_tokens = prompt_tokens + completion_tokens
-    finish_reason = "tool_calls" if tool_calls else "stop"
+        if remaining_text := suppressor.flush():
+            data = {
+                "id": completion_id,
+                "object": "chat.completion.chunk",
+                "created": created_time,
+                "model": model_name,
+                "choices": [
+                    {"index": 0, "delta": {"content": remaining_text}, "finish_reason": None}
+                ],
+            }
+            yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
 
-    async def generate_stream():
-        # Send start event
-        data = {
-            "id": completion_id,
-            "object": "chat.completion.chunk",
-            "created": created_time,
-            "model": model,
-            "choices": [{"index": 0, "delta": {"role": "assistant"}, "finish_reason": None}],
-        }
-        yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
+        raw_output_with_think = f"<think>{full_thoughts}</think>\n" if full_thoughts else ""
+        raw_output_with_think += full_text
+        assistant_text, storage_output, tool_calls = _process_llm_output(
+            raw_output_with_think, full_text, structured_requirement
+        )
 
-        # Stream output text in chunks for efficiency
-        for chunk in iter_stream_segments(model_output):
+        images = []
+        for out in all_outputs:
+            if out.images:
+                images.extend(out.images)
+
+        image_markdown = ""
+        for image in images:
+            try:
+                image_store = get_image_store_dir()
+                _, _, _, filename = await _image_to_base64(image, image_store)
+                img_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})"
+                image_markdown += f"\n\n{img_url}"
+            except Exception as exc:
+                logger.warning(f"Failed to process image in OpenAI stream: {exc}")
+
+        if image_markdown:
+            assistant_text += image_markdown
+            storage_output += image_markdown
+            # Send the image markdown as a final text chunk before usage
             data = {
                 "id": completion_id,
                 "object": "chat.completion.chunk",
                 "created": created_time,
-                "model": model,
-                "choices": [{"index": 0, "delta": {"content": chunk}, "finish_reason": None}],
+                "model": model_name,
+                "choices": [
+                    {"index": 0, "delta": {"content": image_markdown}, "finish_reason": None}
+                ],
             }
             yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
 
-        if tool_calls:
-            tool_calls_delta = [{**call, "index": idx} for idx, call in enumerate(tool_calls)]
+        tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls]
+        if tool_calls_payload:
+            tool_calls_delta = [
+                {**call, "index": idx} for idx, call in enumerate(tool_calls_payload)
+            ]
             data = {
                 "id": completion_id,
                 "object": "chat.completion.chunk",
                 "created": created_time,
-                "model": model,
+                "model": model_name,
                 "choices": [
-                    {
-                        "index": 0,
-                        "delta": {"tool_calls": tool_calls_delta},
-                        "finish_reason": None,
-                    }
+                    {"index": 0, "delta": {"tool_calls": tool_calls_delta}, "finish_reason": None}
                 ],
             }
             yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
 
-        # Send end event
+        p_tok, c_tok, t_tok = _calculate_usage(messages, assistant_text, tool_calls)
+        usage = {"prompt_tokens": p_tok, "completion_tokens": c_tok, "total_tokens": t_tok}
         data = {
             "id": completion_id,
             "object": "chat.completion.chunk",
             "created": created_time,
-            "model": model,
-            "choices": [{"index": 0, "delta": {}, "finish_reason": finish_reason}],
-            "usage": {
-                "prompt_tokens": prompt_tokens,
-                "completion_tokens": completion_tokens,
-                "total_tokens": total_tokens,
-            },
+            "model": model_name,
+            "choices": [
+                {"index": 0, "delta": {}, "finish_reason": "tool_calls" if tool_calls else "stop"}
+            ],
+            "usage": usage,
         }
+        _persist_conversation(
+            db,
+            model.model_name,
+            client_wrapper.id,
+            session.metadata,
+            messages,  # This should be the prepared messages
+            storage_output,
+            tool_calls,
+        )
         yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
         yield "data: [DONE]\n\n"
 
     return StreamingResponse(generate_stream(), media_type="text/event-stream")
 
 
-def _create_responses_streaming_response(
-    response_payload: ResponseCreateResponse,
-    assistant_text: str | None,
+def _create_responses_real_streaming_response(
+    generator: AsyncGenerator[ModelOutput, None],
+    response_id: str,
+    created_time: int,
+    model_name: str,
+    messages: list[Message],
+    db: LMDBConversationStore,
+    model: Model,
+    client_wrapper: GeminiClientWrapper,
+    session: ChatSession,
+    request_data: ResponseCreateRequest,
+    image_store: Path,
+    base_url: str,
+    structured_requirement: StructuredOutputRequirement | None = None,
 ) -> StreamingResponse:
-    """Create streaming response for Responses API using event types defined by OpenAI."""
-
-    response_dict = response_payload.model_dump(mode="json")
-    response_id = response_payload.id
-    created_time = response_payload.created_at
-    model = response_payload.model
-
-    logger.debug(
-        f"Preparing streaming envelope for /v1/responses (response_id={response_id}, model={model})."
-    )
-
+    """
+    Create a real-time streaming response for the Responses API.
+    After streaming, the final chunk's text/thoughts (if non-empty) replace the accumulated deltas.
+    """
     base_event = {
         "id": response_id,
         "object": "response",
         "created_at": created_time,
-        "model": model,
+        "model": model_name,
     }
 
-    created_snapshot: dict[str, Any] = {
-        "id": response_id,
-        "object": "response",
-        "created_at": created_time,
-        "model": model,
-        "status": "in_progress",
-    }
-    if response_dict.get("metadata") is not None:
-        created_snapshot["metadata"] = response_dict["metadata"]
-    if response_dict.get("input") is not None:
-        created_snapshot["input"] = response_dict["input"]
-    if response_dict.get("tools") is not None:
-        created_snapshot["tools"] = response_dict["tools"]
-    if response_dict.get("tool_choice") is not None:
-        created_snapshot["tool_choice"] = response_dict["tool_choice"]
-
     async def generate_stream():
-        # Emit creation event
-        data = {
-            **base_event,
-            "type": "response.created",
-            "response": created_snapshot,
-        }
-        yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
+        yield f"data: {orjson.dumps({**base_event, 'type': 'response.created', 'response': {'id': response_id, 'object': 'response', 'created_at': created_time, 'model': model_name, 'status': 'in_progress', 'metadata': request_data.metadata, 'input': None, 'tools': request_data.tools, 'tool_choice': request_data.tool_choice}}).decode('utf-8')}\n\n"
+        message_id = f"msg_{uuid.uuid4().hex}"
+        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': 0, 'item': {'id': message_id, 'type': 'message', 'role': 'assistant', 'content': []}}).decode('utf-8')}\n\n"
 
-        # Stream output items (Message/Text, Tool Calls, Images)
-        for i, item in enumerate(response_payload.output):
-            item_json = item.model_dump(mode="json", exclude_none=True)
+        full_thoughts, full_text = "", ""
+        last_chunk_was_thought = False
+        all_outputs: list[ModelOutput] = []
+        suppressor = StreamingOutputFilter()
 
-            added_event = {
-                **base_event,
-                "type": "response.output_item.added",
-                "output_index": i,
-                "item": item_json,
-            }
-            yield f"data: {orjson.dumps(added_event).decode('utf-8')}\n\n"
-
-            # 2. Stream content if it's a message (text)
-            if item.type == "message":
-                content_text = ""
-                # Aggregate text content to stream
-                for c in item.content:
-                    if c.type == "output_text" and c.text:
-                        content_text += c.text
-
-                if content_text:
-                    for chunk in iter_stream_segments(content_text):
-                        delta_event = {
-                            **base_event,
-                            "type": "response.output_text.delta",
-                            "output_index": i,
-                            "delta": chunk,
-                        }
-                        yield f"data: {orjson.dumps(delta_event).decode('utf-8')}\n\n"
+        try:
+            async for chunk in generator:
+                all_outputs.append(chunk)
+                if t_delta := chunk.thoughts_delta:
+                    if not last_chunk_was_thought and not full_thoughts:
+                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '<think>'}).decode('utf-8')}\n\n"
+                    full_thoughts += t_delta
+                    yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': t_delta}).decode('utf-8')}\n\n"
+                    last_chunk_was_thought = True
+                if text_delta := chunk.text_delta:
+                    if last_chunk_was_thought:
+                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '</think>\n'}).decode('utf-8')}\n\n"
+                        last_chunk_was_thought = False
+                    full_text += text_delta
+                    if visible_delta := suppressor.process(text_delta):
+                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': visible_delta}).decode('utf-8')}\n\n"
+        except Exception as e:
+            logger.exception(f"Error during Responses API streaming: {e}")
+            yield f"data: {orjson.dumps({**base_event, 'type': 'error', 'error': {'message': 'Streaming error.'}}).decode('utf-8')}\n\n"
+            return
 
-                    # Text done
-                    done_event = {
-                        **base_event,
-                        "type": "response.output_text.done",
-                        "output_index": i,
-                    }
-                    yield f"data: {orjson.dumps(done_event).decode('utf-8')}\n\n"
-
-            # 3. Emit output_item.done for all types
-            # This confirms the item is fully transferred.
-            item_done_event = {
-                **base_event,
-                "type": "response.output_item.done",
-                "output_index": i,
-                "item": item_json,
-            }
-            yield f"data: {orjson.dumps(item_done_event).decode('utf-8')}\n\n"
+        if all_outputs:
+            final_chunk = all_outputs[-1]
+            if final_chunk.text:
+                full_text = final_chunk.text
+            if final_chunk.thoughts:
+                full_thoughts = final_chunk.thoughts
+
+        if last_chunk_was_thought:
+            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '</think>\n'}).decode('utf-8')}\n\n"
+        if remaining_text := suppressor.flush():
+            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': remaining_text}).decode('utf-8')}\n\n"
+        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.done', 'output_index': 0}).decode('utf-8')}\n\n"
+
+        raw_output_with_think = f"<think>{full_thoughts}</think>\n" if full_thoughts else ""
+        raw_output_with_think += full_text
+        assistant_text, storage_output, detected_tool_calls = _process_llm_output(
+            raw_output_with_think, full_text, structured_requirement
+        )
 
-        # Emit completed event with full payload
-        completed_event = {
-            **base_event,
-            "type": "response.completed",
-            "response": response_dict,
-        }
-        yield f"data: {orjson.dumps(completed_event).decode('utf-8')}\n\n"
+        images = []
+        for out in all_outputs:
+            if out.images:
+                images.extend(out.images)
+
+        response_contents, image_call_items = [], []
+        for image in images:
+            try:
+                image_base64, width, height, filename = await _image_to_base64(image, image_store)
+                img_format = "png" if isinstance(image, GeneratedImage) else "jpeg"
+                image_url = (
+                    f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})"
+                )
+                image_call_items.append(
+                    ResponseImageGenerationCall(
+                        id=filename.rsplit(".", 1)[0],
+                        result=image_base64,
+                        output_format=img_format,
+                        size=f"{width}x{height}" if width and height else None,
+                    )
+                )
+                response_contents.append(ResponseOutputContent(type="output_text", text=image_url))
+            except Exception as exc:
+                logger.warning(f"Failed to process image in stream: {exc}")
+
+        if assistant_text:
+            response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text))
+        if not response_contents:
+            response_contents.append(ResponseOutputContent(type="output_text", text=""))
+
+        # Aggregate images for storage
+        image_markdown = ""
+        for img_call in image_call_items:
+            fname = f"{img_call.id}.{img_call.output_format}"
+            img_url = f"![{fname}](images/{fname}?token={get_image_token(fname)})"
+            image_markdown += f"\n\n{img_url}"
+
+        if image_markdown:
+            storage_output += image_markdown
+
+        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': 0, 'item': {'id': message_id, 'type': 'message', 'role': 'assistant', 'content': [c.model_dump(mode='json') for c in response_contents]}}).decode('utf-8')}\n\n"
+
+        current_idx = 1
+        for call in detected_tool_calls:
+            tc_item = ResponseToolCall(id=call.id, status="completed", function=call.function)
+            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': tc_item.model_dump(mode='json')}).decode('utf-8')}\n\n"
+            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': tc_item.model_dump(mode='json')}).decode('utf-8')}\n\n"
+            current_idx += 1
+        for img_call in image_call_items:
+            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': img_call.model_dump(mode='json')}).decode('utf-8')}\n\n"
+            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': img_call.model_dump(mode='json')}).decode('utf-8')}\n\n"
+            current_idx += 1
+
+        p_tok, c_tok, t_tok = _calculate_usage(messages, assistant_text, detected_tool_calls)
+        usage = ResponseUsage(input_tokens=p_tok, output_tokens=c_tok, total_tokens=t_tok)
+        payload = _create_responses_standard_payload(
+            response_id,
+            created_time,
+            model_name,
+            assistant_text,
+            detected_tool_calls,
+            image_call_items,
+            response_contents,
+            usage,
+            request_data,
+            None,
+        )
+        _persist_conversation(
+            db,
+            model.model_name,
+            client_wrapper.id,
+            session.metadata,
+            messages,
+            storage_output,
+            detected_tool_calls,
+        )
+        yield f"data: {orjson.dumps({**base_event, 'type': 'response.completed', 'response': payload.model_dump(mode='json')}).decode('utf-8')}\n\n"
         yield "data: [DONE]\n\n"
 
     return StreamingResponse(generate_stream(), media_type="text/event-stream")
 
 
-def _create_standard_response(
-    model_output: str,
-    tool_calls: list[dict],
-    completion_id: str,
-    created_time: int,
-    model: str,
-    messages: list[Message],
-) -> dict:
-    """Create standard response"""
-    # Calculate token usage
-    prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages)
-    tool_args = "".join(call.get("function", {}).get("arguments", "") for call in tool_calls or [])
-    completion_tokens = estimate_tokens(model_output + tool_args)
-    total_tokens = prompt_tokens + completion_tokens
-    finish_reason = "tool_calls" if tool_calls else "stop"
+# --- Main Router Endpoints ---
 
-    message_payload: dict = {"role": "assistant", "content": model_output or None}
-    if tool_calls:
-        message_payload["tool_calls"] = tool_calls
 
-    result = {
-        "id": completion_id,
-        "object": "chat.completion",
-        "created": created_time,
-        "model": model,
-        "choices": [
-            {
-                "index": 0,
-                "message": message_payload,
-                "finish_reason": finish_reason,
-            }
-        ],
-        "usage": {
-            "prompt_tokens": prompt_tokens,
-            "completion_tokens": completion_tokens,
-            "total_tokens": total_tokens,
-        },
-    }
+@router.get("/v1/models", response_model=ModelListResponse)
+async def list_models(api_key: str = Depends(verify_api_key)):
+    models = _get_available_models()
+    return ModelListResponse(data=models)
+
+
+@router.post("/v1/chat/completions")
+async def create_chat_completion(
+    request: ChatCompletionRequest,
+    api_key: str = Depends(verify_api_key),
+    tmp_dir: Path = Depends(get_temp_dir),
+    image_store: Path = Depends(get_image_store_dir),
+):
+    pool, db = GeminiClientPool(), LMDBConversationStore()
+    try:
+        model = _get_model_by_name(request.model)
+    except ValueError as exc:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
+    if not request.messages:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Messages required.")
 
-    logger.debug(f"Response created with {total_tokens} total tokens")
-    return result
+    structured_requirement = _build_structured_requirement(request.response_format)
+    extra_instr = [structured_requirement.instruction] if structured_requirement else None
 
+    # Prepare messages up front so that server-injected system instructions are part of the history
+    msgs = _prepare_messages_for_model(
+        request.messages, request.tools, request.tool_choice, extra_instr
+    )
 
-async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]:
-    """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename."""
-    if isinstance(image, GeneratedImage):
+    session, client, remain = await _find_reusable_session(db, pool, model, msgs)
+
+    if session:
+        if not remain:
+            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.")
+
+        # For reused sessions, we only need to process the remaining messages.
+        # We don't re-inject system defaults to avoid duplicating instructions already in history.
+        input_msgs = _prepare_messages_for_model(
+            remain, request.tools, request.tool_choice, extra_instr, False
+        )
+        if len(input_msgs) == 1:
+            m_input, files = await GeminiClientWrapper.process_message(
+                input_msgs[0], tmp_dir, tagged=False
+            )
+        else:
+            m_input, files = await GeminiClientWrapper.process_conversation(input_msgs, tmp_dir)
+
+        logger.debug(
+            f"Reused session {reprlib.repr(session.metadata)} - sending {len(input_msgs)} prepared messages."
+        )
+    else:
         try:
-            saved_path = await image.save(path=str(temp_dir), full_size=True)
+            client = await pool.acquire()
+            session = client.start_chat(model=model)
+            # Use the already prepared 'msgs' for a fresh session
+            m_input, files = await GeminiClientWrapper.process_conversation(msgs, tmp_dir)
         except Exception as e:
-            logger.warning(
-                f"Failed to download full-size GeneratedImage, retrying with default size: {e}"
-            )
-            saved_path = await image.save(path=str(temp_dir), full_size=False)
+            logger.exception("Error in preparing conversation")
+            raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e))
+
+    completion_id = f"chatcmpl-{uuid.uuid4()}"
+    created_time = int(datetime.now(tz=timezone.utc).timestamp())
+
+    try:
+        assert session and client
+        logger.debug(
+            f"Client ID: {client.id}, Input length: {len(m_input)}, files count: {len(files)}"
+        )
+        resp_or_stream = await _send_with_split(
+            session, m_input, files=files, stream=request.stream
+        )
+    except Exception as e:
+        logger.exception("Gemini API error")
+        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e))
+
+    if request.stream:
+        return _create_real_streaming_response(
+            resp_or_stream,
+            completion_id,
+            created_time,
+            request.model,
+            msgs,  # Use prepared 'msgs'
+            db,
+            model,
+            client,
+            session,
+            structured_requirement,
+        )
+
+    try:
+        raw_with_t = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=True)
+        raw_clean = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False)
+    except Exception as exc:
+        logger.exception("Gemini output parsing failed.")
+        raise HTTPException(
+            status_code=status.HTTP_502_BAD_GATEWAY, detail="Malformed response."
+        ) from exc
+
+    visible_output, storage_output, tool_calls = _process_llm_output(
+        raw_with_t, raw_clean, structured_requirement
+    )
+
+    # Process images for OpenAI non-streaming flow
+    images = resp_or_stream.images or []
+    image_markdown = ""
+    for image in images:
+        try:
+            _, _, _, filename = await _image_to_base64(image, image_store)
+            img_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})"
+            image_markdown += f"\n\n{img_url}"
+        except Exception as exc:
+            logger.warning(f"Failed to process image in OpenAI response: {exc}")
+
+    if image_markdown:
+        visible_output += image_markdown
+        storage_output += image_markdown
+
+    tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls]
+    if tool_calls_payload:
+        logger.debug(f"Detected tool calls: {reprlib.repr(tool_calls_payload)}")
+
+    p_tok, c_tok, t_tok = _calculate_usage(request.messages, visible_output, tool_calls)
+    usage = {"prompt_tokens": p_tok, "completion_tokens": c_tok, "total_tokens": t_tok}
+    payload = _create_chat_completion_standard_payload(
+        completion_id,
+        created_time,
+        request.model,
+        visible_output,
+        tool_calls_payload,
+        "tool_calls" if tool_calls else "stop",
+        usage,
+    )
+    _persist_conversation(
+        db,
+        model.model_name,
+        client.id,
+        session.metadata,
+        msgs,  # Use prepared messages 'msgs'
+        storage_output,
+        tool_calls,
+    )
+    return payload
+
+
+@router.post("/v1/responses")
+async def create_response(
+    request_data: ResponseCreateRequest,
+    request: Request,
+    api_key: str = Depends(verify_api_key),
+    tmp_dir: Path = Depends(get_temp_dir),
+    image_store: Path = Depends(get_image_store_dir),
+):
+    base_messages, norm_input = _response_items_to_messages(request_data.input)
+    struct_req = _build_structured_requirement(request_data.response_format)
+    extra_instr = [struct_req.instruction] if struct_req else []
+
+    standard_tools, image_tools = [], []
+    if request_data.tools:
+        for t in request_data.tools:
+            if isinstance(t, Tool):
+                standard_tools.append(t)
+            elif isinstance(t, ResponseImageTool):
+                image_tools.append(t)
+            elif isinstance(t, dict):
+                if t.get("type") == "function":
+                    standard_tools.append(Tool.model_validate(t))
+                elif t.get("type") == "image_generation":
+                    image_tools.append(ResponseImageTool.model_validate(t))
+
+    img_instr = _build_image_generation_instruction(
+        image_tools,
+        request_data.tool_choice
+        if isinstance(request_data.tool_choice, ResponseToolChoice)
+        else None,
+    )
+    if img_instr:
+        extra_instr.append(img_instr)
+    preface = _instructions_to_messages(request_data.instructions)
+    conv_messages = [*preface, *base_messages] if preface else base_messages
+    model_tool_choice = (
+        request_data.tool_choice
+        if isinstance(request_data.tool_choice, (str, ToolChoiceFunction))
+        else None
+    )
+
+    messages = _prepare_messages_for_model(
+        conv_messages, standard_tools or None, model_tool_choice, extra_instr or None
+    )
+    pool, db = GeminiClientPool(), LMDBConversationStore()
+    try:
+        model = _get_model_by_name(request_data.model)
+    except ValueError as exc:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
+
+    session, client, remain = await _find_reusable_session(db, pool, model, messages)
+    if session:
+        msgs = _prepare_messages_for_model(
+            remain, request_data.tools, request_data.tool_choice, None, False
+        )
+        if not msgs:
+            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.")
+        m_input, files = (
+            await GeminiClientWrapper.process_message(msgs[0], tmp_dir, tagged=False)
+            if len(msgs) == 1
+            else await GeminiClientWrapper.process_conversation(msgs, tmp_dir)
+        )
+        logger.debug(
+            f"Reused session {reprlib.repr(session.metadata)} - sending {len(msgs)} prepared messages."
+        )
     else:
-        saved_path = await image.save(path=str(temp_dir))
+        try:
+            client = await pool.acquire()
+            session = client.start_chat(model=model)
+            m_input, files = await GeminiClientWrapper.process_conversation(messages, tmp_dir)
+        except Exception as e:
+            logger.exception("Error in preparing conversation")
+            raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e))
 
-    if not saved_path:
-        raise ValueError("Failed to save generated image")
+    response_id = f"resp_{uuid.uuid4().hex}"
+    created_time = int(datetime.now(tz=timezone.utc).timestamp())
 
-    # Rename file to a random UUID to ensure uniqueness and unpredictability
-    original_path = Path(saved_path)
-    random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}"
-    new_path = temp_dir / random_name
-    original_path.rename(new_path)
+    try:
+        assert session and client
+        logger.debug(
+            f"Client ID: {client.id}, Input length: {len(m_input)}, files count: {len(files)}"
+        )
+        resp_or_stream = await _send_with_split(
+            session, m_input, files=files, stream=request_data.stream
+        )
+    except Exception as e:
+        logger.exception("Gemini API error")
+        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e))
 
-    data = new_path.read_bytes()
-    width, height = extract_image_dimensions(data)
-    filename = random_name
-    return base64.b64encode(data).decode("ascii"), width, height, filename
+    if request_data.stream:
+        return _create_responses_real_streaming_response(
+            resp_or_stream,
+            response_id,
+            created_time,
+            request_data.model,
+            messages,
+            db,
+            model,
+            client,
+            session,
+            request_data,
+            image_store,
+            str(request.base_url),
+            struct_req,
+        )
+
+    try:
+        raw_t = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=True)
+        raw_c = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False)
+    except Exception as exc:
+        logger.exception("Gemini parsing failed")
+        raise HTTPException(
+            status_code=status.HTTP_502_BAD_GATEWAY, detail="Malformed response."
+        ) from exc
+
+    assistant_text, storage_output, tool_calls = _process_llm_output(raw_t, raw_c, struct_req)
+    images = resp_or_stream.images or []
+    if (
+        request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation"
+    ) and not images:
+        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned.")
+
+    contents, img_calls = [], []
+    for img in images:
+        try:
+            b64, w, h, fname = await _image_to_base64(img, image_store)
+            contents.append(
+                ResponseOutputContent(
+                    type="output_text",
+                    text=f"![{fname}]({request.base_url}images/{fname}?token={get_image_token(fname)})",
+                )
+            )
+            img_calls.append(
+                ResponseImageGenerationCall(
+                    id=fname.rsplit(".", 1)[0],
+                    result=b64,
+                    output_format="png" if isinstance(img, GeneratedImage) else "jpeg",
+                    size=f"{w}x{h}" if w and h else None,
+                )
+            )
+        except Exception as e:
+            logger.warning(f"Image error: {e}")
+
+    if assistant_text:
+        contents.append(ResponseOutputContent(type="output_text", text=assistant_text))
+    if not contents:
+        contents.append(ResponseOutputContent(type="output_text", text=""))
+
+    # Aggregate images for storage
+    image_markdown = ""
+    for img_call in img_calls:
+        fname = f"{img_call.id}.{img_call.output_format}"
+        img_url = f"![{fname}](images/{fname}?token={get_image_token(fname)})"
+        image_markdown += f"\n\n{img_url}"
+
+    if image_markdown:
+        storage_output += image_markdown
+
+    p_tok, c_tok, t_tok = _calculate_usage(messages, assistant_text, tool_calls)
+    usage = ResponseUsage(input_tokens=p_tok, output_tokens=c_tok, total_tokens=t_tok)
+    payload = _create_responses_standard_payload(
+        response_id,
+        created_time,
+        request_data.model,
+        assistant_text,
+        tool_calls,
+        img_calls,
+        contents,
+        usage,
+        request_data,
+        norm_input,
+    )
+    _persist_conversation(
+        db, model.model_name, client.id, session.metadata, messages, storage_output, tool_calls
+    )
+    return payload
diff --git a/app/services/client.py b/app/services/client.py
index 55be11a..eda1691 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -78,7 +78,8 @@ async def process_message(
         message: Message, tempdir: Path | None = None, tagged: bool = True
     ) -> tuple[str, list[Path | str]]:
         """
-        Process a single message and return model input.
+        Process a single Message object into a format suitable for the Gemini API.
+        Extracts text fragments, handles images and files, and appends tool call blocks if present.
         """
         files: list[Path | str] = []
         text_fragments: list[str] = []
@@ -88,8 +89,7 @@ async def process_message(
             if message.content:
                 text_fragments.append(message.content)
         elif isinstance(message.content, list):
-            # Mixed content
-            # TODO: Use Pydantic to enforce the value checking
+            # Mixed content (text, image_url, or file)
             for item in message.content:
                 if item.type == "text":
                     # Append multiple text fragments
@@ -177,7 +177,8 @@ async def process_conversation(
     @staticmethod
     def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str:
         """
-        Extract and format the output text from the Gemini response.
+        Extract and format the output text from a ModelOutput.
+        Optionally includes reasoning thoughts (wrapped in <think> tags) and unescapes content.
         """
         text = ""
 
@@ -191,6 +192,7 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str:
 
         # Fix some escaped characters
         def _unescape_html(text_content: str) -> str:
+            """Unescape HTML entities only in non-code sections of the text."""
             parts: list[str] = []
             last_index = 0
             for match in CODE_FENCE_RE.finditer(text_content):
@@ -205,6 +207,7 @@ def _unescape_html(text_content: str) -> str:
             return "".join(parts)
 
         def _unescape_markdown(text_content: str) -> str:
+            """Remove backslash escapes for markdown characters in non-code sections."""
             parts: list[str] = []
             last_index = 0
             for match in CODE_FENCE_RE.finditer(text_content):
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index c9d42cd..6ab2302 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -11,45 +11,98 @@
 
 from ..models import ContentItem, ConversationInStore, Message
 from ..utils import g_config
-from ..utils.helper import extract_tool_calls, remove_tool_call_blocks
+from ..utils.helper import (
+    CODE_BLOCK_HINT,
+    CODE_HINT_STRIPPED,
+    XML_HINT_STRIPPED,
+    XML_WRAP_HINT,
+    extract_tool_calls,
+    remove_tool_call_blocks,
+)
 from ..utils.singleton import Singleton
 
 
 def _hash_message(message: Message) -> str:
-    """Generate a consistent hash for a single message focusing ONLY on logic/content, ignoring technical IDs."""
+    """
+    Generate a stable, canonical hash for a single message.
+    Strips system hints, thoughts, and tool call blocks to ensure
+    identical logical content produces the same hash regardless of format.
+    """
     core_data = {
         "role": message.role,
         "name": message.name,
+        "tool_call_id": message.tool_call_id,
     }
 
-    # Normalize content: strip, handle empty/None, and list-of-text items
     content = message.content
     if not content:
         core_data["content"] = None
     elif isinstance(content, str):
-        # Normalize line endings and strip whitespace
-        normalized = content.replace("\r\n", "\n").strip()
+        normalized = content.replace("\r\n", "\n")
+
+        normalized = LMDBConversationStore.remove_think_tags(normalized)
+
+        for hint in [
+            XML_WRAP_HINT,
+            XML_HINT_STRIPPED,
+            CODE_BLOCK_HINT,
+            CODE_HINT_STRIPPED,
+        ]:
+            normalized = normalized.replace(hint, "")
+
+        if message.tool_calls:
+            normalized = remove_tool_call_blocks(normalized)
+        else:
+            temp_text, _extracted = extract_tool_calls(normalized)
+            normalized = temp_text
+
+        normalized = normalized.strip()
         core_data["content"] = normalized if normalized else None
     elif isinstance(content, list):
         text_parts = []
         for item in content:
+            text_val = ""
             if isinstance(item, ContentItem) and item.type == "text":
-                text_parts.append(item.text or "")
+                text_val = item.text or ""
             elif isinstance(item, dict) and item.get("type") == "text":
-                text_parts.append(item.get("text") or "")
+                text_val = item.get("text") or ""
+
+            if text_val:
+                text_val = text_val.replace("\r\n", "\n")
+                text_val = LMDBConversationStore.remove_think_tags(text_val)
+                for hint in [
+                    XML_WRAP_HINT,
+                    XML_HINT_STRIPPED,
+                    CODE_BLOCK_HINT,
+                    CODE_HINT_STRIPPED,
+                ]:
+                    text_val = text_val.replace(hint, "")
+                text_val = remove_tool_call_blocks(text_val).strip()
+                if text_val:
+                    text_parts.append(text_val)
+            elif isinstance(item, ContentItem) and item.type in ("image_url", "file"):
+                # For non-text items, include their unique markers to distinguish them
+                if item.type == "image_url":
+                    text_parts.append(
+                        f"[image_url:{item.image_url.get('url') if item.image_url else ''}]"
+                    )
+                elif item.type == "file":
+                    text_parts.append(
+                        f"[file:{item.file.get('url') or item.file.get('filename') if item.file else ''}]"
+                    )
             else:
-                # If it contains non-text (images/files), keep the full list for hashing
-                text_parts = None
-                break
-
-        if text_parts is not None:
-            # Normalize each part but keep them as a list to preserve boundaries and avoid collisions
-            normalized_parts = [p.replace("\r\n", "\n") for p in text_parts]
-            core_data["content"] = normalized_parts if normalized_parts else None
-        else:
-            core_data["content"] = message.model_dump(mode="json")["content"]
+                # Fallback for other dict-based content parts
+                part_type = item.get("type") if isinstance(item, dict) else None
+                if part_type == "image_url":
+                    url = item.get("image_url", {}).get("url")
+                    text_parts.append(f"[image_url:{url}]")
+                elif part_type == "file":
+                    url = item.get("file", {}).get("url") or item.get("file", {}).get("filename")
+                    text_parts.append(f"[file:{url}]")
+
+        combined_text = "\n".join(text_parts).replace("\r\n", "\n").strip()
+        core_data["content"] = combined_text if combined_text else None
 
-    # Normalize tool_calls: Focus ONLY on function name and arguments
     if message.tool_calls:
         calls_data = []
         for tc in message.tool_calls:
@@ -66,14 +119,14 @@ def _hash_message(message: Message) -> str:
                     "arguments": canon_args,
                 }
             )
-        # Sort calls to be order-independent
         calls_data.sort(key=lambda x: (x["name"], x["arguments"]))
         core_data["tool_calls"] = calls_data
     else:
         core_data["tool_calls"] = None
 
     message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS)
-    return hashlib.sha256(message_bytes).hexdigest()
+    digest = hashlib.sha256(message_bytes).hexdigest()
+    return digest
 
 
 def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str:
@@ -123,16 +176,14 @@ def __init__(
         self._init_environment()
 
     def _ensure_db_path(self) -> None:
-        """Ensure database directory exists."""
         self.db_path.parent.mkdir(parents=True, exist_ok=True)
 
     def _init_environment(self) -> None:
-        """Initialize LMDB environment."""
         try:
             self._env = lmdb.open(
                 str(self.db_path),
                 map_size=self.max_db_size,
-                max_dbs=3,  # main, metadata, and index databases
+                max_dbs=3,
                 writemap=True,
                 readahead=False,
                 meminit=False,
@@ -144,7 +195,6 @@ def _init_environment(self) -> None:
 
     @contextmanager
     def _get_transaction(self, write: bool = False):
-        """Get LMDB transaction context manager."""
         if not self._env:
             raise RuntimeError("LMDB environment not initialized")
 
@@ -178,11 +228,15 @@ def store(
         if not conv:
             raise ValueError("Messages list cannot be empty")
 
+        # Sanitize messages before computing hash and storing to ensure consistency
+        # with the search (find) logic, which also sanitizes its prefix.
+        sanitized_messages = self.sanitize_assistant_messages(conv.messages)
+        conv.messages = sanitized_messages
+
         # Generate hash for the message list
         message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages)
         storage_key = custom_key or message_hash
 
-        # Prepare data for storage
         now = datetime.now()
         if conv.created_at is None:
             conv.created_at = now
@@ -192,20 +246,18 @@ def store(
 
         try:
             with self._get_transaction(write=True) as txn:
-                # Store main data
                 txn.put(storage_key.encode("utf-8"), value, overwrite=True)
 
-                # Store hash -> key mapping for reverse lookup
                 txn.put(
                     f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"),
                     storage_key.encode("utf-8"),
                 )
 
-                logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key}")
+                logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key[:12]}")
                 return storage_key
 
         except Exception as e:
-            logger.error(f"Failed to store conversation: {e}")
+            logger.error(f"Failed to store messages with key {storage_key[:12]}: {e}")
             raise
 
     def get(self, key: str) -> Optional[ConversationInStore]:
@@ -227,39 +279,35 @@ def get(self, key: str) -> Optional[ConversationInStore]:
                 storage_data = orjson.loads(data)  # type: ignore
                 conv = ConversationInStore.model_validate(storage_data)
 
-                logger.debug(f"Retrieved {len(conv.messages)} messages for key: {key}")
+                logger.debug(f"Retrieved {len(conv.messages)} messages with key: {key[:12]}")
                 return conv
 
         except Exception as e:
-            logger.error(f"Failed to retrieve messages for key {key}: {e}")
+            logger.error(f"Failed to retrieve messages with key {key[:12]}: {e}")
             return None
 
     def find(self, model: str, messages: List[Message]) -> Optional[ConversationInStore]:
         """
         Search conversation data by message list.
-
-        Args:
-            model: Model name of the conversations
-            messages: List of messages to search for
-
-        Returns:
-            Conversation or None if not found
         """
         if not messages:
             return None
 
         # --- Find with raw messages ---
         if conv := self._find_by_message_list(model, messages):
-            logger.debug("Found conversation with raw message history.")
+            logger.debug(f"Session found for '{model}' with {len(messages)} raw messages.")
             return conv
 
         # --- Find with cleaned messages ---
         cleaned_messages = self.sanitize_assistant_messages(messages)
-        if conv := self._find_by_message_list(model, cleaned_messages):
-            logger.debug("Found conversation with cleaned message history.")
-            return conv
+        if cleaned_messages != messages:
+            if conv := self._find_by_message_list(model, cleaned_messages):
+                logger.debug(
+                    f"Session found for '{model}' with {len(cleaned_messages)} cleaned messages."
+                )
+                return conv
 
-        logger.debug("No conversation found for either raw or cleaned history.")
+        logger.debug(f"No session found for '{model}' with {len(messages)} messages.")
         return None
 
     def _find_by_message_list(
@@ -330,11 +378,11 @@ def delete(self, key: str) -> Optional[ConversationInStore]:
                 if message_hash and key != message_hash:
                     txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"))
 
-                logger.debug(f"Deleted messages with key: {key}")
+                logger.debug(f"Deleted messages with key: {key[:12]}")
                 return conv
 
         except Exception as e:
-            logger.error(f"Failed to delete key {key}: {e}")
+            logger.error(f"Failed to delete messages with key {key[:12]}: {e}")
             return None
 
     def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]:
@@ -478,6 +526,8 @@ def remove_think_tags(text: str) -> str:
         """
         Remove all <think>...</think> tags and strip whitespace.
         """
+        if not text:
+            return text
         # Remove all think blocks anywhere in the text
         cleaned_content = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
         return cleaned_content.strip()
@@ -485,12 +535,8 @@ def remove_think_tags(text: str) -> str:
     @staticmethod
     def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
         """
-        Create a new list of messages with assistant content cleaned of <think> tags
-        and system hints/tool call blocks. This is used for both storing and
-        searching chat history to ensure consistency.
-
-        If a message has no tool_calls but contains tool call XML blocks in its
-        content, they will be extracted and moved to the tool_calls field.
+        Produce a canonical history where assistant messages are cleaned of
+        internal markers and tool call blocks are moved to metadata.
         """
         cleaned_messages = []
         for msg in messages:
@@ -503,12 +549,12 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
                     else:
                         text = remove_tool_call_blocks(text).strip()
 
-                    normalized_content = text.strip()
+                    normalized_content = text.strip() or None
 
                     if normalized_content != msg.content or tool_calls != msg.tool_calls:
                         cleaned_msg = msg.model_copy(
                             update={
-                                "content": normalized_content or None,
+                                "content": normalized_content,
                                 "tool_calls": tool_calls or None,
                             }
                         )
diff --git a/app/services/pool.py b/app/services/pool.py
index a134dda..0f95203 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -31,7 +31,7 @@ def __init__(self) -> None:
             self._clients.append(client)
             self._id_map[c.id] = client
             self._round_robin.append(client)
-            self._restart_locks[c.id] = asyncio.Lock()  # Pre-initialize
+            self._restart_locks[c.id] = asyncio.Lock()
 
     async def init(self) -> None:
         """Initialize all clients in the pool."""
@@ -84,7 +84,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool:
 
         lock = self._restart_locks.get(client.id)
         if lock is None:
-            return False  # Should not happen
+            return False
 
         async with lock:
             if client.running():
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 190b5ce..7606dd3 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -5,7 +5,6 @@
 import struct
 import tempfile
 from pathlib import Path
-from typing import Iterator
 from urllib.parse import urlparse
 
 import httpx
@@ -68,7 +67,6 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
     data: bytes | None = None
     suffix: str | None = None
     if url.startswith("data:image/"):
-        # Base64 encoded image
         metadata_part = url.split(",")[0]
         mime_type = metadata_part.split(":")[1].split(";")[0]
 
@@ -131,13 +129,11 @@ def strip_tagged_blocks(text: str) -> str:
             result.append(text[idx:])
             break
 
-        # append any content before this block
         result.append(text[idx:start])
 
         role_start = start + len(start_marker)
         newline = text.find("\n", role_start)
         if newline == -1:
-            # malformed block; keep the remainder as-is (safe behavior)
             result.append(text[start:])
             break
 
@@ -145,23 +141,18 @@ def strip_tagged_blocks(text: str) -> str:
 
         end = text.find(end_marker, newline + 1)
         if end == -1:
-            # missing end marker
             if role == "tool":
-                # drop from the start marker to EOF (skip the remainder)
                 break
             else:
-                # keep inner content from after the role newline to EOF
                 result.append(text[newline + 1 :])
                 break
 
         block_end = end + len(end_marker)
 
         if role == "tool":
-            # drop the whole block
             idx = block_end
             continue
 
-        # keep the content without role markers
         content = text[newline + 1 : end]
         result.append(content)
         idx = block_end
@@ -180,41 +171,19 @@ def strip_system_hints(text: str) -> str:
     return cleaned.strip()
 
 
-def remove_tool_call_blocks(text: str) -> str:
-    """Strip tool call code blocks from text."""
-    if not text:
-        return text
-
-    # 1. Remove fenced blocks ONLY if they contain tool calls
-    def _replace_block(match: re.Match[str]) -> str:
-        block_content = match.group(1)
-        if not block_content:
-            return match.group(0)
-
-        # Check if the block contains any tool call tag
-        if TOOL_CALL_RE.search(block_content):
-            return ""
-
-        # Preserve the block if no tool call found
-        return match.group(0)
-
-    cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
-
-    # 2. Remove orphaned tool calls
-    cleaned = TOOL_CALL_RE.sub("", cleaned)
-
-    return strip_system_hints(cleaned)
-
-
-def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
-    """Extract tool call definitions and return cleaned text."""
+def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]:
+    """
+    Unified engine for stripping tool call blocks and extracting tool metadata.
+    If extract=True, parses JSON arguments and assigns deterministic call IDs.
+    """
     if not text:
         return text, []
 
     tool_calls: list[ToolCall] = []
 
     def _create_tool_call(name: str, raw_args: str) -> None:
-        """Helper to parse args and append to the tool_calls list."""
+        if not extract:
+            return
         if not name:
             logger.warning("Encountered tool_call without a function name.")
             return
@@ -226,8 +195,6 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         except orjson.JSONDecodeError:
             logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.")
 
-        # Generate a deterministic ID based on name, arguments, and its global sequence index
-        # to ensure uniqueness across multiple fenced blocks while remaining stable for storage.
         index = len(tool_calls)
         seed = f"{name}:{arguments}:{index}".encode("utf-8")
         call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}"
@@ -245,14 +212,14 @@ def _replace_block(match: re.Match[str]) -> str:
         if not block_content:
             return match.group(0)
 
-        found_in_block = False
-        for call_match in TOOL_CALL_RE.finditer(block_content):
-            found_in_block = True
-            name = (call_match.group(1) or "").strip()
-            raw_args = (call_match.group(2) or "").strip()
-            _create_tool_call(name, raw_args)
+        is_tool_block = bool(TOOL_CALL_RE.search(block_content))
 
-        if found_in_block:
+        if is_tool_block:
+            if extract:
+                for call_match in TOOL_CALL_RE.finditer(block_content):
+                    name = (call_match.group(1) or "").strip()
+                    raw_args = (call_match.group(2) or "").strip()
+                    _create_tool_call(name, raw_args)
             return ""
         else:
             return match.group(0)
@@ -260,56 +227,26 @@ def _replace_block(match: re.Match[str]) -> str:
     cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
 
     def _replace_orphan(match: re.Match[str]) -> str:
-        name = (match.group(1) or "").strip()
-        raw_args = (match.group(2) or "").strip()
-        _create_tool_call(name, raw_args)
+        if extract:
+            name = (match.group(1) or "").strip()
+            raw_args = (match.group(2) or "").strip()
+            _create_tool_call(name, raw_args)
         return ""
 
     cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned)
-
     cleaned = strip_system_hints(cleaned)
     return cleaned, tool_calls
 
 
-def iter_stream_segments(model_output: str, chunk_size: int = 64) -> Iterator[str]:
-    """Yield stream segments while keeping <think> markers and words intact."""
-    if not model_output:
-        return
-
-    token_pattern = re.compile(r"\s+|\S+\s*")
-    pending = ""
-
-    def _flush_pending() -> Iterator[str]:
-        nonlocal pending
-        if pending:
-            yield pending
-            pending = ""
-
-    # Split on <think> boundaries so the markers are never fragmented.
-    parts = re.split(r"(</?think>)", model_output)
-    for part in parts:
-        if not part:
-            continue
-        if part in {"<think>", "</think>"}:
-            yield from _flush_pending()
-            yield part
-            continue
-
-        for match in token_pattern.finditer(part):
-            token = match.group(0)
-
-            if len(token) > chunk_size:
-                yield from _flush_pending()
-                for idx in range(0, len(token), chunk_size):
-                    yield token[idx : idx + chunk_size]
-                continue
-
-            if pending and len(pending) + len(token) > chunk_size:
-                yield from _flush_pending()
+def remove_tool_call_blocks(text: str) -> str:
+    """Strip tool call code blocks from text."""
+    cleaned, _ = _process_tools_internal(text, extract=False)
+    return cleaned
 
-            pending += token
 
-    yield from _flush_pending()
+def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
+    """Extract tool call definitions and return cleaned text."""
+    return _process_tools_internal(text, extract=True)
 
 
 def text_from_message(message: Message) -> str:

From 52547a923276c5b5de3ba0394939478ac4166417 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 30 Jan 2026 13:34:04 +0700
Subject: [PATCH 056/236] Enable real-time streaming responses and completely
 solve the issue with reusable sessions.

- Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR.
- Introduce a new feature for real-time streaming responses.
- Fully resolve the problem with reusable sessions.
- Break down similar flow logic into helper functions.
- All endpoints now support inline Markdown images.
- Switch large prompts to use BytesIO to avoid reading and writing to disk.
---
 app/server/chat.py     | 70 +++++++++++++++++-------------------------
 app/services/client.py |  2 +-
 2 files changed, 29 insertions(+), 43 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 37d3c70..ae1533e 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -8,7 +8,7 @@
 from typing import Any, AsyncGenerator
 
 import orjson
-from fastapi import APIRouter, Depends, HTTPException, Request, status
+from fastapi import APIRouter, Depends, HTTPException, status
 from fastapi.responses import StreamingResponse
 from gemini_webapi import ModelOutput
 from gemini_webapi.client import ChatSession
@@ -128,12 +128,11 @@ def _create_responses_standard_payload(
     response_id: str,
     created_time: int,
     model_name: str,
-    assistant_text: str | None,
     detected_tool_calls: list[Any] | None,
     image_call_items: list[ResponseImageGenerationCall],
     response_contents: list[ResponseOutputContent],
     usage: ResponseUsage,
-    request_data: ResponseCreateRequest,
+    request: ResponseCreateRequest,
     normalized_input: Any,
 ) -> ResponseCreateResponse:
     """Unified factory for building ResponseCreateResponse objects."""
@@ -166,9 +165,9 @@ def _create_responses_standard_payload(
         status="completed",
         usage=usage,
         input=normalized_input or None,
-        metadata=request_data.metadata or None,
-        tools=request_data.tools,
-        tool_choice=request_data.tool_choice,
+        metadata=request.metadata or None,
+        tools=request.tools,
+        tool_choice=request.tool_choice,
     )
 
 
@@ -1042,7 +1041,7 @@ async def generate_stream():
         if image_markdown:
             assistant_text += image_markdown
             storage_output += image_markdown
-            # Send the image markdown as a final text chunk before usage
+            # Send the image Markdown as a final text chunk before usage
             data = {
                 "id": completion_id,
                 "object": "chat.completion.chunk",
@@ -1107,9 +1106,8 @@ def _create_responses_real_streaming_response(
     model: Model,
     client_wrapper: GeminiClientWrapper,
     session: ChatSession,
-    request_data: ResponseCreateRequest,
+    request: ResponseCreateRequest,
     image_store: Path,
-    base_url: str,
     structured_requirement: StructuredOutputRequirement | None = None,
 ) -> StreamingResponse:
     """
@@ -1124,7 +1122,7 @@ def _create_responses_real_streaming_response(
     }
 
     async def generate_stream():
-        yield f"data: {orjson.dumps({**base_event, 'type': 'response.created', 'response': {'id': response_id, 'object': 'response', 'created_at': created_time, 'model': model_name, 'status': 'in_progress', 'metadata': request_data.metadata, 'input': None, 'tools': request_data.tools, 'tool_choice': request_data.tool_choice}}).decode('utf-8')}\n\n"
+        yield f"data: {orjson.dumps({**base_event, 'type': 'response.created', 'response': {'id': response_id, 'object': 'response', 'created_at': created_time, 'model': model_name, 'status': 'in_progress', 'metadata': request.metadata, 'input': None, 'tools': request.tools, 'tool_choice': request.tool_choice}}).decode('utf-8')}\n\n"
         message_id = f"msg_{uuid.uuid4().hex}"
         yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': 0, 'item': {'id': message_id, 'type': 'message', 'role': 'assistant', 'content': []}}).decode('utf-8')}\n\n"
 
@@ -1183,9 +1181,7 @@ async def generate_stream():
             try:
                 image_base64, width, height, filename = await _image_to_base64(image, image_store)
                 img_format = "png" if isinstance(image, GeneratedImage) else "jpeg"
-                image_url = (
-                    f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})"
-                )
+                image_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})"
                 image_call_items.append(
                     ResponseImageGenerationCall(
                         id=filename.rsplit(".", 1)[0],
@@ -1232,12 +1228,11 @@ async def generate_stream():
             response_id,
             created_time,
             model_name,
-            assistant_text,
             detected_tool_calls,
             image_call_items,
             response_contents,
             usage,
-            request_data,
+            request,
             None,
         )
         _persist_conversation(
@@ -1404,19 +1399,18 @@ async def create_chat_completion(
 
 @router.post("/v1/responses")
 async def create_response(
-    request_data: ResponseCreateRequest,
-    request: Request,
+    request: ResponseCreateRequest,
     api_key: str = Depends(verify_api_key),
     tmp_dir: Path = Depends(get_temp_dir),
     image_store: Path = Depends(get_image_store_dir),
 ):
-    base_messages, norm_input = _response_items_to_messages(request_data.input)
-    struct_req = _build_structured_requirement(request_data.response_format)
+    base_messages, norm_input = _response_items_to_messages(request.input)
+    struct_req = _build_structured_requirement(request.response_format)
     extra_instr = [struct_req.instruction] if struct_req else []
 
     standard_tools, image_tools = [], []
-    if request_data.tools:
-        for t in request_data.tools:
+    if request.tools:
+        for t in request.tools:
             if isinstance(t, Tool):
                 standard_tools.append(t)
             elif isinstance(t, ResponseImageTool):
@@ -1429,18 +1423,14 @@ async def create_response(
 
     img_instr = _build_image_generation_instruction(
         image_tools,
-        request_data.tool_choice
-        if isinstance(request_data.tool_choice, ResponseToolChoice)
-        else None,
+        request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None,
     )
     if img_instr:
         extra_instr.append(img_instr)
-    preface = _instructions_to_messages(request_data.instructions)
+    preface = _instructions_to_messages(request.instructions)
     conv_messages = [*preface, *base_messages] if preface else base_messages
     model_tool_choice = (
-        request_data.tool_choice
-        if isinstance(request_data.tool_choice, (str, ToolChoiceFunction))
-        else None
+        request.tool_choice if isinstance(request.tool_choice, (str, ToolChoiceFunction)) else None
     )
 
     messages = _prepare_messages_for_model(
@@ -1448,15 +1438,13 @@ async def create_response(
     )
     pool, db = GeminiClientPool(), LMDBConversationStore()
     try:
-        model = _get_model_by_name(request_data.model)
+        model = _get_model_by_name(request.model)
     except ValueError as exc:
         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
 
     session, client, remain = await _find_reusable_session(db, pool, model, messages)
     if session:
-        msgs = _prepare_messages_for_model(
-            remain, request_data.tools, request_data.tool_choice, None, False
-        )
+        msgs = _prepare_messages_for_model(remain, request.tools, request.tool_choice, None, False)
         if not msgs:
             raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.")
         m_input, files = (
@@ -1485,26 +1473,25 @@ async def create_response(
             f"Client ID: {client.id}, Input length: {len(m_input)}, files count: {len(files)}"
         )
         resp_or_stream = await _send_with_split(
-            session, m_input, files=files, stream=request_data.stream
+            session, m_input, files=files, stream=request.stream
         )
     except Exception as e:
         logger.exception("Gemini API error")
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e))
 
-    if request_data.stream:
+    if request.stream:
         return _create_responses_real_streaming_response(
             resp_or_stream,
             response_id,
             created_time,
-            request_data.model,
+            request.model,
             messages,
             db,
             model,
             client,
             session,
-            request_data,
+            request,
             image_store,
-            str(request.base_url),
             struct_req,
         )
 
@@ -1520,7 +1507,7 @@ async def create_response(
     assistant_text, storage_output, tool_calls = _process_llm_output(raw_t, raw_c, struct_req)
     images = resp_or_stream.images or []
     if (
-        request_data.tool_choice is not None and request_data.tool_choice.type == "image_generation"
+        request.tool_choice is not None and request.tool_choice.type == "image_generation"
     ) and not images:
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned.")
 
@@ -1531,7 +1518,7 @@ async def create_response(
             contents.append(
                 ResponseOutputContent(
                     type="output_text",
-                    text=f"![{fname}]({request.base_url}images/{fname}?token={get_image_token(fname)})",
+                    text=f"![{fname}](images/{fname}?token={get_image_token(fname)})",
                 )
             )
             img_calls.append(
@@ -1565,13 +1552,12 @@ async def create_response(
     payload = _create_responses_standard_payload(
         response_id,
         created_time,
-        request_data.model,
-        assistant_text,
+        request.model,
         tool_calls,
         img_calls,
         contents,
         usage,
-        request_data,
+        request,
         norm_input,
     )
     _persist_conversation(
diff --git a/app/services/client.py b/app/services/client.py
index eda1691..dd1d74f 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -207,7 +207,7 @@ def _unescape_html(text_content: str) -> str:
             return "".join(parts)
 
         def _unescape_markdown(text_content: str) -> str:
-            """Remove backslash escapes for markdown characters in non-code sections."""
+            """Remove backslash escapes for Markdown characters in non-code sections."""
             parts: list[str] = []
             last_index = 0
             for match in CODE_FENCE_RE.finditer(text_content):

From c0b32c62113acdac21407c629252f35c2ed2bbf2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 30 Jan 2026 17:50:02 +0700
Subject: [PATCH 057/236] Enable real-time streaming responses and completely
 solve the issue with reusable sessions.

- Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR.
- Introduce a new feature for real-time streaming responses.
- Fully resolve the problem with reusable sessions.
- Break down similar flow logic into helper functions.
- All endpoints now support inline Markdown images.
- Switch large prompts to use BytesIO to avoid reading and writing to disk.
- Remove duplicate images when saving and responding.
---
 app/server/chat.py | 86 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 70 insertions(+), 16 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index ae1533e..4c64390 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1,4 +1,5 @@
 import base64
+import hashlib
 import io
 import reprlib
 import uuid
@@ -8,7 +9,7 @@
 from typing import Any, AsyncGenerator
 
 import orjson
-from fastapi import APIRouter, Depends, HTTPException, status
+from fastapi import APIRouter, Depends, HTTPException, Request, status
 from fastapi.responses import StreamingResponse
 from gemini_webapi import ModelOutput
 from gemini_webapi.client import ChatSession
@@ -72,8 +73,10 @@ class StructuredOutputRequirement:
 # --- Helper Functions ---
 
 
-async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | None, int | None, str]:
-    """Persist an image provided by gemini_webapi and return base64 plus dimensions and filename."""
+async def _image_to_base64(
+    image: Image, temp_dir: Path
+) -> tuple[str, int | None, int | None, str, str]:
+    """Persist an image provided by gemini_webapi and return base64 plus dimensions, filename, and hash."""
     if isinstance(image, GeneratedImage):
         try:
             saved_path = await image.save(path=str(temp_dir), full_size=True)
@@ -96,7 +99,8 @@ async def _image_to_base64(image: Image, temp_dir: Path) -> tuple[str, int | Non
     data = new_path.read_bytes()
     width, height = extract_image_dimensions(data)
     filename = random_name
-    return base64.b64encode(data).decode("ascii"), width, height, filename
+    file_hash = hashlib.sha256(data).hexdigest()
+    return base64.b64encode(data).decode("ascii"), width, height, filename, file_hash
 
 
 def _calculate_usage(
@@ -925,6 +929,7 @@ def _create_real_streaming_response(
     model: Model,
     client_wrapper: GeminiClientWrapper,
     session: ChatSession,
+    base_url: str,
     structured_requirement: StructuredOutputRequirement | None = None,
 ) -> StreamingResponse:
     """
@@ -1024,16 +1029,30 @@ async def generate_stream():
         )
 
         images = []
+        seen_urls = set()
         for out in all_outputs:
             if out.images:
-                images.extend(out.images)
+                for img in out.images:
+                    # Use the image URL as a stable identifier across chunks
+                    if img.url not in seen_urls:
+                        images.append(img)
+                        seen_urls.add(img.url)
 
         image_markdown = ""
+        seen_hashes = set()
         for image in images:
             try:
                 image_store = get_image_store_dir()
-                _, _, _, filename = await _image_to_base64(image, image_store)
-                img_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})"
+                _, _, _, filename, file_hash = await _image_to_base64(image, image_store)
+                if file_hash in seen_hashes:
+                    # Duplicate content, delete the file and skip
+                    (image_store / filename).unlink(missing_ok=True)
+                    continue
+                seen_hashes.add(file_hash)
+
+                img_url = (
+                    f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})"
+                )
                 image_markdown += f"\n\n{img_url}"
             except Exception as exc:
                 logger.warning(f"Failed to process image in OpenAI stream: {exc}")
@@ -1108,6 +1127,7 @@ def _create_responses_real_streaming_response(
     session: ChatSession,
     request: ResponseCreateRequest,
     image_store: Path,
+    base_url: str,
     structured_requirement: StructuredOutputRequirement | None = None,
 ) -> StreamingResponse:
     """
@@ -1172,16 +1192,30 @@ async def generate_stream():
         )
 
         images = []
+        seen_urls = set()
         for out in all_outputs:
             if out.images:
-                images.extend(out.images)
+                for img in out.images:
+                    if img.url not in seen_urls:
+                        images.append(img)
+                        seen_urls.add(img.url)
 
         response_contents, image_call_items = [], []
+        seen_hashes = set()
         for image in images:
             try:
-                image_base64, width, height, filename = await _image_to_base64(image, image_store)
+                image_base64, width, height, filename, file_hash = await _image_to_base64(
+                    image, image_store
+                )
+                if file_hash in seen_hashes:
+                    (image_store / filename).unlink(missing_ok=True)
+                    continue
+                seen_hashes.add(file_hash)
+
                 img_format = "png" if isinstance(image, GeneratedImage) else "jpeg"
-                image_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})"
+                image_url = (
+                    f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})"
+                )
                 image_call_items.append(
                     ResponseImageGenerationCall(
                         id=filename.rsplit(".", 1)[0],
@@ -1203,7 +1237,7 @@ async def generate_stream():
         image_markdown = ""
         for img_call in image_call_items:
             fname = f"{img_call.id}.{img_call.output_format}"
-            img_url = f"![{fname}](images/{fname}?token={get_image_token(fname)})"
+            img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
             image_markdown += f"\n\n{img_url}"
 
         if image_markdown:
@@ -1262,10 +1296,12 @@ async def list_models(api_key: str = Depends(verify_api_key)):
 @router.post("/v1/chat/completions")
 async def create_chat_completion(
     request: ChatCompletionRequest,
+    raw_request: Request,
     api_key: str = Depends(verify_api_key),
     tmp_dir: Path = Depends(get_temp_dir),
     image_store: Path = Depends(get_image_store_dir),
 ):
+    base_url = str(raw_request.base_url)
     pool, db = GeminiClientPool(), LMDBConversationStore()
     try:
         model = _get_model_by_name(request.model)
@@ -1339,6 +1375,7 @@ async def create_chat_completion(
             model,
             client,
             session,
+            base_url,
             structured_requirement,
         )
 
@@ -1358,10 +1395,18 @@ async def create_chat_completion(
     # Process images for OpenAI non-streaming flow
     images = resp_or_stream.images or []
     image_markdown = ""
+    seen_hashes = set()
     for image in images:
         try:
-            _, _, _, filename = await _image_to_base64(image, image_store)
-            img_url = f"![{filename}](images/{filename}?token={get_image_token(filename)})"
+            _, _, _, filename, file_hash = await _image_to_base64(image, image_store)
+            if file_hash in seen_hashes:
+                (image_store / filename).unlink(missing_ok=True)
+                continue
+            seen_hashes.add(file_hash)
+
+            img_url = (
+                f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})"
+            )
             image_markdown += f"\n\n{img_url}"
         except Exception as exc:
             logger.warning(f"Failed to process image in OpenAI response: {exc}")
@@ -1400,10 +1445,12 @@ async def create_chat_completion(
 @router.post("/v1/responses")
 async def create_response(
     request: ResponseCreateRequest,
+    raw_request: Request,
     api_key: str = Depends(verify_api_key),
     tmp_dir: Path = Depends(get_temp_dir),
     image_store: Path = Depends(get_image_store_dir),
 ):
+    base_url = str(raw_request.base_url)
     base_messages, norm_input = _response_items_to_messages(request.input)
     struct_req = _build_structured_requirement(request.response_format)
     extra_instr = [struct_req.instruction] if struct_req else []
@@ -1492,6 +1539,7 @@ async def create_response(
             session,
             request,
             image_store,
+            base_url,
             struct_req,
         )
 
@@ -1512,13 +1560,19 @@ async def create_response(
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned.")
 
     contents, img_calls = [], []
+    seen_hashes = set()
     for img in images:
         try:
-            b64, w, h, fname = await _image_to_base64(img, image_store)
+            b64, w, h, fname, fhash = await _image_to_base64(img, image_store)
+            if fhash in seen_hashes:
+                (image_store / fname).unlink(missing_ok=True)
+                continue
+            seen_hashes.add(fhash)
+
             contents.append(
                 ResponseOutputContent(
                     type="output_text",
-                    text=f"![{fname}](images/{fname}?token={get_image_token(fname)})",
+                    text=f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})",
                 )
             )
             img_calls.append(
@@ -1541,7 +1595,7 @@ async def create_response(
     image_markdown = ""
     for img_call in img_calls:
         fname = f"{img_call.id}.{img_call.output_format}"
-        img_url = f"![{fname}](images/{fname}?token={get_image_token(fname)})"
+        img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
         image_markdown += f"\n\n{img_url}"
 
     if image_markdown:

From 4d51a5fc8d19431712f2e13dd2d1a0395150e252 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 2 Feb 2026 18:33:27 +0700
Subject: [PATCH 058/236] Enable real-time streaming responses and completely
 solve the issue with reusable sessions.

- Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR.
- Introduce a new feature for real-time streaming responses.
- Fully resolve the problem with reusable sessions.
- Break down similar flow logic into helper functions.
- All endpoints now support inline Markdown images.
- Switch large prompts to use BytesIO to avoid reading and writing to disk.
- Remove duplicate images when saving and responding.
- In this patch: resolve missing names on `tool` messages from earlier assistant tool calls, keep empty tool results, and wrap tool results in `<tool_response>` tags.
---
 app/server/chat.py     | 11 +++++++++++
 app/services/client.py | 22 ++++++++++++++++------
 app/utils/helper.py    |  6 +++---
 3 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 4c64390..b8f611d 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -481,6 +481,17 @@ def _prepare_messages_for_model(
     """Return a copy of messages enriched with tool instructions when needed."""
     prepared = [msg.model_copy(deep=True) for msg in source_messages]
 
+    # Resolve tool names for 'tool' messages by looking back at previous assistant tool calls
+    tool_id_to_name = {}
+    for msg in prepared:
+        if msg.role == "assistant" and msg.tool_calls:
+            for tc in msg.tool_calls:
+                tool_id_to_name[tc.id] = tc.function.name
+
+    for msg in prepared:
+        if msg.role == "tool" and not msg.name and msg.tool_call_id:
+            msg.name = tool_id_to_name.get(msg.tool_call_id)
+
     instructions: list[str] = []
     if inject_system_defaults:
         if tools:
diff --git a/app/services/client.py b/app/services/client.py
index dd1d74f..803bc23 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -86,15 +86,15 @@ async def process_message(
 
         if isinstance(message.content, str):
             # Pure text content
-            if message.content:
-                text_fragments.append(message.content)
+            if message.content or message.role == "tool":
+                text_fragments.append(message.content or "")
         elif isinstance(message.content, list):
             # Mixed content (text, image_url, or file)
             for item in message.content:
                 if item.type == "text":
                     # Append multiple text fragments
-                    if item.text:
-                        text_fragments.append(item.text)
+                    if item.text or message.role == "tool":
+                        text_fragments.append(item.text or "")
 
                 elif item.type == "image_url":
                     if not item.image_url:
@@ -114,9 +114,19 @@ async def process_message(
                         files.append(await save_url_to_tempfile(url, tempdir))
                     else:
                         raise ValueError("File must contain 'file_data' or 'url' key")
+        elif message.content is None and message.role == "tool":
+            text_fragments.append("")
         elif message.content is not None:
             raise ValueError("Unsupported message content type.")
 
+        # Special handling for tool response format
+        if message.role == "tool":
+            tool_name = message.name or "unknown"
+            combined_content = "\n".join(text_fragments)
+            text_fragments = [
+                f'```xml\n<tool_response name="{tool_name}">{combined_content}</tool_response>\n```'
+            ]
+
         if message.tool_calls:
             tool_blocks: list[str] = []
             for call in message.tool_calls:
@@ -135,10 +145,10 @@ async def process_message(
                 tool_section = "```xml\n" + "".join(tool_blocks) + "\n```"
                 text_fragments.append(tool_section)
 
-        model_input = "\n".join(fragment for fragment in text_fragments if fragment)
+        model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
 
         # Add role tag if needed
-        if model_input:
+        if model_input or message.role == "tool":
             if tagged:
                 model_input = add_tag(message.role, model_input)
 
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 7606dd3..38b6400 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -110,9 +110,9 @@ def strip_code_fence(text: str) -> str:
 
 
 def strip_tagged_blocks(text: str) -> str:
-    """Remove <|im_start|>role ... <|im_end|> sections, dropping tool blocks entirely.
-    - tool blocks are removed entirely (if missing end marker, drop to EOF).
-    - other roles: remove markers and role, keep inner content (if missing end marker, keep to EOF).
+    """Remove <|im_start|>role ... <|im_end|> sections.
+    - tool blocks are removed entirely (including content).
+    - other roles: remove markers and role, keep inner content.
     """
     if not text:
         return text

From d69aaf02f2b6b7ff331564b526178a447c3b49e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 2 Feb 2026 18:52:51 +0700
Subject: [PATCH 059/236] Enable real-time streaming responses and completely
 solve the issue with reusable sessions.

- Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR.
- Introduce a new feature for real-time streaming responses.
- Fully resolve the problem with reusable sessions.
- Break down similar flow logic into helper functions.
- All endpoints now support inline Markdown images.
- Switch large prompts to use BytesIO to avoid reading and writing to disk.
- Remove duplicate images when saving and responding.
- In this patch: strip the combined tool result and substitute `{}` when it is empty.
---
 app/services/client.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/app/services/client.py b/app/services/client.py
index 803bc23..4146b7e 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -122,7 +122,10 @@ async def process_message(
         # Special handling for tool response format
         if message.role == "tool":
             tool_name = message.name or "unknown"
-            combined_content = "\n".join(text_fragments)
+            combined_content = "\n".join(text_fragments).strip()
+            # If the tool result is literally empty, provide a clear indicator like empty JSON
+            if not combined_content:
+                combined_content = "{}"
             text_fragments = [
                 f'```xml\n<tool_response name="{tool_name}">{combined_content}</tool_response>\n```'
             ]

From 8e15a8698d4a3df53a7bc3f676c63c0a492c9a01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 2 Feb 2026 19:27:08 +0700
Subject: [PATCH 060/236] Enable real-time streaming responses and completely
 solve the issue with reusable sessions.

- Ensure that PR https://github.com/HanaokaYuzu/Gemini-API/pull/220 is merged before proceeding with this PR.
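- Introduce a new feature for real-time streaming responses.
- Fully resolve the problem with reusable sessions.
- Break down similar flow logic into helper functions.
- All endpoints now support inline Markdown images.
- Switch large prompts to use BytesIO to avoid reading and writing to disk.
- Remove duplicate images when saving and responding.
- In this patch: drop the xml code fence around `<tool_response>`, use `{}` as the empty tool-result placeholder in every branch, and remove redundant comments and docstrings from app/services/client.py.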
---
 app/services/client.py | 43 +++++-------------------------------------
 1 file changed, 5 insertions(+), 38 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 4146b7e..a35146f 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -85,17 +85,13 @@ async def process_message(
         text_fragments: list[str] = []
 
         if isinstance(message.content, str):
-            # Pure text content
             if message.content or message.role == "tool":
-                text_fragments.append(message.content or "")
+                text_fragments.append(message.content or "{}")
         elif isinstance(message.content, list):
-            # Mixed content (text, image_url, or file)
             for item in message.content:
                 if item.type == "text":
-                    # Append multiple text fragments
                     if item.text or message.role == "tool":
-                        text_fragments.append(item.text or "")
-
+                        text_fragments.append(item.text or "{}")
                 elif item.type == "image_url":
                     if not item.image_url:
                         raise ValueError("Image URL cannot be empty")
@@ -103,7 +99,6 @@ async def process_message(
                         files.append(await save_url_to_tempfile(url, tempdir))
                     else:
                         raise ValueError("Image URL must contain 'url' key")
-
                 elif item.type == "file":
                     if not item.file:
                         raise ValueError("File cannot be empty")
@@ -115,19 +110,15 @@ async def process_message(
                     else:
                         raise ValueError("File must contain 'file_data' or 'url' key")
         elif message.content is None and message.role == "tool":
-            text_fragments.append("")
+            text_fragments.append("{}")
         elif message.content is not None:
             raise ValueError("Unsupported message content type.")
 
-        # Special handling for tool response format
         if message.role == "tool":
             tool_name = message.name or "unknown"
-            combined_content = "\n".join(text_fragments).strip()
-            # If the tool result is literally empty, provide a clear indicator like empty JSON
-            if not combined_content:
-                combined_content = "{}"
+            combined_content = "\n".join(text_fragments).strip() or "{}"
             text_fragments = [
-                f'```xml\n<tool_response name="{tool_name}">{combined_content}</tool_response>\n```'
+                f'<tool_response name="{tool_name}">{combined_content}</tool_response>'
             ]
 
         if message.tool_calls:
@@ -138,7 +129,6 @@ async def process_message(
                     parsed_args = orjson.loads(args_text)
                     args_text = orjson.dumps(parsed_args).decode("utf-8")
                 except orjson.JSONDecodeError:
-                    # Leave args_text as is if it is not valid JSON
                     pass
                 tool_blocks.append(
                     f'<tool_call name="{call.function.name}">{args_text}</tool_call>'
@@ -150,7 +140,6 @@ async def process_message(
 
         model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
 
-        # Add role tag if needed
         if model_input or message.role == "tool":
             if tagged:
                 model_input = add_tag(message.role, model_input)
@@ -161,51 +150,30 @@ async def process_message(
     async def process_conversation(
         messages: list[Message], tempdir: Path | None = None
     ) -> tuple[str, list[Path | str]]:
-        """
-        Process the entire conversation and return a formatted string and list of
-        files. The last message is assumed to be the assistant's response.
-        """
-        # Determine once whether we need to wrap messages with role tags: only required
-        # if the history already contains assistant/system messages. When every message
-        # so far is from the user, we can skip tagging entirely.
         need_tag = any(m.role != "user" for m in messages)
-
         conversation: list[str] = []
         files: list[Path | str] = []
-
         for msg in messages:
             input_part, files_part = await GeminiClientWrapper.process_message(
                 msg, tempdir, tagged=need_tag
             )
             conversation.append(input_part)
             files.extend(files_part)
-
-        # Append an opening assistant tag only when we used tags above so that Gemini
-        # knows where to start its reply.
         if need_tag:
             conversation.append(add_tag("assistant", "", unclose=True))
-
         return "\n".join(conversation), files
 
     @staticmethod
     def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str:
-        """
-        Extract and format the output text from a ModelOutput.
-        Includes reasoning thoughts (wrapped in <think> tags) and unescapes content.
-        """
         text = ""
-
         if include_thoughts and response.thoughts:
             text += f"<think>{response.thoughts}</think>\n"
-
         if response.text:
             text += response.text
         else:
             text += str(response)
 
-        # Fix some escaped characters
         def _unescape_html(text_content: str) -> str:
-            """Unescape HTML entities only in non-code sections of the text."""
             parts: list[str] = []
             last_index = 0
             for match in CODE_FENCE_RE.finditer(text_content):
@@ -220,7 +188,6 @@ def _unescape_html(text_content: str) -> str:
             return "".join(parts)
 
         def _unescape_markdown(text_content: str) -> str:
-            """Remove backslash escapes for Markdown characters in non-code sections."""
             parts: list[str] = []
             last_index = 0
             for match in CODE_FENCE_RE.finditer(text_content):

From 7716c62a8df23b6557841e5e4cdd571b025d5e4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 3 Feb 2026 09:33:18 +0700
Subject: [PATCH 061/236] build: update dependencies

---
 pyproject.toml |  4 +--
 uv.lock        | 82 ++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 61 insertions(+), 25 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1c30f8e..dc08571 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,10 +6,10 @@ readme = "README.md"
 requires-python = "==3.12.*"
 dependencies = [
     "fastapi>=0.128.0",
-    "gemini-webapi>=1.17.3",
+    "gemini-webapi>=1.18.0",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
-    "orjson>=3.11.5",
+    "orjson>=3.11.7",
     "pydantic-settings[yaml]>=2.12.0",
     "uvicorn>=0.40.0",
     "uvloop>=0.22.1; sys_platform != 'win32'",
diff --git a/uv.lock b/uv.lock
index 50a73be..34a949c 100644
--- a/uv.lock
+++ b/uv.lock
@@ -106,10 +106,10 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "fastapi", specifier = ">=0.128.0" },
-    { name = "gemini-webapi", specifier = ">=1.17.3" },
+    { name = "gemini-webapi", specifier = ">=1.18.0" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
-    { name = "orjson", specifier = ">=3.11.5" },
+    { name = "orjson", specifier = ">=3.11.7" },
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.14.14" },
     { name = "uvicorn", specifier = ">=0.40.0" },
@@ -122,17 +122,17 @@ dev = [{ name = "ruff", specifier = ">=0.14.14" }]
 
 [[package]]
 name = "gemini-webapi"
-version = "1.17.3"
+version = "1.18.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "httpx" },
+    { name = "httpx", extra = ["http2"] },
     { name = "loguru" },
     { name = "orjson" },
     { name = "pydantic" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/aa/74/1a31f3605250eb5cbcbfb15559c43b0d71734c8d286cfa9a7833841306e3/gemini_webapi-1.17.3.tar.gz", hash = "sha256:6201f9eaf5f562c5dc589d71c0edbba9e2eb8f780febbcf35307697bf474d577", size = 259418, upload-time = "2025-12-05T22:38:44.426Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c6/03/eb06536f287a8b7fb4808b00a60d9a9a3694f8a4079b77730325c639fbbe/gemini_webapi-1.18.0.tar.gz", hash = "sha256:0688a080fc3c95be55e723a66b2b69ec3ffcd58b07c50cf627d85d59d1181a86", size = 264630, upload-time = "2026-02-03T01:18:39.794Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/4c/a3/a88ff45197dce68a81d92c8d40368e4c26f67faf3af3273357f3f71f5c3d/gemini_webapi-1.17.3-py3-none-any.whl", hash = "sha256:d83969b1fa3236f3010d856d191b35264c936ece81f1be4c1de53ec1cf0855c8", size = 56659, upload-time = "2025-12-05T22:38:42.93Z" },
+    { url = "https://files.pythonhosted.org/packages/40/33/85f520f56faddd68442c7efe7086ff5593b213bd8fc3768835dbe610fd9b/gemini_webapi-1.18.0-py3-none-any.whl", hash = "sha256:2fe25b5f8185aba1ca109e1280ef3eb79e5bd8a81fba16e01fbc4a177b72362c", size = 61523, upload-time = "2026-02-03T01:18:38.322Z" },
 ]
 
 [[package]]
@@ -144,6 +144,28 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
 ]
 
+[[package]]
+name = "h2"
+version = "4.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "hpack" },
+    { name = "hyperframe" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" },
+]
+
+[[package]]
+name = "hpack"
+version = "4.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" },
+]
+
 [[package]]
 name = "httpcore"
 version = "1.0.9"
@@ -172,6 +194,20 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
 ]
 
+[package.optional-dependencies]
+http2 = [
+    { name = "h2" },
+]
+
+[[package]]
+name = "hyperframe"
+version = "6.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" },
+]
+
 [[package]]
 name = "idna"
 version = "3.11"
@@ -211,25 +247,25 @@ wheels = [
 
 [[package]]
 name = "orjson"
-version = "3.11.5"
+version = "3.11.7"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/04/b8/333fdb27840f3bf04022d21b654a35f58e15407183aeb16f3b41aa053446/orjson-3.11.5.tar.gz", hash = "sha256:82393ab47b4fe44ffd0a7659fa9cfaacc717eb617c93cde83795f14af5c2e9d5", size = 5972347, upload-time = "2025-12-06T15:55:39.458Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/53/45/b268004f745ede84e5798b48ee12b05129d19235d0e15267aa57dcdb400b/orjson-3.11.7.tar.gz", hash = "sha256:9b1a67243945819ce55d24a30b59d6a168e86220452d2c96f4d1f093e71c0c49", size = 6144992, upload-time = "2026-02-02T15:38:49.29Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ef/a4/8052a029029b096a78955eadd68ab594ce2197e24ec50e6b6d2ab3f4e33b/orjson-3.11.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:334e5b4bff9ad101237c2d799d9fd45737752929753bf4faf4b207335a416b7d", size = 245347, upload-time = "2025-12-06T15:54:22.061Z" },
-    { url = "https://files.pythonhosted.org/packages/64/67/574a7732bd9d9d79ac620c8790b4cfe0717a3d5a6eb2b539e6e8995e24a0/orjson-3.11.5-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:ff770589960a86eae279f5d8aa536196ebda8273a2a07db2a54e82b93bc86626", size = 129435, upload-time = "2025-12-06T15:54:23.615Z" },
-    { url = "https://files.pythonhosted.org/packages/52/8d/544e77d7a29d90cf4d9eecd0ae801c688e7f3d1adfa2ebae5e1e94d38ab9/orjson-3.11.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed24250e55efbcb0b35bed7caaec8cedf858ab2f9f2201f17b8938c618c8ca6f", size = 132074, upload-time = "2025-12-06T15:54:24.694Z" },
-    { url = "https://files.pythonhosted.org/packages/6e/57/b9f5b5b6fbff9c26f77e785baf56ae8460ef74acdb3eae4931c25b8f5ba9/orjson-3.11.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a66d7769e98a08a12a139049aac2f0ca3adae989817f8c43337455fbc7669b85", size = 130520, upload-time = "2025-12-06T15:54:26.185Z" },
-    { url = "https://files.pythonhosted.org/packages/f6/6d/d34970bf9eb33f9ec7c979a262cad86076814859e54eb9a059a52f6dc13d/orjson-3.11.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86cfc555bfd5794d24c6a1903e558b50644e5e68e6471d66502ce5cb5fdef3f9", size = 136209, upload-time = "2025-12-06T15:54:27.264Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/39/bc373b63cc0e117a105ea12e57280f83ae52fdee426890d57412432d63b3/orjson-3.11.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a230065027bc2a025e944f9d4714976a81e7ecfa940923283bca7bbc1f10f626", size = 139837, upload-time = "2025-12-06T15:54:28.75Z" },
-    { url = "https://files.pythonhosted.org/packages/cb/aa/7c4818c8d7d324da220f4f1af55c343956003aa4d1ce1857bdc1d396ba69/orjson-3.11.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b29d36b60e606df01959c4b982729c8845c69d1963f88686608be9ced96dbfaa", size = 137307, upload-time = "2025-12-06T15:54:29.856Z" },
-    { url = "https://files.pythonhosted.org/packages/46/bf/0993b5a056759ba65145effe3a79dd5a939d4a070eaa5da2ee3180fbb13f/orjson-3.11.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74099c6b230d4261fdc3169d50efc09abf38ace1a42ea2f9994b1d79153d477", size = 139020, upload-time = "2025-12-06T15:54:31.024Z" },
-    { url = "https://files.pythonhosted.org/packages/65/e8/83a6c95db3039e504eda60fc388f9faedbb4f6472f5aba7084e06552d9aa/orjson-3.11.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e697d06ad57dd0c7a737771d470eedc18e68dfdefcdd3b7de7f33dfda5b6212e", size = 141099, upload-time = "2025-12-06T15:54:32.196Z" },
-    { url = "https://files.pythonhosted.org/packages/b9/b4/24fdc024abfce31c2f6812973b0a693688037ece5dc64b7a60c1ce69e2f2/orjson-3.11.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e08ca8a6c851e95aaecc32bc44a5aa75d0ad26af8cdac7c77e4ed93acf3d5b69", size = 413540, upload-time = "2025-12-06T15:54:33.361Z" },
-    { url = "https://files.pythonhosted.org/packages/d9/37/01c0ec95d55ed0c11e4cae3e10427e479bba40c77312b63e1f9665e0737d/orjson-3.11.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e8b5f96c05fce7d0218df3fdfeb962d6b8cfff7e3e20264306b46dd8b217c0f3", size = 151530, upload-time = "2025-12-06T15:54:34.6Z" },
-    { url = "https://files.pythonhosted.org/packages/f9/d4/f9ebc57182705bb4bbe63f5bbe14af43722a2533135e1d2fb7affa0c355d/orjson-3.11.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ddbfdb5099b3e6ba6d6ea818f61997bb66de14b411357d24c4612cf1ebad08ca", size = 141863, upload-time = "2025-12-06T15:54:35.801Z" },
-    { url = "https://files.pythonhosted.org/packages/0d/04/02102b8d19fdcb009d72d622bb5781e8f3fae1646bf3e18c53d1bc8115b5/orjson-3.11.5-cp312-cp312-win32.whl", hash = "sha256:9172578c4eb09dbfcf1657d43198de59b6cef4054de385365060ed50c458ac98", size = 135255, upload-time = "2025-12-06T15:54:37.209Z" },
-    { url = "https://files.pythonhosted.org/packages/d4/fb/f05646c43d5450492cb387de5549f6de90a71001682c17882d9f66476af5/orjson-3.11.5-cp312-cp312-win_amd64.whl", hash = "sha256:2b91126e7b470ff2e75746f6f6ee32b9ab67b7a93c8ba1d15d3a0caaf16ec875", size = 133252, upload-time = "2025-12-06T15:54:38.401Z" },
-    { url = "https://files.pythonhosted.org/packages/dc/a6/7b8c0b26ba18c793533ac1cd145e131e46fcf43952aa94c109b5b913c1f0/orjson-3.11.5-cp312-cp312-win_arm64.whl", hash = "sha256:acbc5fac7e06777555b0722b8ad5f574739e99ffe99467ed63da98f97f9ca0fe", size = 126777, upload-time = "2025-12-06T15:54:39.515Z" },
+    { url = "https://files.pythonhosted.org/packages/80/bf/76f4f1665f6983385938f0e2a5d7efa12a58171b8456c252f3bae8a4cf75/orjson-3.11.7-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bd03ea7606833655048dab1a00734a2875e3e86c276e1d772b2a02556f0d895f", size = 228545, upload-time = "2026-02-02T15:37:46.376Z" },
+    { url = "https://files.pythonhosted.org/packages/79/53/6c72c002cb13b5a978a068add59b25a8bdf2800ac1c9c8ecdb26d6d97064/orjson-3.11.7-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:89e440ebc74ce8ab5c7bc4ce6757b4a6b1041becb127df818f6997b5c71aa60b", size = 125224, upload-time = "2026-02-02T15:37:47.697Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/83/10e48852865e5dd151bdfe652c06f7da484578ed02c5fca938e3632cb0b8/orjson-3.11.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ede977b5fe5ac91b1dffc0a517ca4542d2ec8a6a4ff7b2652d94f640796342a", size = 128154, upload-time = "2026-02-02T15:37:48.954Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/52/a66e22a2b9abaa374b4a081d410edab6d1e30024707b87eab7c734afe28d/orjson-3.11.7-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b7b1dae39230a393df353827c855a5f176271c23434cfd2db74e0e424e693e10", size = 123548, upload-time = "2026-02-02T15:37:50.187Z" },
+    { url = "https://files.pythonhosted.org/packages/de/38/605d371417021359f4910c496f764c48ceb8997605f8c25bf1dfe58c0ebe/orjson-3.11.7-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed46f17096e28fb28d2975834836a639af7278aa87c84f68ab08fbe5b8bd75fa", size = 129000, upload-time = "2026-02-02T15:37:51.426Z" },
+    { url = "https://files.pythonhosted.org/packages/44/98/af32e842b0ffd2335c89714d48ca4e3917b42f5d6ee5537832e069a4b3ac/orjson-3.11.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3726be79e36e526e3d9c1aceaadbfb4a04ee80a72ab47b3f3c17fefb9812e7b8", size = 141686, upload-time = "2026-02-02T15:37:52.607Z" },
+    { url = "https://files.pythonhosted.org/packages/96/0b/fc793858dfa54be6feee940c1463370ece34b3c39c1ca0aa3845f5ba9892/orjson-3.11.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0724e265bc548af1dedebd9cb3d24b4e1c1e685a343be43e87ba922a5c5fff2f", size = 130812, upload-time = "2026-02-02T15:37:53.944Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/91/98a52415059db3f374757d0b7f0f16e3b5cd5976c90d1c2b56acaea039e6/orjson-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7745312efa9e11c17fbd3cb3097262d079da26930ae9ae7ba28fb738367cbad", size = 133440, upload-time = "2026-02-02T15:37:55.615Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/b6/cb540117bda61791f46381f8c26c8f93e802892830a6055748d3bb1925ab/orjson-3.11.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f904c24bdeabd4298f7a977ef14ca2a022ca921ed670b92ecd16ab6f3d01f867", size = 138386, upload-time = "2026-02-02T15:37:56.814Z" },
+    { url = "https://files.pythonhosted.org/packages/63/1a/50a3201c334a7f17c231eee5f841342190723794e3b06293f26e7cf87d31/orjson-3.11.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b9fc4d0f81f394689e0814617aadc4f2ea0e8025f38c226cbf22d3b5ddbf025d", size = 408853, upload-time = "2026-02-02T15:37:58.291Z" },
+    { url = "https://files.pythonhosted.org/packages/87/cd/8de1c67d0be44fdc22701e5989c0d015a2adf391498ad42c4dc589cd3013/orjson-3.11.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:849e38203e5be40b776ed2718e587faf204d184fc9a008ae441f9442320c0cab", size = 144130, upload-time = "2026-02-02T15:38:00.163Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/fe/d605d700c35dd55f51710d159fc54516a280923cd1b7e47508982fbb387d/orjson-3.11.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4682d1db3bcebd2b64757e0ddf9e87ae5f00d29d16c5cdf3a62f561d08cc3dd2", size = 134818, upload-time = "2026-02-02T15:38:01.507Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/e4/15ecc67edb3ddb3e2f46ae04475f2d294e8b60c1825fbe28a428b93b3fbd/orjson-3.11.7-cp312-cp312-win32.whl", hash = "sha256:f4f7c956b5215d949a1f65334cf9d7612dde38f20a95f2315deef167def91a6f", size = 127923, upload-time = "2026-02-02T15:38:02.75Z" },
+    { url = "https://files.pythonhosted.org/packages/34/70/2e0855361f76198a3965273048c8e50a9695d88cd75811a5b46444895845/orjson-3.11.7-cp312-cp312-win_amd64.whl", hash = "sha256:bf742e149121dc5648ba0a08ea0871e87b660467ef168a3a5e53bc1fbd64bb74", size = 125007, upload-time = "2026-02-02T15:38:04.032Z" },
+    { url = "https://files.pythonhosted.org/packages/68/40/c2051bd19fc467610fed469dc29e43ac65891571138f476834ca192bc290/orjson-3.11.7-cp312-cp312-win_arm64.whl", hash = "sha256:26c3b9132f783b7d7903bf1efb095fed8d4a3a85ec0d334ee8beff3d7a4749d5", size = 126089, upload-time = "2026-02-02T15:38:05.297Z" },
 ]
 
 [[package]]

From 61672cc46948a501a0f2af3761eb231e40ec6831 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 3 Feb 2026 11:44:54 +0700
Subject: [PATCH 062/236] Refactor: Use `strip_system_hints` to standardize the
 content.

---
 app/services/client.py |  4 +++-
 app/services/lmdb.py   | 22 +++-------------------
 2 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index a35146f..89ad3ba 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -127,7 +127,9 @@ async def process_message(
                 args_text = call.function.arguments.strip()
                 try:
                     parsed_args = orjson.loads(args_text)
-                    args_text = orjson.dumps(parsed_args).decode("utf-8")
+                    args_text = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode(
+                        "utf-8"
+                    )
                 except orjson.JSONDecodeError:
                     pass
                 tool_blocks.append(
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 6ab2302..d5424e0 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -12,12 +12,9 @@
 from ..models import ContentItem, ConversationInStore, Message
 from ..utils import g_config
 from ..utils.helper import (
-    CODE_BLOCK_HINT,
-    CODE_HINT_STRIPPED,
-    XML_HINT_STRIPPED,
-    XML_WRAP_HINT,
     extract_tool_calls,
     remove_tool_call_blocks,
+    strip_system_hints,
 )
 from ..utils.singleton import Singleton
 
@@ -41,14 +38,7 @@ def _hash_message(message: Message) -> str:
         normalized = content.replace("\r\n", "\n")
 
         normalized = LMDBConversationStore.remove_think_tags(normalized)
-
-        for hint in [
-            XML_WRAP_HINT,
-            XML_HINT_STRIPPED,
-            CODE_BLOCK_HINT,
-            CODE_HINT_STRIPPED,
-        ]:
-            normalized = normalized.replace(hint, "")
+        normalized = strip_system_hints(normalized)
 
         if message.tool_calls:
             normalized = remove_tool_call_blocks(normalized)
@@ -70,13 +60,7 @@ def _hash_message(message: Message) -> str:
             if text_val:
                 text_val = text_val.replace("\r\n", "\n")
                 text_val = LMDBConversationStore.remove_think_tags(text_val)
-                for hint in [
-                    XML_WRAP_HINT,
-                    XML_HINT_STRIPPED,
-                    CODE_BLOCK_HINT,
-                    CODE_HINT_STRIPPED,
-                ]:
-                    text_val = text_val.replace(hint, "")
+                text_val = strip_system_hints(text_val)
                 text_val = remove_tool_call_blocks(text_val).strip()
                 if text_val:
                     text_parts.append(text_val)

From cc0b13f40b0ed875ba9f1274101c7c9be49e8e52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 3 Feb 2026 19:04:01 +0700
Subject: [PATCH 063/236] Refactor: Only inject code block hint if NOT a
 structured response request

---
 app/server/chat.py | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index b8f611d..608b52f 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -477,6 +477,7 @@ def _prepare_messages_for_model(
     tool_choice: str | ToolChoiceFunction | None,
     extra_instructions: list[str] | None = None,
     inject_system_defaults: bool = True,
+    is_structured: bool = False,
 ) -> list[Message]:
     """Return a copy of messages enriched with tool instructions when needed."""
     prepared = [msg.model_copy(deep=True) for msg in source_messages]
@@ -505,7 +506,8 @@ def _prepare_messages_for_model(
                 f"Applied {len(extra_instructions)} extra instructions for tool/structured output."
             )
 
-        if not _conversation_has_code_hint(prepared):
+        # Only inject code block hint if NOT a structured response request
+        if not is_structured and not _conversation_has_code_hint(prepared):
             instructions.append(CODE_BLOCK_HINT)
             logger.debug("Injected default code block hint for Gemini conversation.")
 
@@ -1326,7 +1328,11 @@ async def create_chat_completion(
 
     # This ensures that server-injected system instructions are part of the history
     msgs = _prepare_messages_for_model(
-        request.messages, request.tools, request.tool_choice, extra_instr
+        request.messages,
+        request.tools,
+        request.tool_choice,
+        extra_instr,
+        is_structured=structured_requirement is not None,
     )
 
     session, client, remain = await _find_reusable_session(db, pool, model, msgs)
@@ -1338,7 +1344,12 @@ async def create_chat_completion(
         # For reused sessions, we only need to process the remaining messages.
         # We don't re-inject system defaults to avoid duplicating instructions already in history.
         input_msgs = _prepare_messages_for_model(
-            remain, request.tools, request.tool_choice, extra_instr, False
+            remain,
+            request.tools,
+            request.tool_choice,
+            extra_instr,
+            False,
+            is_structured=structured_requirement is not None,
         )
         if len(input_msgs) == 1:
             m_input, files = await GeminiClientWrapper.process_message(
@@ -1492,7 +1503,11 @@ async def create_response(
     )
 
     messages = _prepare_messages_for_model(
-        conv_messages, standard_tools or None, model_tool_choice, extra_instr or None
+        conv_messages,
+        standard_tools or None,
+        model_tool_choice,
+        extra_instr or None,
+        is_structured=struct_req is not None,
     )
     pool, db = GeminiClientPool(), LMDBConversationStore()
     try:
@@ -1502,7 +1517,14 @@ async def create_response(
 
     session, client, remain = await _find_reusable_session(db, pool, model, messages)
     if session:
-        msgs = _prepare_messages_for_model(remain, request.tools, request.tool_choice, None, False)
+        msgs = _prepare_messages_for_model(
+            remain,
+            request.tools,
+            request.tool_choice,
+            None,
+            False,
+            is_structured=struct_req is not None,
+        )
         if not msgs:
             raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.")
         m_input, files = (

From 6b90e5d15d942c96ccfd272f9cb9ef23e4f7ac31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 3 Feb 2026 20:43:18 +0700
Subject: [PATCH 064/236] Refactor: Remove the code block hint entirely

---
 app/server/chat.py  | 56 +++++----------------------------------------
 app/utils/helper.py | 16 -------------
 2 files changed, 6 insertions(+), 66 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 608b52f..43f5e12 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -41,15 +41,12 @@
 from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore
 from ..utils import g_config
 from ..utils.helper import (
-    CODE_BLOCK_HINT,
-    CODE_HINT_STRIPPED,
-    CONTROL_TOKEN_RE,
     XML_HINT_STRIPPED,
     XML_WRAP_HINT,
     estimate_tokens,
     extract_image_dimensions,
     extract_tool_calls,
-    strip_code_fence,
+    strip_system_hints,
     text_from_message,
 )
 from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key
@@ -225,10 +222,9 @@ def _process_llm_output(
 
     if structured_requirement:
         cleaned_for_json = LMDBConversationStore.remove_think_tags(visible_output)
-        json_text = strip_code_fence(cleaned_for_json or "")
-        if json_text:
+        if cleaned_for_json:
             try:
-                structured_payload = orjson.loads(json_text)
+                structured_payload = orjson.loads(cleaned_for_json)
                 canonical_output = orjson.dumps(structured_payload).decode("utf-8")
                 visible_output = canonical_output
                 storage_output = canonical_output
@@ -450,27 +446,6 @@ def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None:
             return
 
 
-def _conversation_has_code_hint(messages: list[Message]) -> bool:
-    """Return True if any system message already includes the code block hint."""
-    for msg in messages:
-        if msg.role != "system" or msg.content is None:
-            continue
-
-        if isinstance(msg.content, str):
-            if CODE_HINT_STRIPPED in msg.content:
-                return True
-            continue
-
-        if isinstance(msg.content, list):
-            for part in msg.content:
-                if getattr(part, "type", None) != "text":
-                    continue
-                if part.text and CODE_HINT_STRIPPED in part.text:
-                    return True
-
-    return False
-
-
 def _prepare_messages_for_model(
     source_messages: list[Message],
     tools: list[Tool] | None,
@@ -506,11 +481,6 @@ def _prepare_messages_for_model(
                 f"Applied {len(extra_instructions)} extra instructions for tool/structured output."
             )
 
-        # Only inject code block hint if NOT a structured response request
-        if not is_structured and not _conversation_has_code_hint(prepared):
-            instructions.append(CODE_BLOCK_HINT)
-            logger.debug("Injected default code block hint for Gemini conversation.")
-
     if not instructions:
         if tools and tool_choice != "none":
             _append_xml_hint_to_last_user_message(prepared)
@@ -791,7 +761,7 @@ class StreamingOutputFilter:
     2. ChatML tool blocks: <|im_start|>tool\n...<|im_end|>
     3. ChatML role headers: <|im_start|>role\n (only suppresses the header, keeps content)
     4. Control tokens: <|im_start|>, <|im_end|>
-    5. System instructions/hints: XML_WRAP_HINT, CODE_BLOCK_HINT, etc.
+    5. System instructions/hints.
     """
 
     def __init__(self):
@@ -805,12 +775,6 @@ def __init__(self):
         self.XML_END = "```"
         self.TAG_START = "<|im_start|>"
         self.TAG_END = "<|im_end|>"
-        self.SYSTEM_HINTS = [
-            XML_WRAP_HINT,
-            XML_HINT_STRIPPED,
-            CODE_BLOCK_HINT,
-            CODE_HINT_STRIPPED,
-        ]
 
     def process(self, chunk: str) -> str:
         self.buffer += chunk
@@ -906,11 +870,7 @@ def process(self, chunk: str) -> str:
                     break
 
         # Final pass: filter out system hints from the text to be yielded
-        for hint in self.SYSTEM_HINTS:
-            if hint in to_yield:
-                to_yield = to_yield.replace(hint, "")
-
-        return to_yield
+        return strip_system_hints(to_yield)
 
     def flush(self) -> str:
         # If we are stuck in a tool block or role header at the end,
@@ -922,11 +882,7 @@ def flush(self) -> str:
         self.buffer = ""
 
         # Filter out any orphaned/partial control tokens or hints
-        final_text = CONTROL_TOKEN_RE.sub("", final_text)
-        for hint in self.SYSTEM_HINTS:
-            final_text = final_text.replace(hint, "")
-
-        return final_text.strip()
+        return strip_system_hints(final_text)
 
 
 # --- Response Builders & Streaming ---
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 38b6400..1281f9b 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -19,19 +19,12 @@
     '```xml\n<tool_call name="tool_name">{"arg": "value"}</tool_call>\n```\n'
     "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n"
 )
-CODE_BLOCK_HINT = (
-    "\nWhenever you include code, markup, or shell snippets, wrap each snippet in a Markdown fenced "
-    "block and supply the correct language label (for example, ```python ... ``` or ```html ... ```).\n"
-    "Fence ONLY the actual code/markup; keep all narrative or explanatory text outside the fences.\n"
-)
 TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)
 TOOL_CALL_RE = re.compile(
     r"<tool_call\s+name=\"([^\"]+)\"\s*>(.*?)</tool_call>", re.DOTALL | re.IGNORECASE
 )
-JSON_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL | re.IGNORECASE)
 CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>")
 XML_HINT_STRIPPED = XML_WRAP_HINT.strip()
-CODE_HINT_STRIPPED = CODE_BLOCK_HINT.strip()
 
 
 def add_tag(role: str, content: str, unclose: bool = False) -> str:
@@ -101,14 +94,6 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
     return path
 
 
-def strip_code_fence(text: str) -> str:
-    """Remove surrounding ```json fences if present."""
-    match = JSON_FENCE_RE.match(text.strip())
-    if match:
-        return match.group(1).strip()
-    return text.strip()
-
-
 def strip_tagged_blocks(text: str) -> str:
     """Remove <|im_start|>role ... <|im_end|> sections.
     - tool blocks are removed entirely (including content).
@@ -166,7 +151,6 @@ def strip_system_hints(text: str) -> str:
         return text
     cleaned = strip_tagged_blocks(text)
     cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "")
-    cleaned = cleaned.replace(CODE_BLOCK_HINT, "").replace(CODE_HINT_STRIPPED, "")
     cleaned = CONTROL_TOKEN_RE.sub("", cleaned)
     return cleaned.strip()
 

From 553bd94b4631832694de40fe7246063ab359fc46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 3 Feb 2026 20:52:54 +0700
Subject: [PATCH 065/236] Refactor: Remove the code block hint entirely

---
 app/server/chat.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 43f5e12..0bb2722 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -452,7 +452,6 @@ def _prepare_messages_for_model(
     tool_choice: str | ToolChoiceFunction | None,
     extra_instructions: list[str] | None = None,
     inject_system_defaults: bool = True,
-    is_structured: bool = False,
 ) -> list[Message]:
     """Return a copy of messages enriched with tool instructions when needed."""
     prepared = [msg.model_copy(deep=True) for msg in source_messages]
@@ -1288,7 +1287,6 @@ async def create_chat_completion(
         request.tools,
         request.tool_choice,
         extra_instr,
-        is_structured=structured_requirement is not None,
     )
 
     session, client, remain = await _find_reusable_session(db, pool, model, msgs)
@@ -1305,7 +1303,6 @@ async def create_chat_completion(
             request.tool_choice,
             extra_instr,
             False,
-            is_structured=structured_requirement is not None,
         )
         if len(input_msgs) == 1:
             m_input, files = await GeminiClientWrapper.process_message(
@@ -1463,7 +1460,6 @@ async def create_response(
         standard_tools or None,
         model_tool_choice,
         extra_instr or None,
-        is_structured=struct_req is not None,
     )
     pool, db = GeminiClientPool(), LMDBConversationStore()
     try:
@@ -1479,7 +1475,6 @@ async def create_response(
             request.tool_choice,
             None,
             False,
-            is_structured=struct_req is not None,
         )
         if not msgs:
             raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.")

From fd767dad79266486f14bd5610054220881bc9a73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 3 Feb 2026 22:40:38 +0700
Subject: [PATCH 066/236] Refactor: fix missing whitespace in the streaming
 response.

---
 app/utils/helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 1281f9b..a8b40aa 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -152,7 +152,7 @@ def strip_system_hints(text: str) -> str:
     cleaned = strip_tagged_blocks(text)
     cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "")
     cleaned = CONTROL_TOKEN_RE.sub("", cleaned)
-    return cleaned.strip()
+    return cleaned
 
 
 def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]:

From 4beb33bb5a27368f0a14c769c213aea44103c100 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 4 Feb 2026 07:24:13 +0700
Subject: [PATCH 067/236] Refactor: remove unnecessary code

---
 app/services/lmdb.py | 13 +------------
 app/utils/helper.py  |  2 +-
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index d5424e0..8dc3722 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -14,7 +14,6 @@
 from ..utils.helper import (
     extract_tool_calls,
     remove_tool_call_blocks,
-    strip_system_hints,
 )
 from ..utils.singleton import Singleton
 
@@ -36,17 +35,8 @@ def _hash_message(message: Message) -> str:
         core_data["content"] = None
     elif isinstance(content, str):
         normalized = content.replace("\r\n", "\n")
-
         normalized = LMDBConversationStore.remove_think_tags(normalized)
-        normalized = strip_system_hints(normalized)
-
-        if message.tool_calls:
-            normalized = remove_tool_call_blocks(normalized)
-        else:
-            temp_text, _extracted = extract_tool_calls(normalized)
-            normalized = temp_text
-
-        normalized = normalized.strip()
+        normalized = remove_tool_call_blocks(normalized).strip()
         core_data["content"] = normalized if normalized else None
     elif isinstance(content, list):
         text_parts = []
@@ -60,7 +50,6 @@ def _hash_message(message: Message) -> str:
             if text_val:
                 text_val = text_val.replace("\r\n", "\n")
                 text_val = LMDBConversationStore.remove_think_tags(text_val)
-                text_val = strip_system_hints(text_val)
                 text_val = remove_tool_call_blocks(text_val).strip()
                 if text_val:
                     text_parts.append(text_val)
diff --git a/app/utils/helper.py b/app/utils/helper.py
index a8b40aa..b6bb5cb 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -16,7 +16,7 @@
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 XML_WRAP_HINT = (
     "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n"
-    '```xml\n<tool_call name="tool_name">{"arg": "value"}</tool_call>\n```\n'
+    '```xml\n<tool_call name="tool_name">{"argument": "value"}</tool_call>\n```\n'
     "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n"
 )
 TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)

From 6b8dd4e5b893e689521efe93261825e95f1a1a84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 4 Feb 2026 09:35:21 +0700
Subject: [PATCH 068/236] Refactor: Update `StreamingOutputFilter` logic to
 improve handling of streaming responses

---
 app/server/chat.py  | 187 ++++++++++++++++++++++++--------------------
 app/utils/helper.py |  16 +++-
 2 files changed, 115 insertions(+), 88 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 0bb2722..87f29a6 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -41,6 +41,8 @@
 from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore
 from ..utils import g_config
 from ..utils.helper import (
+    XML_HINT_LINE_END,
+    XML_HINT_LINE_START,
     XML_HINT_STRIPPED,
     XML_WRAP_HINT,
     estimate_tokens,
@@ -755,133 +757,146 @@ async def _send_with_split(
 
 class StreamingOutputFilter:
     """
-    Enhanced streaming filter that suppresses:
-    1. XML tool call blocks: ```xml ... ```
-    2. ChatML tool blocks: <|im_start|>tool\n...<|im_end|>
-    3. ChatML role headers: <|im_start|>role\n (only suppresses the header, keeps content)
-    4. Control tokens: <|im_start|>, <|im_end|>
-    5. System instructions/hints.
+    Simplified State Machine filter to suppress technical markers, tool calls, and system hints.
+    States: NORMAL, IN_XML, IN_TAG, IN_BLOCK, IN_HINT
     """
 
     def __init__(self):
         self.buffer = ""
-        self.in_xml_tool = False
-        self.in_tagged_block = False
-        self.in_role_header = False
+        self.state = "NORMAL"
         self.current_role = ""
+        self.block_buffer = ""
 
         self.XML_START = "```xml"
         self.XML_END = "```"
         self.TAG_START = "<|im_start|>"
         self.TAG_END = "<|im_end|>"
+        self.HINT_START = f"\n{XML_HINT_LINE_START}" if XML_HINT_LINE_START else ""
+        self.HINT_END = XML_HINT_LINE_END
+
+        self.WATCH_PREFIXES = [self.XML_START, self.TAG_START, self.TAG_END]
+        if self.HINT_START:
+            self.WATCH_PREFIXES.append(self.HINT_START)
 
     def process(self, chunk: str) -> str:
         self.buffer += chunk
-        to_yield = ""
+        output = []
 
         while self.buffer:
-            if self.in_xml_tool:
+            if self.state == "NORMAL":
+                xml_idx = self.buffer.find(self.XML_START)
+                tag_idx = self.buffer.find(self.TAG_START)
+                end_idx = self.buffer.find(self.TAG_END)
+                hint_idx = self.buffer.find(self.HINT_START)
+
+                indices = [
+                    (i, t)
+                    for i, t in [
+                        (xml_idx, "XML"),
+                        (tag_idx, "TAG"),
+                        (end_idx, "END"),
+                        (hint_idx, "HINT"),
+                    ]
+                    if i != -1
+                ]
+
+                if not indices:
+                    keep_len = 0
+                    for p in self.WATCH_PREFIXES:
+                        for i in range(len(p) - 1, 0, -1):
+                            if self.buffer.endswith(p[:i]):
+                                keep_len = max(keep_len, i)
+                                break
+
+                    yield_len = len(self.buffer) - keep_len
+                    if yield_len > 0:
+                        output.append(self.buffer[:yield_len])
+                        self.buffer = self.buffer[yield_len:]
+                    break
+
+                indices.sort()
+                idx, m_type = indices[0]
+                output.append(self.buffer[:idx])
+                self.buffer = self.buffer[idx:]
+
+                if m_type == "XML":
+                    self.state = "IN_XML"
+                    self.block_buffer = ""
+                    self.buffer = self.buffer[len(self.XML_START) :]
+                elif m_type == "TAG":
+                    self.state = "IN_TAG"
+                    self.buffer = self.buffer[len(self.TAG_START) :]
+                elif m_type == "END":
+                    self.buffer = self.buffer[len(self.TAG_END) :]
+                elif m_type == "HINT":
+                    self.state = "IN_HINT"
+                    self.buffer = self.buffer[len(self.HINT_START) :]
+
+            elif self.state == "IN_HINT":
+                end_idx = self.buffer.find(self.HINT_END)
+                if end_idx != -1:
+                    self.buffer = self.buffer[end_idx + len(self.HINT_END) :]
+                    self.state = "NORMAL"
+                else:
+                    self.buffer = ""
+                    break
+
+            elif self.state == "IN_XML":
                 end_idx = self.buffer.find(self.XML_END)
                 if end_idx != -1:
+                    content = self.block_buffer + self.buffer[:end_idx]
+                    if "<tool_call" not in content.lower():
+                        output.append(f"{self.XML_START}{content}{self.XML_END}")
                     self.buffer = self.buffer[end_idx + len(self.XML_END) :]
-                    self.in_xml_tool = False
+                    self.state = "NORMAL"
                 else:
+                    self.block_buffer += self.buffer
+                    self.buffer = ""
                     break
-            elif self.in_role_header:
+
+            elif self.state == "IN_TAG":
                 nl_idx = self.buffer.find("\n")
                 if nl_idx != -1:
-                    role_text = self.buffer[:nl_idx].strip().lower()
-                    self.current_role = role_text
+                    self.current_role = self.buffer[:nl_idx].strip().lower()
                     self.buffer = self.buffer[nl_idx + 1 :]
-                    self.in_role_header = False
-                    self.in_tagged_block = True
+                    self.state = "IN_BLOCK"
                 else:
                     break
-            elif self.in_tagged_block:
+
+            elif self.state == "IN_BLOCK":
                 end_idx = self.buffer.find(self.TAG_END)
                 if end_idx != -1:
                     content = self.buffer[:end_idx]
                     if self.current_role != "tool":
-                        to_yield += content
+                        output.append(content)
                     self.buffer = self.buffer[end_idx + len(self.TAG_END) :]
-                    self.in_tagged_block = False
+                    self.state = "NORMAL"
                     self.current_role = ""
                 else:
-                    if self.current_role == "tool":
-                        break
-                    else:
+                    if self.current_role != "tool":
                         yield_len = len(self.buffer) - (len(self.TAG_END) - 1)
                         if yield_len > 0:
-                            to_yield += self.buffer[:yield_len]
+                            output.append(self.buffer[:yield_len])
                             self.buffer = self.buffer[yield_len:]
-                        break
-            else:
-                # Outside any special block. Look for starts.
-                earliest_idx = -1
-                match_type = ""
-
-                xml_idx = self.buffer.find(self.XML_START)
-                if xml_idx != -1:
-                    earliest_idx = xml_idx
-                    match_type = "xml"
-
-                tag_s_idx = self.buffer.find(self.TAG_START)
-                if tag_s_idx != -1:
-                    if earliest_idx == -1 or tag_s_idx < earliest_idx:
-                        earliest_idx = tag_s_idx
-                        match_type = "tag_start"
-
-                tag_e_idx = self.buffer.find(self.TAG_END)
-                if tag_e_idx != -1:
-                    if earliest_idx == -1 or tag_e_idx < earliest_idx:
-                        earliest_idx = tag_e_idx
-                        match_type = "tag_end"
-
-                if earliest_idx != -1:
-                    # Yield text before the match
-                    to_yield += self.buffer[:earliest_idx]
-                    self.buffer = self.buffer[earliest_idx:]
-
-                    if match_type == "xml":
-                        self.in_xml_tool = True
-                        self.buffer = self.buffer[len(self.XML_START) :]
-                    elif match_type == "tag_start":
-                        self.in_role_header = True
-                        self.buffer = self.buffer[len(self.TAG_START) :]
-                    elif match_type == "tag_end":
-                        # Orphaned end tag, just skip it
-                        self.buffer = self.buffer[len(self.TAG_END) :]
-                    continue
-                else:
-                    # Check for prefixes
-                    prefixes = [self.XML_START, self.TAG_START, self.TAG_END]
-                    max_keep = 0
-                    for p in prefixes:
-                        for i in range(len(p) - 1, 0, -1):
-                            if self.buffer.endswith(p[:i]):
-                                max_keep = max(max_keep, i)
-                                break
-
-                    yield_len = len(self.buffer) - max_keep
-                    if yield_len > 0:
-                        to_yield += self.buffer[:yield_len]
-                        self.buffer = self.buffer[yield_len:]
+                    else:
+                        self.buffer = ""
                     break
 
-        # Final pass: filter out system hints from the text to be yielded
-        return strip_system_hints(to_yield)
+        return "".join(output)
 
     def flush(self) -> str:
-        # If we are stuck in a tool block or role header at the end,
-        # it usually means malformed output.
-        if self.in_xml_tool or (self.in_tagged_block and self.current_role == "tool"):
-            return ""
+        res = ""
+        if self.state == "IN_XML":
+            if "<tool_call" not in self.block_buffer.lower():
+                res = f"{self.XML_START}{self.block_buffer}"
+        elif self.state == "IN_BLOCK" and self.current_role != "tool":
+            res = self.buffer
+        elif self.state == "NORMAL":
+            res = self.buffer
 
-        final_text = self.buffer
         self.buffer = ""
-
-        # Filter out any orphaned/partial control tokens or hints
-        return strip_system_hints(final_text)
+        self.state = "NORMAL"
+        return strip_system_hints(res)
 
 
 # --- Response Builders & Streaming ---
diff --git a/app/utils/helper.py b/app/utils/helper.py
index b6bb5cb..78494a3 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -25,6 +25,9 @@
 )
 CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>")
 XML_HINT_STRIPPED = XML_WRAP_HINT.strip()
+_hint_lines = [line.strip() for line in XML_WRAP_HINT.split("\n") if line.strip()]
+XML_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
+XML_HINT_LINE_END = _hint_lines[-1] if _hint_lines else ""
 
 
 def add_tag(role: str, content: str, unclose: bool = False) -> str:
@@ -149,8 +152,17 @@ def strip_system_hints(text: str) -> str:
     """Remove system-level hint text from a given string."""
     if not text:
         return text
-    cleaned = strip_tagged_blocks(text)
-    cleaned = cleaned.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "")
+
+    # Remove the full hints first
+    cleaned = text.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "")
+
+    # Remove fragments using derived constants
+    if XML_HINT_LINE_START:
+        cleaned = re.sub(rf"\n?{re.escape(XML_HINT_LINE_START)}:?\s*", "", cleaned)
+    if XML_HINT_LINE_END:
+        cleaned = re.sub(rf"\s*{re.escape(XML_HINT_LINE_END)}\.?\n?", "", cleaned)
+
+    cleaned = strip_tagged_blocks(cleaned)
     cleaned = CONTROL_TOKEN_RE.sub("", cleaned)
     return cleaned
 

From d86ae59e5f1037bdbc79c9a84624116f23f6a302 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 4 Feb 2026 20:17:32 +0700
Subject: [PATCH 069/236] Refactor: Adjust function call format to prevent
 streaming issues

The streaming issues are caused by Gemini Web's post-processing mechanism.
---
 app/server/chat.py     | 62 +++++++++++++++++++++++-------------------
 app/services/client.py | 16 +++++------
 app/utils/helper.py    | 12 ++++----
 3 files changed, 47 insertions(+), 43 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 87f29a6..ed0d731 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -367,19 +367,17 @@ def _build_tool_prompt(
         )
 
     lines.append(
-        "When you decide to call a tool you MUST respond with nothing except a single fenced block exactly like the template below."
+        "When you decide to call a tool you MUST respond with nothing except a single [function_calls] block exactly like the template below."
     )
+    lines.append("Do not add text before or after it.")
+    lines.append("[function_calls]")
+    lines.append('[call:tool_name]{"argument": "value"}[/call]')
+    lines.append("[/function_calls]")
     lines.append(
-        "The fenced block MUST use ```xml as the opening fence and ``` as the closing fence. Do not add text before or after it."
+        "Use double quotes for JSON keys and values. If you omit the block or include any extra text, the system will assume you are NOT calling a tool and your request will fail."
     )
-    lines.append("```xml")
-    lines.append('<tool_call name="tool_name">{"argument": "value"}</tool_call>')
-    lines.append("```")
     lines.append(
-        "Use double quotes for JSON keys and values. If you omit the fenced block or include any extra text, the system will assume you are NOT calling a tool and your request will fail."
-    )
-    lines.append(
-        "If multiple tool calls are required, include multiple <tool_call> entries inside the same fenced block. Without a tool call, reply normally and do NOT emit any ```xml fence."
+        "If multiple tool calls are required, include multiple [call:...]...[/call] entries inside the same [function_calls] block. Without a tool call, reply normally and do NOT emit any [function_calls] tag."
     )
 
     return "\n".join(lines)
@@ -757,8 +755,8 @@ async def _send_with_split(
 
 class StreamingOutputFilter:
     """
-    Simplified State Machine filter to suppress technical markers, tool calls, and system hints.
-    States: NORMAL, IN_XML, IN_TAG, IN_BLOCK, IN_HINT
+    State Machine filter to suppress technical markers, tool calls, and system hints.
+    Handles fragmentation where markers are split across multiple chunks.
     """
 
     def __init__(self):
@@ -767,12 +765,13 @@ def __init__(self):
         self.current_role = ""
         self.block_buffer = ""
 
-        self.XML_START = "```xml"
-        self.XML_END = "```"
+        self.XML_START = "[function_calls]"
+        self.XML_END = "[/function_calls]"
         self.TAG_START = "<|im_start|>"
         self.TAG_END = "<|im_end|>"
         self.HINT_START = f"\n{XML_HINT_LINE_START}" if XML_HINT_LINE_START else ""
         self.HINT_END = XML_HINT_LINE_END
+        self.TOOL_START = "[call:"
 
         self.WATCH_PREFIXES = [self.XML_START, self.TAG_START, self.TAG_END]
         if self.HINT_START:
@@ -787,7 +786,7 @@ def process(self, chunk: str) -> str:
                 xml_idx = self.buffer.find(self.XML_START)
                 tag_idx = self.buffer.find(self.TAG_START)
                 end_idx = self.buffer.find(self.TAG_END)
-                hint_idx = self.buffer.find(self.HINT_START)
+                hint_idx = self.buffer.find(self.HINT_START) if self.HINT_START else -1
 
                 indices = [
                     (i, t)
@@ -801,13 +800,13 @@ def process(self, chunk: str) -> str:
                 ]
 
                 if not indices:
+                    # Guard against split start markers
                     keep_len = 0
                     for p in self.WATCH_PREFIXES:
                         for i in range(len(p) - 1, 0, -1):
                             if self.buffer.endswith(p[:i]):
                                 keep_len = max(keep_len, i)
                                 break
-
                     yield_len = len(self.buffer) - keep_len
                     if yield_len > 0:
                         output.append(self.buffer[:yield_len])
@@ -838,20 +837,24 @@ def process(self, chunk: str) -> str:
                     self.buffer = self.buffer[end_idx + len(self.HINT_END) :]
                     self.state = "NORMAL"
                 else:
-                    self.buffer = ""
+                    # Keep end of buffer to avoid missing split HINT_END
+                    keep_len = len(self.HINT_END) - 1
+                    if len(self.buffer) > keep_len:
+                        self.buffer = self.buffer[-keep_len:]
                     break
 
             elif self.state == "IN_XML":
                 end_idx = self.buffer.find(self.XML_END)
                 if end_idx != -1:
-                    content = self.block_buffer + self.buffer[:end_idx]
-                    if "<tool_call" not in content.lower():
-                        output.append(f"{self.XML_START}{content}{self.XML_END}")
+                    self.block_buffer += self.buffer[:end_idx]
                     self.buffer = self.buffer[end_idx + len(self.XML_END) :]
                     self.state = "NORMAL"
                 else:
-                    self.block_buffer += self.buffer
-                    self.buffer = ""
+                    # Accumulate and keep potential split end marker
+                    keep_len = len(self.XML_END) - 1
+                    if len(self.buffer) > keep_len:
+                        self.block_buffer += self.buffer[:-keep_len]
+                        self.buffer = self.buffer[-keep_len:]
                     break
 
             elif self.state == "IN_TAG":
@@ -873,21 +876,24 @@ def process(self, chunk: str) -> str:
                     self.state = "NORMAL"
                     self.current_role = ""
                 else:
+                    # Yield safe part and keep potential split TAG_END
+                    keep_len = len(self.TAG_END) - 1
                     if self.current_role != "tool":
-                        yield_len = len(self.buffer) - (len(self.TAG_END) - 1)
-                        if yield_len > 0:
-                            output.append(self.buffer[:yield_len])
-                            self.buffer = self.buffer[yield_len:]
+                        if len(self.buffer) > keep_len:
+                            output.append(self.buffer[:-keep_len])
+                            self.buffer = self.buffer[-keep_len:]
+                        break
                     else:
-                        self.buffer = ""
-                    break
+                        if len(self.buffer) > keep_len:
+                            self.buffer = self.buffer[-keep_len:]
+                        break
 
         return "".join(output)
 
     def flush(self) -> str:
         res = ""
         if self.state == "IN_XML":
-            if "<tool_call" not in self.block_buffer.lower():
+            if self.TOOL_START not in self.block_buffer.lower():
                 res = f"{self.XML_START}{self.block_buffer}"
         elif self.state == "IN_BLOCK" and self.current_role != "tool":
             res = self.buffer
diff --git a/app/services/client.py b/app/services/client.py
index 89ad3ba..bc6c297 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -16,7 +16,7 @@
 )
 
 HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);")
-MARKDOWN_ESCAPE_RE = re.compile(r"\\(?=[-\\`*_{}\[\]()#+.!<>])")
+ESC_SYMBOLS_RE = re.compile(r"\\(?=[\\\[\]{}()<>`*_#~+.:!&^$|-])")
 CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL)
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
@@ -132,12 +132,10 @@ async def process_message(
                     )
                 except orjson.JSONDecodeError:
                     pass
-                tool_blocks.append(
-                    f'<tool_call name="{call.function.name}">{args_text}</tool_call>'
-                )
+                tool_blocks.append(f"[call:{call.function.name}]{args_text}[/call]")
 
             if tool_blocks:
-                tool_section = "```xml\n" + "".join(tool_blocks) + "\n```"
+                tool_section = "[function_calls]\n" + "".join(tool_blocks) + "\n[/function_calls]"
                 text_fragments.append(tool_section)
 
         model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
@@ -189,22 +187,22 @@ def _unescape_html(text_content: str) -> str:
                 parts.append(HTML_ESCAPE_RE.sub(lambda m: html.unescape(m.group(0)), tail))
             return "".join(parts)
 
-        def _unescape_markdown(text_content: str) -> str:
+        def _unescape_symbols(text_content: str) -> str:
             parts: list[str] = []
             last_index = 0
             for match in CODE_FENCE_RE.finditer(text_content):
                 non_code = text_content[last_index : match.start()]
                 if non_code:
-                    parts.append(MARKDOWN_ESCAPE_RE.sub("", non_code))
+                    parts.append(ESC_SYMBOLS_RE.sub("", non_code))
                 parts.append(match.group(0))
                 last_index = match.end()
             tail = text_content[last_index:]
             if tail:
-                parts.append(MARKDOWN_ESCAPE_RE.sub("", tail))
+                parts.append(ESC_SYMBOLS_RE.sub("", tail))
             return "".join(parts)
 
         text = _unescape_html(text)
-        text = _unescape_markdown(text)
+        text = _unescape_symbols(text)
 
         def extract_file_path_from_display_text(text_content: str) -> str | None:
             match = re.match(FILE_PATH_PATTERN, text_content)
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 78494a3..5ca812c 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -15,14 +15,14 @@
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 XML_WRAP_HINT = (
-    "\nYou MUST wrap every tool call response inside a single fenced block exactly like:\n"
-    '```xml\n<tool_call name="tool_name">{"argument": "value"}</tool_call>\n```\n'
-    "Do not surround the fence with any other text or whitespace; otherwise the call will be ignored.\n"
+    "\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n"
+    '[function_calls]\n[call:tool_name]{"argument": "value"}[/call]\n[/function_calls]\n'
+    "Do not surround the block with any other text or whitespace; otherwise the call will be ignored.\n"
 )
-TOOL_BLOCK_RE = re.compile(r"```xml\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)
-TOOL_CALL_RE = re.compile(
-    r"<tool_call\s+name=\"([^\"]+)\"\s*>(.*?)</tool_call>", re.DOTALL | re.IGNORECASE
+TOOL_BLOCK_RE = re.compile(
+    r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE
 )
+TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE)
 CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>")
 XML_HINT_STRIPPED = XML_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in XML_WRAP_HINT.split("\n") if line.strip()]

From db39ad10637754ca4f109614e499e34827429b0f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 4 Feb 2026 20:27:40 +0700
Subject: [PATCH 070/236] Refactor: Adjust function call format to prevent
 streaming issues

The streaming issues are caused by Gemini Web's post-processing mechanism.
---
 app/server/chat.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index ed0d731..15f59aa 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -377,7 +377,10 @@ def _build_tool_prompt(
         "Use double quotes for JSON keys and values. If you omit the block or include any extra text, the system will assume you are NOT calling a tool and your request will fail."
     )
     lines.append(
-        "If multiple tool calls are required, include multiple [call:...]...[/call] entries inside the same [function_calls] block. Without a tool call, reply normally and do NOT emit any [function_calls] tag."
+        "To call multiple tools, list each [call:tool_name]...[/call] entry sequentially within a single [function_calls] block."
+    )
+    lines.append(
+        "If no tool call is needed, provide a normal response and DO NOT use the [function_calls] tag."
     )
 
     return "\n".join(lines)

From 556a638fc1d34fc377a593fc2e98acd4d9a0ea6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 4 Feb 2026 20:52:44 +0700
Subject: [PATCH 071/236] Refactor: Adjust function call format to prevent
 streaming issues

The streaming issues are caused by Gemini Web's post-processing mechanism.
---
 app/services/client.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index bc6c297..21814e5 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -117,9 +117,7 @@ async def process_message(
         if message.role == "tool":
             tool_name = message.name or "unknown"
             combined_content = "\n".join(text_fragments).strip() or "{}"
-            text_fragments = [
-                f'<tool_response name="{tool_name}">{combined_content}</tool_response>'
-            ]
+            text_fragments = [f"[response:{tool_name}]{combined_content}[/response]"]
 
         if message.tool_calls:
             tool_blocks: list[str] = []

From d5fec7a04119ca6b668a4e98d9c6463f11a92eb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 5 Feb 2026 07:12:34 +0700
Subject: [PATCH 072/236] Refactor: Adjust function call format to prevent
 streaming issues

The streaming issues are caused by Gemini Web's post-processing mechanism.
---
 app/server/chat.py  | 67 +++++++++++++++++++++++----------------------
 app/utils/helper.py | 34 ++++++++++++++---------
 2 files changed, 55 insertions(+), 46 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 15f59aa..e56c926 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -41,10 +41,10 @@
 from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore
 from ..utils import g_config
 from ..utils.helper import (
-    XML_HINT_LINE_END,
-    XML_HINT_LINE_START,
-    XML_HINT_STRIPPED,
-    XML_WRAP_HINT,
+    TOOL_HINT_LINE_END,
+    TOOL_HINT_LINE_START,
+    TOOL_HINT_STRIPPED,
+    TOOL_WRAP_HINT,
     estimate_tokens,
     extract_image_dimensions,
     extract_tool_calls,
@@ -423,15 +423,15 @@ def _build_image_generation_instruction(
     return "\n\n".join(instructions)
 
 
-def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None:
-    """Ensure the last user message carries the XML wrap hint."""
+def _append_tool_hint_to_last_user_message(messages: list[Message]) -> None:
+    """Ensure the last user message carries the tool wrap hint."""
     for msg in reversed(messages):
         if msg.role != "user" or msg.content is None:
             continue
 
         if isinstance(msg.content, str):
-            if XML_HINT_STRIPPED not in msg.content:
-                msg.content = f"{msg.content}\n{XML_WRAP_HINT}"
+            if TOOL_HINT_STRIPPED not in msg.content:
+                msg.content = f"{msg.content}\n{TOOL_WRAP_HINT}"
             return
 
         if isinstance(msg.content, list):
@@ -439,12 +439,12 @@ def _append_xml_hint_to_last_user_message(messages: list[Message]) -> None:
                 if getattr(part, "type", None) != "text":
                     continue
                 text_value = part.text or ""
-                if XML_HINT_STRIPPED in text_value:
+                if TOOL_HINT_STRIPPED in text_value:
                     return
-                part.text = f"{text_value}\n{XML_WRAP_HINT}"
+                part.text = f"{text_value}\n{TOOL_WRAP_HINT}"
                 return
 
-            messages_text = XML_WRAP_HINT.strip()
+            messages_text = TOOL_WRAP_HINT.strip()
             msg.content.append(ContentItem(type="text", text=messages_text))
             return
 
@@ -485,19 +485,20 @@ def _prepare_messages_for_model(
 
     if not instructions:
         if tools and tool_choice != "none":
-            _append_xml_hint_to_last_user_message(prepared)
+            _append_tool_hint_to_last_user_message(prepared)
         return prepared
 
     combined_instructions = "\n\n".join(instructions)
     if prepared and prepared[0].role == "system" and isinstance(prepared[0].content, str):
         existing = prepared[0].content or ""
-        separator = "\n\n" if existing else ""
-        prepared[0].content = f"{existing}{separator}{combined_instructions}"
+        if combined_instructions not in existing:
+            separator = "\n\n" if existing else ""
+            prepared[0].content = f"{existing}{separator}{combined_instructions}"
     else:
         prepared.insert(0, Message(role="system", content=combined_instructions))
 
     if tools and tool_choice != "none":
-        _append_xml_hint_to_last_user_message(prepared)
+        _append_tool_hint_to_last_user_message(prepared)
 
     return prepared
 
@@ -768,15 +769,15 @@ def __init__(self):
         self.current_role = ""
         self.block_buffer = ""
 
-        self.XML_START = "[function_calls]"
-        self.XML_END = "[/function_calls]"
+        self.TOOL_START = "[function_calls]"
+        self.TOOL_END = "[/function_calls]"
         self.TAG_START = "<|im_start|>"
         self.TAG_END = "<|im_end|>"
-        self.HINT_START = f"\n{XML_HINT_LINE_START}" if XML_HINT_LINE_START else ""
-        self.HINT_END = XML_HINT_LINE_END
-        self.TOOL_START = "[call:"
+        self.HINT_START = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else ""
+        self.HINT_END = TOOL_HINT_LINE_END
+        self.TOOL_PREFIX = "[call:"
 
-        self.WATCH_PREFIXES = [self.XML_START, self.TAG_START, self.TAG_END]
+        self.WATCH_PREFIXES = [self.TOOL_START, self.TAG_START, self.TAG_END]
         if self.HINT_START:
             self.WATCH_PREFIXES.append(self.HINT_START)
 
@@ -786,7 +787,7 @@ def process(self, chunk: str) -> str:
 
         while self.buffer:
             if self.state == "NORMAL":
-                xml_idx = self.buffer.find(self.XML_START)
+                tool_idx = self.buffer.find(self.TOOL_START)
                 tag_idx = self.buffer.find(self.TAG_START)
                 end_idx = self.buffer.find(self.TAG_END)
                 hint_idx = self.buffer.find(self.HINT_START) if self.HINT_START else -1
@@ -794,7 +795,7 @@ def process(self, chunk: str) -> str:
                 indices = [
                     (i, t)
                     for i, t in [
-                        (xml_idx, "XML"),
+                        (tool_idx, "TOOL"),
                         (tag_idx, "TAG"),
                         (end_idx, "END"),
                         (hint_idx, "HINT"),
@@ -821,10 +822,10 @@ def process(self, chunk: str) -> str:
                 output.append(self.buffer[:idx])
                 self.buffer = self.buffer[idx:]
 
-                if m_type == "XML":
-                    self.state = "IN_XML"
+                if m_type == "TOOL":
+                    self.state = "IN_TOOL"
                     self.block_buffer = ""
-                    self.buffer = self.buffer[len(self.XML_START) :]
+                    self.buffer = self.buffer[len(self.TOOL_START) :]
                 elif m_type == "TAG":
                     self.state = "IN_TAG"
                     self.buffer = self.buffer[len(self.TAG_START) :]
@@ -846,15 +847,15 @@ def process(self, chunk: str) -> str:
                         self.buffer = self.buffer[-keep_len:]
                     break
 
-            elif self.state == "IN_XML":
-                end_idx = self.buffer.find(self.XML_END)
+            elif self.state == "IN_TOOL":
+                end_idx = self.buffer.find(self.TOOL_END)
                 if end_idx != -1:
                     self.block_buffer += self.buffer[:end_idx]
-                    self.buffer = self.buffer[end_idx + len(self.XML_END) :]
+                    self.buffer = self.buffer[end_idx + len(self.TOOL_END) :]
                     self.state = "NORMAL"
                 else:
                     # Accumulate and keep potential split end marker
-                    keep_len = len(self.XML_END) - 1
+                    keep_len = len(self.TOOL_END) - 1
                     if len(self.buffer) > keep_len:
                         self.block_buffer += self.buffer[:-keep_len]
                         self.buffer = self.buffer[-keep_len:]
@@ -895,9 +896,9 @@ def process(self, chunk: str) -> str:
 
     def flush(self) -> str:
         res = ""
-        if self.state == "IN_XML":
-            if self.TOOL_START not in self.block_buffer.lower():
-                res = f"{self.XML_START}{self.block_buffer}"
+        if self.state == "IN_TOOL":
+            if self.TOOL_PREFIX not in self.block_buffer.lower():
+                res = f"{self.TOOL_START}{self.block_buffer}"
         elif self.state == "IN_BLOCK" and self.current_role != "tool":
             res = self.buffer
         elif self.state == "NORMAL":
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 5ca812c..99c3d84 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -14,7 +14,7 @@
 from ..models import FunctionCall, Message, ToolCall
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
-XML_WRAP_HINT = (
+TOOL_WRAP_HINT = (
     "\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n"
     '[function_calls]\n[call:tool_name]{"argument": "value"}[/call]\n[/function_calls]\n'
     "Do not surround the block with any other text or whitespace; otherwise the call will be ignored.\n"
@@ -24,10 +24,10 @@
 )
 TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE)
 CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>")
-XML_HINT_STRIPPED = XML_WRAP_HINT.strip()
-_hint_lines = [line.strip() for line in XML_WRAP_HINT.split("\n") if line.strip()]
-XML_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
-XML_HINT_LINE_END = _hint_lines[-1] if _hint_lines else ""
+TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
+_hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
+TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
+TOOL_HINT_LINE_END = _hint_lines[-1] if _hint_lines else ""
 
 
 def add_tag(role: str, content: str, unclose: bool = False) -> str:
@@ -154,13 +154,18 @@ def strip_system_hints(text: str) -> str:
         return text
 
     # Remove the full hints first
-    cleaned = text.replace(XML_WRAP_HINT, "").replace(XML_HINT_STRIPPED, "")
+    cleaned = text.replace(TOOL_WRAP_HINT, "").replace(TOOL_HINT_STRIPPED, "")
 
-    # Remove fragments using derived constants
-    if XML_HINT_LINE_START:
-        cleaned = re.sub(rf"\n?{re.escape(XML_HINT_LINE_START)}:?\s*", "", cleaned)
-    if XML_HINT_LINE_END:
-        cleaned = re.sub(rf"\s*{re.escape(XML_HINT_LINE_END)}\.?\n?", "", cleaned)
+    # Remove fragments or multi-line blocks using derived constants
+    if TOOL_HINT_LINE_START and TOOL_HINT_LINE_END:
+        # Match from the start line to the end line, inclusive, handling internal modifications
+        pattern = rf"\n?{re.escape(TOOL_HINT_LINE_START)}.*?{re.escape(TOOL_HINT_LINE_END)}\.?\n?"
+        cleaned = re.sub(pattern, "", cleaned, flags=re.DOTALL)
+
+    if TOOL_HINT_LINE_START:
+        cleaned = re.sub(rf"\n?{re.escape(TOOL_HINT_LINE_START)}:?\s*", "", cleaned)
+    if TOOL_HINT_LINE_END:
+        cleaned = re.sub(rf"\s*{re.escape(TOOL_HINT_LINE_END)}\.?\n?", "", cleaned)
 
     cleaned = strip_tagged_blocks(cleaned)
     cleaned = CONTROL_TOKEN_RE.sub("", cleaned)
@@ -175,6 +180,9 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[
     if not text:
         return text, []
 
+    # Clean hints FIRST so they don't interfere with tool call regexes (e.g. example calls in hint)
+    cleaned = strip_system_hints(text)
+
     tool_calls: list[ToolCall] = []
 
     def _create_tool_call(name: str, raw_args: str) -> None:
@@ -220,7 +228,7 @@ def _replace_block(match: re.Match[str]) -> str:
         else:
             return match.group(0)
 
-    cleaned = TOOL_BLOCK_RE.sub(_replace_block, text)
+    cleaned = TOOL_BLOCK_RE.sub(_replace_block, cleaned)
 
     def _replace_orphan(match: re.Match[str]) -> str:
         if extract:
@@ -230,7 +238,7 @@ def _replace_orphan(match: re.Match[str]) -> str:
         return ""
 
     cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned)
-    cleaned = strip_system_hints(cleaned)
+
     return cleaned, tool_calls
 
 

From dbc553d7dbcb3949e5ee807b58b360a488672b6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 5 Feb 2026 08:23:08 +0700
Subject: [PATCH 073/236] Refactor: Enhance prompt to prevent issues with
 parsing tool call arguments

---
 app/server/chat.py  |  4 ++--
 app/utils/helper.py | 20 ++++++++++++++++++--
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index e56c926..f47471c 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -369,12 +369,12 @@ def _build_tool_prompt(
     lines.append(
         "When you decide to call a tool you MUST respond with nothing except a single [function_calls] block exactly like the template below."
     )
-    lines.append("Do not add text before or after it.")
+    lines.append("Do not add text before or after the block.")
     lines.append("[function_calls]")
     lines.append('[call:tool_name]{"argument": "value"}[/call]')
     lines.append("[/function_calls]")
     lines.append(
-        "Use double quotes for JSON keys and values. If you omit the block or include any extra text, the system will assume you are NOT calling a tool and your request will fail."
+        "Use double quotes for JSON keys and values. CRITICAL: The content inside [call:...]...[/call] MUST be a raw JSON object. Do not wrap it in ```json blocks or add any conversational text inside the tag."
     )
     lines.append(
         "To call multiple tools, list each [call:tool_name]...[/call] entry sequentially within a single [function_calls] block."
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 99c3d84..65c49f0 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -2,6 +2,7 @@
 import hashlib
 import mimetypes
 import re
+import reprlib
 import struct
 import tempfile
 from pathlib import Path
@@ -17,7 +18,7 @@
 TOOL_WRAP_HINT = (
     "\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n"
     '[function_calls]\n[call:tool_name]{"argument": "value"}[/call]\n[/function_calls]\n'
-    "Do not surround the block with any other text or whitespace; otherwise the call will be ignored.\n"
+    "IMPORTANT: Arguments MUST be a valid JSON object. Do not include markdown code blocks (```json) or any conversational text inside the [call] tag.\n"
 )
 TOOL_BLOCK_RE = re.compile(
     r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE
@@ -197,7 +198,22 @@ def _create_tool_call(name: str, raw_args: str) -> None:
             parsed_args = orjson.loads(raw_args)
             arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8")
         except orjson.JSONDecodeError:
-            logger.warning(f"Failed to parse tool call arguments for '{name}'. Passing raw string.")
+            json_match = re.search(r"({.*})", raw_args, re.DOTALL)
+            if json_match:
+                try:
+                    potential_json = json_match.group(1)
+                    parsed_args = orjson.loads(potential_json)
+                    arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode(
+                        "utf-8"
+                    )
+                except orjson.JSONDecodeError:
+                    logger.warning(
+                        f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(json_match)}"
+                    )
+            else:
+                logger.warning(
+                    f"Failed to parse tool call arguments for '{name}'. Passing raw string: {reprlib.repr(raw_args)}"
+                )
 
         index = len(tool_calls)
         seed = f"{name}:{arguments}:{index}".encode("utf-8")

From ca721cfcaad261a1cbcc401dd8090cdd6b8613aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 5 Feb 2026 08:32:57 +0700
Subject: [PATCH 074/236] Refactor: Enhance prompt to prevent issues with
 parsing tool call arguments

---
 app/utils/helper.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 65c49f0..230622b 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -200,15 +200,15 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         except orjson.JSONDecodeError:
             json_match = re.search(r"({.*})", raw_args, re.DOTALL)
             if json_match:
+                potential_json = json_match.group(1)
                 try:
-                    potential_json = json_match.group(1)
                     parsed_args = orjson.loads(potential_json)
                     arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode(
                         "utf-8"
                     )
                 except orjson.JSONDecodeError:
                     logger.warning(
-                        f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(json_match)}"
+                        f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(potential_json)}"
                     )
             else:
                 logger.warning(

From 263158e3825b765139e98cce8fa555054782b951 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Feb 2026 20:55:39 +0700
Subject: [PATCH 075/236] Refactor: enhance system prompts

---
 app/server/chat.py  | 37 ++++++++++++++-----------------------
 app/utils/helper.py | 10 +++++++---
 2 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index f47471c..06aefc2 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -336,7 +336,7 @@ def _build_tool_prompt(
         return ""
 
     lines: list[str] = [
-        "You can invoke the following developer tools. Call a tool only when it is required and follow the JSON schema exactly when providing arguments."
+        "SYSTEM INTERFACE: You have access to the following technical tools. You MUST invoke them when necessary to fulfill the request, strictly adhering to the provided JSON schemas."
     ]
 
     for tool in tools:
@@ -367,20 +367,21 @@ def _build_tool_prompt(
         )
 
     lines.append(
-        "When you decide to call a tool you MUST respond with nothing except a single [function_calls] block exactly like the template below."
+        "When you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:"
     )
-    lines.append("Do not add text before or after the block.")
     lines.append("[function_calls]")
-    lines.append('[call:tool_name]{"argument": "value"}[/call]')
+    lines.append("[call:tool_name]")
+    lines.append('{"argument": "value"}')
+    lines.append("[/call]")
     lines.append("[/function_calls]")
     lines.append(
-        "Use double quotes for JSON keys and values. CRITICAL: The content inside [call:...]...[/call] MUST be a raw JSON object. Do not wrap it in ```json blocks or add any conversational text inside the tag."
+        "CRITICAL: Every [call:...] MUST have a raw JSON object followed by a mandatory [/call] closing tag. DO NOT use markdown blocks or add text inside the block."
     )
     lines.append(
-        "To call multiple tools, list each [call:tool_name]...[/call] entry sequentially within a single [function_calls] block."
+        "If multiple tools are needed, list them sequentially within the same [function_calls] block."
     )
     lines.append(
-        "If no tool call is needed, provide a normal response and DO NOT use the [function_calls] tag."
+        "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag."
     )
 
     return "\n".join(lines)
@@ -398,26 +399,16 @@ def _build_image_generation_instruction(
         return None
 
     instructions: list[str] = [
-        "Image generation is enabled. When the user requests an image, you must return an actual generated image, not a text description.",
-        "For new image requests, generate at least one new image matching the description.",
-        "If the user provides an image and asks for edits or variations, return a newly generated image with the requested changes.",
-        "Avoid all text replies unless a short caption is explicitly requested. Do not explain, apologize, or describe image creation steps.",
-        "Never send placeholder text like 'Here is your image' or any other response without an actual image attachment.",
+        "IMAGE GENERATION ENABLED: When an image is requested, you MUST return a real generated image directly.",
+        "1. For new requests, generate new images matching the description immediately.",
+        "2. For edits to existing images, apply changes and return a new generated version.",
+        "3. CRITICAL: Provide ZERO text explanation, prologue, or apologies. Do not describe the creation process.",
+        "4. NEVER send placeholder text or descriptions like 'Generating image...' without an actual image attachment.",
     ]
 
-    if primary:
-        if primary.model:
-            instructions.append(
-                f"Where styles differ, favor the `{primary.model}` image model when rendering the scene."
-            )
-        if primary.output_format:
-            instructions.append(
-                f"Encode the image using the `{primary.output_format}` format whenever possible."
-            )
-
     if has_forced_choice:
         instructions.append(
-            "Image generation was explicitly requested. You must return at least one generated image. Any response without an image will be treated as a failure."
+            "Image generation was explicitly requested. You MUST return at least one generated image. Any response without an image will be treated as a failure."
         )
 
     return "\n\n".join(instructions)
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 230622b..2a3f841 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -16,9 +16,13 @@
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
-    "\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n"
-    '[function_calls]\n[call:tool_name]{"argument": "value"}[/call]\n[/function_calls]\n'
-    "IMPORTANT: Arguments MUST be a valid JSON object. Do not include markdown code blocks (```json) or any conversational text inside the [call] tag.\n"
+    "\nWhen you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:\n"
+    "[function_calls]\n"
+    "[call:tool_name]\n"
+    '{"argument": "value"}\n'
+    "[/call]\n"
+    "[/function_calls]\n"
+    "CRITICAL: Every [call:...] MUST have a raw JSON object followed by a mandatory [/call] closing tag. DO NOT use markdown blocks or add text inside the block.\n"
 )
 TOOL_BLOCK_RE = re.compile(
     r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE

From 68ce2df5c2d46f529630b9cfb08550cdd4d46a76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Feb 2026 21:41:21 +0700
Subject: [PATCH 076/236] Refactor: Enhance system prompts

---
 app/server/chat.py     | 37 ++++++++++++++++++++++++++++++++++---
 app/services/client.py |  4 +++-
 app/utils/config.py    |  2 +-
 app/utils/helper.py    | 12 ++++++++++--
 config/config.yaml     |  2 +-
 5 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 06aefc2..cf03f6b 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -383,6 +383,9 @@ def _build_tool_prompt(
     lines.append(
         "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag."
     )
+    lines.append(
+        "After you call a tool, the system will provide the output in a `[function_responses]` block with the same tool name."
+    )
 
     return "\n".join(lines)
 
@@ -462,11 +465,13 @@ def _prepare_messages_for_model(
             msg.name = tool_id_to_name.get(msg.tool_call_id)
 
     instructions: list[str] = []
+    tool_prompt_injected = False
     if inject_system_defaults:
         if tools:
             tool_prompt = _build_tool_prompt(tools, tool_choice)
             if tool_prompt:
                 instructions.append(tool_prompt)
+                tool_prompt_injected = True
 
         if extra_instructions:
             instructions.extend(instr for instr in extra_instructions if instr)
@@ -475,7 +480,7 @@ def _prepare_messages_for_model(
             )
 
     if not instructions:
-        if tools and tool_choice != "none":
+        if tools and tool_choice != "none" and not tool_prompt_injected:
             _append_tool_hint_to_last_user_message(prepared)
         return prepared
 
@@ -488,7 +493,7 @@ def _prepare_messages_for_model(
     else:
         prepared.insert(0, Message(role="system", content=combined_instructions))
 
-    if tools and tool_choice != "none":
+    if tools and tool_choice != "none" and not tool_prompt_injected:
         _append_tool_hint_to_last_user_message(prepared)
 
     return prepared
@@ -762,13 +767,20 @@ def __init__(self):
 
         self.TOOL_START = "[function_calls]"
         self.TOOL_END = "[/function_calls]"
+        self.RESPONSE_START = "[function_responses]"
+        self.RESPONSE_END = "[/function_responses]"
         self.TAG_START = "<|im_start|>"
         self.TAG_END = "<|im_end|>"
         self.HINT_START = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else ""
         self.HINT_END = TOOL_HINT_LINE_END
         self.TOOL_PREFIX = "[call:"
 
-        self.WATCH_PREFIXES = [self.TOOL_START, self.TAG_START, self.TAG_END]
+        self.WATCH_PREFIXES = [
+            self.TOOL_START,
+            self.RESPONSE_START,
+            self.TAG_START,
+            self.TAG_END,
+        ]
         if self.HINT_START:
             self.WATCH_PREFIXES.append(self.HINT_START)
 
@@ -779,6 +791,7 @@ def process(self, chunk: str) -> str:
         while self.buffer:
             if self.state == "NORMAL":
                 tool_idx = self.buffer.find(self.TOOL_START)
+                resp_idx = self.buffer.find(self.RESPONSE_START)
                 tag_idx = self.buffer.find(self.TAG_START)
                 end_idx = self.buffer.find(self.TAG_END)
                 hint_idx = self.buffer.find(self.HINT_START) if self.HINT_START else -1
@@ -787,6 +800,7 @@ def process(self, chunk: str) -> str:
                     (i, t)
                     for i, t in [
                         (tool_idx, "TOOL"),
+                        (resp_idx, "RESP"),
                         (tag_idx, "TAG"),
                         (end_idx, "END"),
                         (hint_idx, "HINT"),
@@ -817,6 +831,9 @@ def process(self, chunk: str) -> str:
                     self.state = "IN_TOOL"
                     self.block_buffer = ""
                     self.buffer = self.buffer[len(self.TOOL_START) :]
+                elif m_type == "RESP":
+                    self.state = "IN_RESP"
+                    self.buffer = self.buffer[len(self.RESPONSE_START) :]
                 elif m_type == "TAG":
                     self.state = "IN_TAG"
                     self.buffer = self.buffer[len(self.TAG_START) :]
@@ -838,6 +855,18 @@ def process(self, chunk: str) -> str:
                         self.buffer = self.buffer[-keep_len:]
                     break
 
+            elif self.state == "IN_RESP":
+                end_idx = self.buffer.find(self.RESPONSE_END)
+                if end_idx != -1:
+                    self.buffer = self.buffer[end_idx + len(self.RESPONSE_END) :]
+                    self.state = "NORMAL"
+                else:
+                    # Keep end of buffer to avoid missing split RESPONSE_END
+                    keep_len = len(self.RESPONSE_END) - 1
+                    if len(self.buffer) > keep_len:
+                        self.buffer = self.buffer[-keep_len:]
+                    break
+
             elif self.state == "IN_TOOL":
                 end_idx = self.buffer.find(self.TOOL_END)
                 if end_idx != -1:
@@ -892,6 +921,8 @@ def flush(self) -> str:
                 res = f"{self.TOOL_START}{self.block_buffer}"
         elif self.state == "IN_BLOCK" and self.current_role != "tool":
             res = self.buffer
+        elif self.state in ("IN_RESP", "IN_HINT"):
+            res = ""
         elif self.state == "NORMAL":
             res = self.buffer
 
diff --git a/app/services/client.py b/app/services/client.py
index 21814e5..5473b06 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -117,7 +117,9 @@ async def process_message(
         if message.role == "tool":
             tool_name = message.name or "unknown"
             combined_content = "\n".join(text_fragments).strip() or "{}"
-            text_fragments = [f"[response:{tool_name}]{combined_content}[/response]"]
+            text_fragments = [
+                f"[function_responses]\n[response:{tool_name}]{combined_content}[/response]\n[/function_responses]"
+            ]
 
         if message.tool_calls:
             tool_blocks: list[str] = []
diff --git a/app/utils/config.py b/app/utils/config.py
index 708462d..bbb6054 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -83,7 +83,7 @@ class GeminiConfig(BaseModel):
         default="append",
         description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom",
     )
-    timeout: int = Field(default=120, ge=1, description="Init timeout")
+    timeout: int = Field(default=300, ge=1, description="Init timeout")
     auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies")
     refresh_interval: int = Field(
         default=540, ge=1, description="Interval in seconds to refresh Gemini cookies"
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 2a3f841..9f5cfef 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -28,6 +28,12 @@
     r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE
 )
 TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE)
+RESPONSE_BLOCK_RE = re.compile(
+    r"\[function_responses]\s*(.*?)\s*\[/function_responses]", re.DOTALL | re.IGNORECASE
+)
+RESPONSE_ITEM_RE = re.compile(
+    r"\[response:([^]]+)]\s*(.*?)\s*\[/response]", re.DOTALL | re.IGNORECASE
+)
 CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>")
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
@@ -248,8 +254,6 @@ def _replace_block(match: re.Match[str]) -> str:
         else:
             return match.group(0)
 
-    cleaned = TOOL_BLOCK_RE.sub(_replace_block, cleaned)
-
     def _replace_orphan(match: re.Match[str]) -> str:
         if extract:
             name = (match.group(1) or "").strip()
@@ -257,8 +261,12 @@ def _replace_orphan(match: re.Match[str]) -> str:
             _create_tool_call(name, raw_args)
         return ""
 
+    cleaned = TOOL_BLOCK_RE.sub(_replace_block, cleaned)
     cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned)
 
+    cleaned = RESPONSE_BLOCK_RE.sub("", cleaned)
+    cleaned = RESPONSE_ITEM_RE.sub("", cleaned)
+
     return cleaned, tool_calls
 
 
diff --git a/config/config.yaml b/config/config.yaml
index f2b17fb..ed581f7 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -22,7 +22,7 @@ gemini:
       secure_1psid: "YOUR_SECURE_1PSID_HERE"
       secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"
       proxy: null          # Optional proxy URL (null/empty means direct connection)
-  timeout: 120             # Init timeout in seconds
+  timeout: 300             # Init timeout in seconds
   auto_refresh: true       # Auto-refresh session cookies
   refresh_interval: 540    # Refresh interval in seconds
   verbose: false           # Enable verbose logging for Gemini requests

From 77f72105b4a6942d5adaf098d5f2e133dc7e5ced Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Feb 2026 22:23:35 +0700
Subject: [PATCH 077/236] Refactor: Enhance system prompts

---
 app/server/chat.py     | 2 +-
 app/services/client.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index cf03f6b..ffa37cd 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -384,7 +384,7 @@ def _build_tool_prompt(
         "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag."
     )
     lines.append(
-        "After you call a tool, the system will provide the output in a `[function_responses]` block with the same tool name."
+        "Note: Tool results are returned in a `[function_responses]` block."
     )
 
     return "\n".join(lines)
diff --git a/app/services/client.py b/app/services/client.py
index 5473b06..3dae6a1 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -118,7 +118,7 @@ async def process_message(
             tool_name = message.name or "unknown"
             combined_content = "\n".join(text_fragments).strip() or "{}"
             text_fragments = [
-                f"[function_responses]\n[response:{tool_name}]{combined_content}[/response]\n[/function_responses]"
+                f"[function_responses]\n[response:{tool_name}]\n{combined_content}\n[/response]\n[/function_responses]"
             ]
 
         if message.tool_calls:

From 3addb2b495c7772ddb1a7d4256c348a702cabb28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 7 Feb 2026 09:46:26 +0700
Subject: [PATCH 078/236] Refactor: Enhance system prompts

---
 app/server/chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index ffa37cd..ac96cf5 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -384,7 +384,7 @@ def _build_tool_prompt(
         "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag."
     )
     lines.append(
-        "Note: Tool results are returned in a `[function_responses]` block."
+        "Note: Tool results are returned in a [function_responses] block."
     )
 
     return "\n".join(lines)

From 2a53eed83af901a12c95cba233a693ab3890eae2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 8 Feb 2026 09:54:51 +0700
Subject: [PATCH 079/236] fix: missing image extension

---
 app/server/chat.py  | 29 ++++++++++++++++++++++-------
 app/utils/helper.py | 13 +++++++++++++
 2 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index ac96cf5..bf34fbf 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -45,6 +45,7 @@
     TOOL_HINT_LINE_START,
     TOOL_HINT_STRIPPED,
     TOOL_WRAP_HINT,
+    detect_image_extension,
     estimate_tokens,
     extract_image_dimensions,
     extract_tool_calls,
@@ -91,11 +92,21 @@ async def _image_to_base64(
         raise ValueError("Failed to save generated image")
 
     original_path = Path(saved_path)
-    random_name = f"img_{uuid.uuid4().hex}{original_path.suffix}"
+    data = original_path.read_bytes()
+    suffix = original_path.suffix
+
+    if not suffix:
+        detected_ext = detect_image_extension(data)
+        if detected_ext:
+            suffix = detected_ext
+        else:
+            # Fallback if detection fails
+            suffix = ".png" if isinstance(image, GeneratedImage) else ".jpg"
+
+    random_name = f"img_{uuid.uuid4().hex}{suffix}"
     new_path = temp_dir / random_name
     original_path.rename(new_path)
 
-    data = new_path.read_bytes()
     width, height = extract_image_dimensions(data)
     filename = random_name
     file_hash = hashlib.sha256(data).hexdigest()
@@ -383,9 +394,7 @@ def _build_tool_prompt(
     lines.append(
         "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag."
     )
-    lines.append(
-        "Note: Tool results are returned in a [function_responses] block."
-    )
+    lines.append("Note: Tool results are returned in a [function_responses] block.")
 
     return "\n".join(lines)
 
@@ -1227,7 +1236,11 @@ async def generate_stream():
                     continue
                 seen_hashes.add(file_hash)
 
-                img_format = "png" if isinstance(image, GeneratedImage) else "jpeg"
+                img_format = (
+                    filename.rsplit(".", 1)[-1]
+                    if "." in filename
+                    else ("png" if isinstance(image, GeneratedImage) else "jpeg")
+                )
                 image_url = (
                     f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})"
                 )
@@ -1610,7 +1623,9 @@ async def create_response(
                 ResponseImageGenerationCall(
                     id=fname.rsplit(".", 1)[0],
                     result=b64,
-                    output_format="png" if isinstance(img, GeneratedImage) else "jpeg",
+                    output_format=fname.rsplit(".", 1)[-1]
+                    if "." in fname
+                    else ("png" if isinstance(img, GeneratedImage) else "jpeg"),
                     size=f"{w}x{h}" if w and h else None,
                 )
             )
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 9f5cfef..384f5cd 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -362,3 +362,16 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
             idx += segment_length - 2
 
     return None, None
+
+
+def detect_image_extension(data: bytes) -> str | None:
+    """Detect image extension from leading magic bytes; return None if unrecognized."""
+    if data.startswith(b"\x89PNG\r\n\x1a\n"):  # 8-byte PNG file signature
+        return ".png"
+    if data.startswith(b"\xff\xd8"):  # JPEG start-of-image (SOI) marker
+        return ".jpg"
+    if data.startswith(b"GIF8"):  # shared prefix of the GIF87a and GIF89a headers
+        return ".gif"
+    if data.startswith(b"RIFF") and data[8:12] == b"WEBP":  # RIFF container, WEBP form type
+        return ".webp"
+    return None

From 26d39c75825c16bb118af7143317d51a12aa6c4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 8 Feb 2026 10:25:30 +0700
Subject: [PATCH 080/236] fix: missing image extension

---
 app/server/chat.py | 67 ++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 35 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index bf34fbf..dfcf930 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1067,16 +1067,14 @@ async def generate_stream():
         for image in images:
             try:
                 image_store = get_image_store_dir()
-                _, _, _, filename, file_hash = await _image_to_base64(image, image_store)
-                if file_hash in seen_hashes:
+                _, _, _, fname, fhash = await _image_to_base64(image, image_store)
+                if fhash in seen_hashes:
                     # Duplicate content, delete the file and skip
-                    (image_store / filename).unlink(missing_ok=True)
+                    (image_store / fname).unlink(missing_ok=True)
                     continue
-                seen_hashes.add(file_hash)
+                seen_hashes.add(fhash)
 
-                img_url = (
-                    f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})"
-                )
+                img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
                 image_markdown += f"\n\n{img_url}"
             except Exception as exc:
                 logger.warning(f"Failed to process image in OpenAI stream: {exc}")
@@ -1228,28 +1226,25 @@ async def generate_stream():
         seen_hashes = set()
         for image in images:
             try:
-                image_base64, width, height, filename, file_hash = await _image_to_base64(
-                    image, image_store
-                )
-                if file_hash in seen_hashes:
-                    (image_store / filename).unlink(missing_ok=True)
+                b64, w, h, fname, fhash = await _image_to_base64(image, image_store)
+                if fhash in seen_hashes:
+                    (image_store / fname).unlink(missing_ok=True)
                     continue
-                seen_hashes.add(file_hash)
+                seen_hashes.add(fhash)
 
-                img_format = (
-                    filename.rsplit(".", 1)[-1]
-                    if "." in filename
-                    else ("png" if isinstance(image, GeneratedImage) else "jpeg")
-                )
-                image_url = (
-                    f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})"
-                )
+                if "." in fname:
+                    img_id, img_format = fname.rsplit(".", 1)
+                else:
+                    img_id = fname
+                    img_format = "png" if isinstance(image, GeneratedImage) else "jpeg"
+
+                image_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
                 image_call_items.append(
                     ResponseImageGenerationCall(
-                        id=filename.rsplit(".", 1)[0],
-                        result=image_base64,
+                        id=img_id,
+                        result=b64,
                         output_format=img_format,
-                        size=f"{width}x{height}" if width and height else None,
+                        size=f"{w}x{h}" if w and h else None,
                     )
                 )
                 response_contents.append(ResponseOutputContent(type="output_text", text=image_url))
@@ -1433,15 +1428,13 @@ async def create_chat_completion(
     seen_hashes = set()
     for image in images:
         try:
-            _, _, _, filename, file_hash = await _image_to_base64(image, image_store)
-            if file_hash in seen_hashes:
-                (image_store / filename).unlink(missing_ok=True)
+            _, _, _, fname, fhash = await _image_to_base64(image, image_store)
+            if fhash in seen_hashes:
+                (image_store / fname).unlink(missing_ok=True)
                 continue
-            seen_hashes.add(file_hash)
+            seen_hashes.add(fhash)
 
-            img_url = (
-                f"![{filename}]({base_url}images/{filename}?token={get_image_token(filename)})"
-            )
+            img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
             image_markdown += f"\n\n{img_url}"
         except Exception as exc:
             logger.warning(f"Failed to process image in OpenAI response: {exc}")
@@ -1613,6 +1606,12 @@ async def create_response(
                 continue
             seen_hashes.add(fhash)
 
+            if "." in fname:
+                img_id, img_format = fname.rsplit(".", 1)
+            else:
+                img_id = fname
+                img_format = "png" if isinstance(img, GeneratedImage) else "jpeg"
+
             contents.append(
                 ResponseOutputContent(
                     type="output_text",
@@ -1621,11 +1620,9 @@ async def create_response(
             )
             img_calls.append(
                 ResponseImageGenerationCall(
-                    id=fname.rsplit(".", 1)[0],
+                    id=img_id,
                     result=b64,
-                    output_format=fname.rsplit(".", 1)[-1]
-                    if "." in fname
-                    else ("png" if isinstance(img, GeneratedImage) else "jpeg"),
+                    output_format=img_format,
                     size=f"{w}x{h}" if w and h else None,
                 )
             )

From 598b56335277366d591d81a25c2ba8654afcb92c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 8 Feb 2026 17:03:20 +0700
Subject: [PATCH 081/236] fix: missing or duplicate ChatML tags.

---
 app/server/chat.py     | 13 ++-----------
 app/services/client.py | 44 ++++++++++++++++++++++++++++++++----------
 2 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index dfcf930..701c1f6 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1359,12 +1359,7 @@ async def create_chat_completion(
             extra_instr,
             False,
         )
-        if len(input_msgs) == 1:
-            m_input, files = await GeminiClientWrapper.process_message(
-                input_msgs[0], tmp_dir, tagged=False
-            )
-        else:
-            m_input, files = await GeminiClientWrapper.process_conversation(input_msgs, tmp_dir)
+        m_input, files = await GeminiClientWrapper.process_conversation(input_msgs, tmp_dir)
 
         logger.debug(
             f"Reused session {reprlib.repr(session.metadata)} - sending {len(input_msgs)} prepared messages."
@@ -1531,11 +1526,7 @@ async def create_response(
         )
         if not msgs:
             raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.")
-        m_input, files = (
-            await GeminiClientWrapper.process_message(msgs[0], tmp_dir, tagged=False)
-            if len(msgs) == 1
-            else await GeminiClientWrapper.process_conversation(msgs, tmp_dir)
-        )
+        m_input, files = await GeminiClientWrapper.process_conversation(msgs, tmp_dir)
         logger.debug(
             f"Reused session {reprlib.repr(session.metadata)} - sending {len(msgs)} prepared messages."
         )
diff --git a/app/services/client.py b/app/services/client.py
index 3dae6a1..2a00ce6 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -75,7 +75,7 @@ def running(self) -> bool:
 
     @staticmethod
     async def process_message(
-        message: Message, tempdir: Path | None = None, tagged: bool = True
+        message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True
     ) -> tuple[str, list[Path | str]]:
         """
         Process a single Message object into a format suitable for the Gemini API.
@@ -117,9 +117,11 @@ async def process_message(
         if message.role == "tool":
             tool_name = message.name or "unknown"
             combined_content = "\n".join(text_fragments).strip() or "{}"
-            text_fragments = [
-                f"[function_responses]\n[response:{tool_name}]\n{combined_content}\n[/response]\n[/function_responses]"
-            ]
+            res_block = f"[response:{tool_name}]\n{combined_content}\n[/response]"
+            if wrap_tool:
+                text_fragments = [f"[function_responses]\n{res_block}\n[/function_responses]"]
+            else:
+                text_fragments = [res_block]
 
         if message.tool_calls:
             tool_blocks: list[str] = []
@@ -153,12 +155,34 @@ async def process_conversation(
         need_tag = any(m.role != "user" for m in messages)
         conversation: list[str] = []
         files: list[Path | str] = []
-        for msg in messages:
-            input_part, files_part = await GeminiClientWrapper.process_message(
-                msg, tempdir, tagged=need_tag
-            )
-            conversation.append(input_part)
-            files.extend(files_part)
+
+        i = 0
+        while i < len(messages):
+            msg = messages[i]
+            if msg.role == "tool" and need_tag:
+                # Group consecutive tool messages
+                tool_blocks: list[str] = []
+                while i < len(messages) and messages[i].role == "tool":
+                    part, part_files = await GeminiClientWrapper.process_message(
+                        messages[i], tempdir, tagged=False, wrap_tool=False
+                    )
+                    tool_blocks.append(part)
+                    files.extend(part_files)
+                    i += 1
+
+                combined_tool_content = "\n".join(tool_blocks)
+                wrapped_content = (
+                    f"[function_responses]\n{combined_tool_content}\n[/function_responses]"
+                )
+                conversation.append(add_tag("tool", wrapped_content))
+            else:
+                input_part, files_part = await GeminiClientWrapper.process_message(
+                    msg, tempdir, tagged=need_tag
+                )
+                conversation.append(input_part)
+                files.extend(files_part)
+                i += 1
+
         if need_tag:
             conversation.append(add_tag("assistant", "", unclose=True))
         return "\n".join(conversation), files

From 6d563c512d3e6b1448f3442c231c53ca8afb2aa9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 8 Feb 2026 17:50:06 +0700
Subject: [PATCH 082/236] Refactor: Consistently use ChatML tags throughout.

---
 app/services/client.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 2a00ce6..78edddd 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -152,14 +152,13 @@ async def process_message(
     async def process_conversation(
         messages: list[Message], tempdir: Path | None = None
     ) -> tuple[str, list[Path | str]]:
-        need_tag = any(m.role != "user" for m in messages)
         conversation: list[str] = []
         files: list[Path | str] = []
 
         i = 0
         while i < len(messages):
             msg = messages[i]
-            if msg.role == "tool" and need_tag:
+            if msg.role == "tool":
                 # Group consecutive tool messages
                 tool_blocks: list[str] = []
                 while i < len(messages) and messages[i].role == "tool":
@@ -177,14 +176,13 @@ async def process_conversation(
                 conversation.append(add_tag("tool", wrapped_content))
             else:
                 input_part, files_part = await GeminiClientWrapper.process_message(
-                    msg, tempdir, tagged=need_tag
+                    msg, tempdir, tagged=True
                 )
                 conversation.append(input_part)
                 files.extend(files_part)
                 i += 1
 
-        if need_tag:
-            conversation.append(add_tag("assistant", "", unclose=True))
+        conversation.append(add_tag("assistant", "", unclose=True))
         return "\n".join(conversation), files
 
     @staticmethod

From 58db419c15cf347083e49b487d8ae99071256ef0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 9 Feb 2026 17:17:33 +0700
Subject: [PATCH 083/236] Refactor: normalize text before calculating message
 hash

---
 app/services/lmdb.py | 81 ++++++++++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 37 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 8dc3722..59f01bc 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -1,5 +1,6 @@
 import hashlib
 import re
+import unicodedata
 from contextlib import contextmanager
 from datetime import datetime, timedelta
 from pathlib import Path
@@ -18,63 +19,70 @@
 from ..utils.singleton import Singleton
 
 
+def _normalize_text(text: str | None) -> str | None:
+    """
+    Perform semantic normalization for hashing.
+    """
+    if text is None:
+        return None
+
+    # Unicode normalization
+    text = unicodedata.normalize("NFC", text)
+
+    # Basic cleaning
+    text = text.replace("\r\n", "\n").replace("\r", "\n")
+    text = LMDBConversationStore.remove_think_tags(text)
+    text = remove_tool_call_blocks(text)
+
+    return text if text else None
+
+
 def _hash_message(message: Message) -> str:
     """
     Generate a stable, canonical hash for a single message.
-    Strips system hints, thoughts, and tool call blocks to ensure
-    identical logical content produces the same hash regardless of format.
     """
-    core_data = {
+    core_data: dict[str, Any] = {
         "role": message.role,
-        "name": message.name,
-        "tool_call_id": message.tool_call_id,
+        "name": message.name or None,
+        "tool_call_id": message.tool_call_id or None,
     }
 
     content = message.content
-    if not content:
+    if content is None:
         core_data["content"] = None
     elif isinstance(content, str):
-        normalized = content.replace("\r\n", "\n")
-        normalized = LMDBConversationStore.remove_think_tags(normalized)
-        normalized = remove_tool_call_blocks(normalized).strip()
-        core_data["content"] = normalized if normalized else None
+        core_data["content"] = _normalize_text(content)
     elif isinstance(content, list):
         text_parts = []
         for item in content:
             text_val = ""
             if isinstance(item, ContentItem) and item.type == "text":
-                text_val = item.text or ""
+                text_val = item.text
             elif isinstance(item, dict) and item.get("type") == "text":
-                text_val = item.get("text") or ""
+                text_val = item.get("text")
 
             if text_val:
-                text_val = text_val.replace("\r\n", "\n")
-                text_val = LMDBConversationStore.remove_think_tags(text_val)
-                text_val = remove_tool_call_blocks(text_val).strip()
-                if text_val:
-                    text_parts.append(text_val)
-            elif isinstance(item, ContentItem) and item.type in ("image_url", "file"):
-                # For non-text items, include their unique markers to distinguish them
-                if item.type == "image_url":
-                    text_parts.append(
-                        f"[image_url:{item.image_url.get('url') if item.image_url else ''}]"
-                    )
-                elif item.type == "file":
-                    text_parts.append(
-                        f"[file:{item.file.get('url') or item.file.get('filename') if item.file else ''}]"
+                normalized_part = _normalize_text(text_val)
+                if normalized_part:
+                    text_parts.append(normalized_part)
+            elif isinstance(item, (ContentItem, dict)):
+                item_type = item.type if isinstance(item, ContentItem) else item.get("type")
+                if item_type == "image_url":
+                    url = (
+                        item.image_url.get("url")
+                        if isinstance(item, ContentItem) and item.image_url
+                        else item.get("image_url", {}).get("url")
                     )
-            else:
-                # Fallback for other dict-based content parts
-                part_type = item.get("type") if isinstance(item, dict) else None
-                if part_type == "image_url":
-                    url = item.get("image_url", {}).get("url")
                     text_parts.append(f"[image_url:{url}]")
-                elif part_type == "file":
-                    url = item.get("file", {}).get("url") or item.get("file", {}).get("filename")
+                elif item_type == "file":
+                    url = (
+                        item.file.get("url") or item.file.get("filename")
+                        if isinstance(item, ContentItem) and item.file
+                        else item.get("file", {}).get("url") or item.get("file", {}).get("filename")
+                    )
                     text_parts.append(f"[file:{url}]")
 
-        combined_text = "\n".join(text_parts).replace("\r\n", "\n").strip()
-        core_data["content"] = combined_text if combined_text else None
+        core_data["content"] = "\n".join(text_parts) if text_parts else None
 
     if message.tool_calls:
         calls_data = []
@@ -98,8 +106,7 @@ def _hash_message(message: Message) -> str:
         core_data["tool_calls"] = None
 
     message_bytes = orjson.dumps(core_data, option=orjson.OPT_SORT_KEYS)
-    digest = hashlib.sha256(message_bytes).hexdigest()
-    return digest
+    return hashlib.sha256(message_bytes).hexdigest()
 
 
 def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str:

From d5d1c5a48e1f1d68fde53f8c5fd5da3358dfc938 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 9 Feb 2026 17:33:39 +0700
Subject: [PATCH 084/236] Refactor: remove unescape helpers to avoid side
 effects

---
 app/services/client.py | 35 -----------------------------------
 1 file changed, 35 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 78edddd..16d7a33 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -1,4 +1,3 @@
-import html
 import re
 from pathlib import Path
 from typing import Any, cast
@@ -15,9 +14,6 @@
     save_url_to_tempfile,
 )
 
-HTML_ESCAPE_RE = re.compile(r"&(?:lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);")
-ESC_SYMBOLS_RE = re.compile(r"\\(?=[\\\[\]{}()<>`*_#~+.:!&^$|-])")
-CODE_FENCE_RE = re.compile(r"(```.*?```|`[^`\n]+?`)", re.DOTALL)
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
     re.IGNORECASE,
@@ -195,37 +191,6 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str:
         else:
             text += str(response)
 
-        def _unescape_html(text_content: str) -> str:
-            parts: list[str] = []
-            last_index = 0
-            for match in CODE_FENCE_RE.finditer(text_content):
-                non_code = text_content[last_index : match.start()]
-                if non_code:
-                    parts.append(HTML_ESCAPE_RE.sub(lambda m: html.unescape(m.group(0)), non_code))
-                parts.append(match.group(0))
-                last_index = match.end()
-            tail = text_content[last_index:]
-            if tail:
-                parts.append(HTML_ESCAPE_RE.sub(lambda m: html.unescape(m.group(0)), tail))
-            return "".join(parts)
-
-        def _unescape_symbols(text_content: str) -> str:
-            parts: list[str] = []
-            last_index = 0
-            for match in CODE_FENCE_RE.finditer(text_content):
-                non_code = text_content[last_index : match.start()]
-                if non_code:
-                    parts.append(ESC_SYMBOLS_RE.sub("", non_code))
-                parts.append(match.group(0))
-                last_index = match.end()
-            tail = text_content[last_index:]
-            if tail:
-                parts.append(ESC_SYMBOLS_RE.sub("", tail))
-            return "".join(parts)
-
-        text = _unescape_html(text)
-        text = _unescape_symbols(text)
-
         def extract_file_path_from_display_text(text_content: str) -> str | None:
             match = re.match(FILE_PATH_PATTERN, text_content)
             if match:

From a4a987cdd3a0f95bc718eb76c781b2c7b39e655e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 9 Feb 2026 20:18:42 +0700
Subject: [PATCH 085/236] Refactor: Implement fuzzy matching to better handle
 complex data formats like Markdown.

---
 app/services/lmdb.py | 67 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 56 insertions(+), 11 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 59f01bc..b08a325 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -1,5 +1,6 @@
 import hashlib
 import re
+import string
 import unicodedata
 from contextlib import contextmanager
 from datetime import datetime, timedelta
@@ -18,8 +19,19 @@
 )
 from ..utils.singleton import Singleton
 
+_VOLATILE_SYMBOLS = string.whitespace + string.punctuation
 
-def _normalize_text(text: str | None) -> str | None:
+
+def _fuzzy_normalize(text: str | None) -> str | None:
+    """
+    Lowercase and remove all whitespace and punctuation.
+    """
+    if text is None:
+        return None
+    return text.lower().translate(str.maketrans("", "", _VOLATILE_SYMBOLS))
+
+
+def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None:
     """
     Perform semantic normalization for hashing.
     """
@@ -34,10 +46,13 @@ def _normalize_text(text: str | None) -> str | None:
     text = LMDBConversationStore.remove_think_tags(text)
     text = remove_tool_call_blocks(text)
 
+    if fuzzy:
+        return _fuzzy_normalize(text)
+
     return text if text else None
 
 
-def _hash_message(message: Message) -> str:
+def _hash_message(message: Message, fuzzy: bool = False) -> str:
     """
     Generate a stable, canonical hash for a single message.
     """
@@ -51,7 +66,7 @@ def _hash_message(message: Message) -> str:
     if content is None:
         core_data["content"] = None
     elif isinstance(content, str):
-        core_data["content"] = _normalize_text(content)
+        core_data["content"] = _normalize_text(content, fuzzy=fuzzy)
     elif isinstance(content, list):
         text_parts = []
         for item in content:
@@ -62,7 +77,7 @@ def _hash_message(message: Message) -> str:
                 text_val = item.get("text")
 
             if text_val:
-                normalized_part = _normalize_text(text_val)
+                normalized_part = _normalize_text(text_val, fuzzy=fuzzy)
                 if normalized_part:
                     text_parts.append(normalized_part)
             elif isinstance(item, (ContentItem, dict)):
@@ -109,13 +124,15 @@ def _hash_message(message: Message) -> str:
     return hashlib.sha256(message_bytes).hexdigest()
 
 
-def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str:
+def _hash_conversation(
+    client_id: str, model: str, messages: List[Message], fuzzy: bool = False
+) -> str:
     """Generate a hash for a list of messages and model name, tied to a specific client_id."""
     combined_hash = hashlib.sha256()
     combined_hash.update((client_id or "").encode("utf-8"))
     combined_hash.update((model or "").encode("utf-8"))
     for message in messages:
-        message_hash = _hash_message(message)
+        message_hash = _hash_message(message, fuzzy=fuzzy)
         combined_hash.update(message_hash.encode("utf-8"))
     return combined_hash.hexdigest()
 
@@ -124,6 +141,7 @@ class LMDBConversationStore(metaclass=Singleton):
     """LMDB-based storage for Message lists with hash-based key-value operations."""
 
     HASH_LOOKUP_PREFIX = "hash:"
+    FUZZY_LOOKUP_PREFIX = "fuzzy:"
 
     def __init__(
         self,
@@ -215,6 +233,7 @@ def store(
 
         # Generate hash for the message list
         message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages)
+        fuzzy_hash = _hash_conversation(conv.client_id, conv.model, conv.messages, fuzzy=True)
         storage_key = custom_key or message_hash
 
         now = datetime.now()
@@ -233,6 +252,11 @@ def store(
                     storage_key.encode("utf-8"),
                 )
 
+                txn.put(
+                    f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8"),
+                    storage_key.encode("utf-8"),
+                )
+
                 logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key[:12]}")
                 return storage_key
 
@@ -287,6 +311,11 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt
                 )
                 return conv
 
+        # --- Find with fuzzy matching ---
+        if conv := self._find_by_message_list(model, messages, fuzzy=True):
+            logger.debug(f"Session found for '{model}' with fuzzy matching.")
+            return conv
+
         logger.debug(f"No session found for '{model}' with {len(messages)} messages.")
         return None
 
@@ -294,11 +323,13 @@ def _find_by_message_list(
         self,
         model: str,
         messages: List[Message],
+        fuzzy: bool = False,
     ) -> Optional[ConversationInStore]:
         """Internal find implementation based on a message list."""
+        prefix = self.FUZZY_LOOKUP_PREFIX if fuzzy else self.HASH_LOOKUP_PREFIX
         for c in g_config.gemini.clients:
-            message_hash = _hash_conversation(c.id, model, messages)
-            key = f"{self.HASH_LOOKUP_PREFIX}{message_hash}"
+            message_hash = _hash_conversation(c.id, model, messages, fuzzy=fuzzy)
+            key = f"{prefix}{message_hash}"
             try:
                 with self._get_transaction(write=False) as txn:
                     if mapped := txn.get(key.encode("utf-8")):  # type: ignore
@@ -350,6 +381,9 @@ def delete(self, key: str) -> Optional[ConversationInStore]:
                 storage_data = orjson.loads(data)  # type: ignore
                 conv = ConversationInStore.model_validate(storage_data)
                 message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages)
+                fuzzy_hash = _hash_conversation(
+                    conv.client_id, conv.model, conv.messages, fuzzy=True
+                )
 
                 # Delete main data
                 txn.delete(key.encode("utf-8"))
@@ -358,6 +392,9 @@ def delete(self, key: str) -> Optional[ConversationInStore]:
                 if message_hash and key != message_hash:
                     txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"))
 
+                # Always clean up fuzzy mapping
+                txn.delete(f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8"))
+
                 logger.debug(f"Deleted messages with key: {key[:12]}")
                 return conv
 
@@ -386,7 +423,9 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]:
                 for key, _ in cursor:
                     key_str = key.decode("utf-8")
                     # Skip internal hash mappings
-                    if key_str.startswith(self.HASH_LOOKUP_PREFIX):
+                    if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith(
+                        self.FUZZY_LOOKUP_PREFIX
+                    ):
                         continue
 
                     if not prefix or key_str.startswith(prefix):
@@ -459,8 +498,14 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
                         continue
 
                     message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages)
-                    if message_hash and key_str != message_hash:
-                        txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"))
+                    if message_hash:
+                        if key_str != message_hash:
+                            txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"))
+
+                        fuzzy_hash = _hash_conversation(
+                            conv.client_id, conv.model, conv.messages, fuzzy=True
+                        )
+                        txn.delete(f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8"))
                     removed += 1
         except Exception as exc:
             logger.error(f"Failed to delete expired conversations: {exc}")

From 551eb8775e03e24219436446d30f8997c268d7eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 9 Feb 2026 21:08:06 +0700
Subject: [PATCH 086/236] Refactor: Implement fuzzy matching (follow-up): skip
 "fuzzy:" lookup keys while scanning entries in cleanup_expired.

---
 app/services/lmdb.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index b08a325..b8861dc 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -466,7 +466,9 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
 
                 for key_bytes, value_bytes in cursor:
                     key_str = key_bytes.decode("utf-8")
-                    if key_str.startswith(self.HASH_LOOKUP_PREFIX):
+                    if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith(
+                        self.FUZZY_LOOKUP_PREFIX
+                    ):
                         continue
 
                     try:

From b2dbb087cfe4b553e690b1d52b4d12b5c3b07296 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 10 Feb 2026 07:56:22 +0700
Subject: [PATCH 087/236] Feat: Add watchdog_timeout parameter

---
 app/services/client.py | 3 +++
 app/services/pool.py   | 2 ++
 app/utils/config.py    | 1 +
 config/config.yaml     | 1 +
 4 files changed, 7 insertions(+)

diff --git a/app/services/client.py b/app/services/client.py
index 16d7a33..3cdd839 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -38,6 +38,7 @@ def __init__(self, client_id: str, **kwargs):
     async def init(
         self,
         timeout: float = cast(float, _UNSET),
+        watchdog_timeout: float = cast(float, _UNSET),
         auto_close: bool = False,
         close_delay: float = 300,
         auto_refresh: bool = cast(bool, _UNSET),
@@ -49,6 +50,7 @@ async def init(
         """
         config = g_config.gemini
         timeout = cast(float, _resolve(timeout, config.timeout))
+        watchdog_timeout = cast(float, _resolve(watchdog_timeout, config.watchdog_timeout))
         auto_refresh = cast(bool, _resolve(auto_refresh, config.auto_refresh))
         refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval))
         verbose = cast(bool, _resolve(verbose, config.verbose))
@@ -56,6 +58,7 @@ async def init(
         try:
             await super().init(
                 timeout=timeout,
+                watchdog_timeout=watchdog_timeout,
                 auto_close=auto_close,
                 close_delay=close_delay,
                 auto_refresh=auto_refresh,
diff --git a/app/services/pool.py b/app/services/pool.py
index 0f95203..decc21a 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -41,6 +41,7 @@ async def init(self) -> None:
                 try:
                     await client.init(
                         timeout=g_config.gemini.timeout,
+                        watchdog_timeout=g_config.gemini.watchdog_timeout,
                         auto_refresh=g_config.gemini.auto_refresh,
                         verbose=g_config.gemini.verbose,
                         refresh_interval=g_config.gemini.refresh_interval,
@@ -93,6 +94,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool:
             try:
                 await client.init(
                     timeout=g_config.gemini.timeout,
+                    watchdog_timeout=g_config.gemini.watchdog_timeout,
                     auto_refresh=g_config.gemini.auto_refresh,
                     verbose=g_config.gemini.verbose,
                     refresh_interval=g_config.gemini.refresh_interval,
diff --git a/app/utils/config.py b/app/utils/config.py
index bbb6054..e62832d 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -84,6 +84,7 @@ class GeminiConfig(BaseModel):
         description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom",
     )
     timeout: int = Field(default=300, ge=1, description="Init timeout")
+    watchdog_timeout: int = Field(default=60, ge=1, description="Watchdog timeout")
     auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies")
     refresh_interval: int = Field(
         default=540, ge=1, description="Interval in seconds to refresh Gemini cookies"
diff --git a/config/config.yaml b/config/config.yaml
index ed581f7..2873d48 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -23,6 +23,7 @@ gemini:
       secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"
       proxy: null          # Optional proxy URL (null/empty means direct connection)
   timeout: 300             # Init timeout in seconds
+  watchdog_timeout: 60     # Watchdog timeout in seconds (No longer than 75 seconds)
   auto_refresh: true       # Auto-refresh session cookies
   refresh_interval: 540    # Refresh interval in seconds
   verbose: false           # Enable verbose logging for Gemini requests

From 969cd4a1e37a0915915962fe8c7ae691c8defa65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 10 Feb 2026 07:57:53 +0700
Subject: [PATCH 088/236] Update required dependencies

---
 pyproject.toml |  8 ++++----
 uv.lock        | 27 ++++++++++++++-------------
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index dc08571..47cd86f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,8 +5,8 @@ description = "FastAPI Server built on Gemini Web API"
 readme = "README.md"
 requires-python = "==3.12.*"
 dependencies = [
-    "fastapi>=0.128.0",
-    "gemini-webapi>=1.18.0",
+    "fastapi>=0.128.6",
+    "gemini-webapi>=1.19.0",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
     "orjson>=3.11.7",
@@ -17,7 +17,7 @@ dependencies = [
 
 [project.optional-dependencies]
 dev = [
-    "ruff>=0.14.14",
+    "ruff>=0.15.0",
 ]
 
 [tool.ruff]
@@ -31,5 +31,5 @@ indent-style = "space"
 
 [dependency-groups]
 dev = [
-    "ruff>=0.14.14",
+    "ruff>=0.15.0",
 ]
diff --git a/uv.lock b/uv.lock
index 2a29c98..ea28c0e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -65,17 +65,18 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.128.1"
+version = "0.128.6"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
     { name = "pydantic" },
     { name = "starlette" },
     { name = "typing-extensions" },
+    { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/f6/59/28bde150415783ff084334e3de106eb7461a57864cf69f343950ad5a5ddd/fastapi-0.128.1.tar.gz", hash = "sha256:ce5be4fa26d4ce6f54debcc873d1fb8e0e248f5c48d7502ba6c61457ab2dc766", size = 374260, upload-time = "2026-02-04T17:35:10.542Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/83/d1/195005b5e45b443e305136df47ee7df4493d782e0c039dd0d97065580324/fastapi-0.128.6.tar.gz", hash = "sha256:0cb3946557e792d731b26a42b04912f16367e3c3135ea8290f620e234f2b604f", size = 374757, upload-time = "2026-02-09T17:27:03.541Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/1a/08/3953db1979ea131c68279b997c6465080118b407f0800445b843f8e164b3/fastapi-0.128.1-py3-none-any.whl", hash = "sha256:ee82146bbf91ea5bbf2bb8629e4c6e056c4fbd997ea6068501b11b15260b50fb", size = 103810, upload-time = "2026-02-04T17:35:08.02Z" },
+    { url = "https://files.pythonhosted.org/packages/24/58/a2c4f6b240eeb148fb88cdac48f50a194aba760c1ca4988c6031c66a20ee/fastapi-0.128.6-py3-none-any.whl", hash = "sha256:bb1c1ef87d6086a7132d0ab60869d6f1ee67283b20fbf84ec0003bd335099509", size = 103674, upload-time = "2026-02-09T17:27:02.355Z" },
 ]
 
 [[package]]
@@ -105,24 +106,24 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "fastapi", specifier = ">=0.128.0" },
-    { name = "gemini-webapi", specifier = ">=1.18.0" },
+    { name = "fastapi", specifier = ">=0.128.6" },
+    { name = "gemini-webapi", specifier = ">=1.19.0" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "orjson", specifier = ">=3.11.7" },
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" },
-    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.14.14" },
+    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.0" },
     { name = "uvicorn", specifier = ">=0.40.0" },
     { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" },
 ]
 provides-extras = ["dev"]
 
 [package.metadata.requires-dev]
-dev = [{ name = "ruff", specifier = ">=0.14.14" }]
+dev = [{ name = "ruff", specifier = ">=0.15.0" }]
 
 [[package]]
 name = "gemini-webapi"
-version = "1.18.1"
+version = "1.19.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "httpx", extra = ["http2"] },
@@ -130,9 +131,9 @@ dependencies = [
     { name = "orjson" },
     { name = "pydantic" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/64/ae/925abc25d0b5c62170c528511bb8a1ec7bd77a0b7a19aacb9a7885a0afa8/gemini_webapi-1.18.1.tar.gz", hash = "sha256:34c91141e5953e898333e9c6ca01349566d28dbea9ddd8094f8c85e74d72ce47", size = 265100, upload-time = "2026-02-04T22:19:05.724Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/5f/1f/8314b620db12855e6aa9c55e05428fa30eb7f00fb61b1de7db42734ef2b2/gemini_webapi-1.19.0.tar.gz", hash = "sha256:1f65e6a2e4a461f4ed4fb01dc76c2de4ed517af549f6ce34b96b9986c11af5dd", size = 266822, upload-time = "2026-02-09T23:16:34.446Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/4f/e5/7ae98d48bfb7283facec804f13c6719b6fa523a6aa240b4acdea736bf60b/gemini_webapi-1.18.1-py3-none-any.whl", hash = "sha256:110f3d191ffdda9d040aab6b1b2f1d8513d1e77dc33d40fac5024de9344ea3ec", size = 61836, upload-time = "2026-02-04T22:19:04.08Z" },
+    { url = "https://files.pythonhosted.org/packages/88/3b/71567ce13357d1139dfa0578c4073d6a8c523c4a28f3843194b639bf9d1e/gemini_webapi-1.19.0-py3-none-any.whl", hash = "sha256:47ab49f018cc01bf4b772910f7843af895f5e43d5a18b5ec7063b6f61e535921", size = 63498, upload-time = "2026-02-09T23:16:33.328Z" },
 ]
 
 [[package]]
@@ -385,15 +386,15 @@ wheels = [
 
 [[package]]
 name = "starlette"
-version = "0.50.0"
+version = "0.52.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = "2025-11-01T15:25:27.516Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" },
+    { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" },
 ]
 
 [[package]]

From c258d32d448f62db63ccd4bbcdb7ba29c575b1c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 10 Feb 2026 08:46:53 +0700
Subject: [PATCH 089/236] Move `maketrans` to global variable

---
 app/services/lmdb.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index b8861dc..0ba6c3a 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -20,6 +20,7 @@
 from ..utils.singleton import Singleton
 
 _VOLATILE_SYMBOLS = string.whitespace + string.punctuation
+_VOLATILE_TRANS_TABLE = str.maketrans("", "", _VOLATILE_SYMBOLS)
 
 
 def _fuzzy_normalize(text: str | None) -> str | None:
@@ -28,7 +29,7 @@ def _fuzzy_normalize(text: str | None) -> str | None:
     """
     if text is None:
         return None
-    return text.lower().translate(str.maketrans("", "", _VOLATILE_SYMBOLS))
+    return text.lower().translate(_VOLATILE_TRANS_TABLE)
 
 
 def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None:

From 157028f026950afc90140dae9568869c0ec27400 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 10 Feb 2026 08:52:22 +0700
Subject: [PATCH 090/236] Inline `_VOLATILE_SYMBOLS` into the `maketrans` table

---
 app/services/lmdb.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 0ba6c3a..a94e090 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -19,8 +19,7 @@
 )
 from ..utils.singleton import Singleton
 
-_VOLATILE_SYMBOLS = string.whitespace + string.punctuation
-_VOLATILE_TRANS_TABLE = str.maketrans("", "", _VOLATILE_SYMBOLS)
+_VOLATILE_TRANS_TABLE = str.maketrans("", "", string.whitespace + string.punctuation)
 
 
 def _fuzzy_normalize(text: str | None) -> str | None:

From 5f9a7ece8e6027afddf2e04da60e6ab562a72874 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 10 Feb 2026 10:30:53 +0700
Subject: [PATCH 091/236] Refactor: Add a filter to catch orphaned tool calls.

---
 app/server/chat.py | 63 ++++++++++++++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 701c1f6..414349a 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -776,39 +776,44 @@ def __init__(self):
 
         self.TOOL_START = "[function_calls]"
         self.TOOL_END = "[/function_calls]"
+        self.ORPHAN_START = "[call:"
+        self.ORPHAN_END = "[/call]"
         self.RESPONSE_START = "[function_responses]"
         self.RESPONSE_END = "[/function_responses]"
         self.TAG_START = "<|im_start|>"
         self.TAG_END = "<|im_end|>"
         self.HINT_START = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else ""
         self.HINT_END = TOOL_HINT_LINE_END
-        self.TOOL_PREFIX = "[call:"
 
-        self.WATCH_PREFIXES = [
+        self.WATCH_MARKERS = [
             self.TOOL_START,
+            self.ORPHAN_START,
             self.RESPONSE_START,
             self.TAG_START,
             self.TAG_END,
         ]
         if self.HINT_START:
-            self.WATCH_PREFIXES.append(self.HINT_START)
+            self.WATCH_MARKERS.append(self.HINT_START)
 
     def process(self, chunk: str) -> str:
         self.buffer += chunk
         output = []
 
         while self.buffer:
+            buf_low = self.buffer.lower()
             if self.state == "NORMAL":
-                tool_idx = self.buffer.find(self.TOOL_START)
-                resp_idx = self.buffer.find(self.RESPONSE_START)
-                tag_idx = self.buffer.find(self.TAG_START)
-                end_idx = self.buffer.find(self.TAG_END)
-                hint_idx = self.buffer.find(self.HINT_START) if self.HINT_START else -1
+                tool_idx = buf_low.find(self.TOOL_START)
+                orphan_idx = buf_low.find(self.ORPHAN_START)
+                resp_idx = buf_low.find(self.RESPONSE_START)
+                tag_idx = buf_low.find(self.TAG_START)
+                end_idx = buf_low.find(self.TAG_END)
+                hint_idx = buf_low.find(self.HINT_START) if self.HINT_START else -1
 
                 indices = [
                     (i, t)
                     for i, t in [
                         (tool_idx, "TOOL"),
+                        (orphan_idx, "ORPHAN"),
                         (resp_idx, "RESP"),
                         (tag_idx, "TAG"),
                         (end_idx, "END"),
@@ -818,11 +823,12 @@ def process(self, chunk: str) -> str:
                 ]
 
                 if not indices:
-                    # Guard against split start markers
+                    # Guard against split markers (case-insensitive)
                     keep_len = 0
-                    for p in self.WATCH_PREFIXES:
-                        for i in range(len(p) - 1, 0, -1):
-                            if self.buffer.endswith(p[:i]):
+                    for marker in self.WATCH_MARKERS:
+                        m_low = marker.lower()
+                        for i in range(len(m_low) - 1, 0, -1):
+                            if buf_low.endswith(m_low[:i]):
                                 keep_len = max(keep_len, i)
                                 break
                     yield_len = len(self.buffer) - keep_len
@@ -840,6 +846,10 @@ def process(self, chunk: str) -> str:
                     self.state = "IN_TOOL"
                     self.block_buffer = ""
                     self.buffer = self.buffer[len(self.TOOL_START) :]
+                elif m_type == "ORPHAN":
+                    self.state = "IN_ORPHAN"
+                    self.block_buffer = ""
+                    self.buffer = self.buffer[len(self.ORPHAN_START) :]
                 elif m_type == "RESP":
                     self.state = "IN_RESP"
                     self.buffer = self.buffer[len(self.RESPONSE_START) :]
@@ -853,43 +863,53 @@ def process(self, chunk: str) -> str:
                     self.buffer = self.buffer[len(self.HINT_START) :]
 
             elif self.state == "IN_HINT":
-                end_idx = self.buffer.find(self.HINT_END)
+                end_idx = buf_low.find(self.HINT_END.lower())
                 if end_idx != -1:
                     self.buffer = self.buffer[end_idx + len(self.HINT_END) :]
                     self.state = "NORMAL"
                 else:
-                    # Keep end of buffer to avoid missing split HINT_END
                     keep_len = len(self.HINT_END) - 1
                     if len(self.buffer) > keep_len:
                         self.buffer = self.buffer[-keep_len:]
                     break
 
             elif self.state == "IN_RESP":
-                end_idx = self.buffer.find(self.RESPONSE_END)
+                end_idx = buf_low.find(self.RESPONSE_END.lower())
                 if end_idx != -1:
                     self.buffer = self.buffer[end_idx + len(self.RESPONSE_END) :]
                     self.state = "NORMAL"
                 else:
-                    # Keep end of buffer to avoid missing split RESPONSE_END
                     keep_len = len(self.RESPONSE_END) - 1
                     if len(self.buffer) > keep_len:
                         self.buffer = self.buffer[-keep_len:]
                     break
 
             elif self.state == "IN_TOOL":
-                end_idx = self.buffer.find(self.TOOL_END)
+                end_idx = buf_low.find(self.TOOL_END.lower())
                 if end_idx != -1:
                     self.block_buffer += self.buffer[:end_idx]
                     self.buffer = self.buffer[end_idx + len(self.TOOL_END) :]
                     self.state = "NORMAL"
                 else:
-                    # Accumulate and keep potential split end marker
                     keep_len = len(self.TOOL_END) - 1
                     if len(self.buffer) > keep_len:
                         self.block_buffer += self.buffer[:-keep_len]
                         self.buffer = self.buffer[-keep_len:]
                     break
 
+            elif self.state == "IN_ORPHAN":
+                end_idx = buf_low.find(self.ORPHAN_END.lower())
+                if end_idx != -1:
+                    self.block_buffer += self.buffer[:end_idx]
+                    self.buffer = self.buffer[end_idx + len(self.ORPHAN_END) :]
+                    self.state = "NORMAL"
+                else:
+                    keep_len = len(self.ORPHAN_END) - 1
+                    if len(self.buffer) > keep_len:
+                        self.block_buffer += self.buffer[:-keep_len]
+                        self.buffer = self.buffer[-keep_len:]
+                    break
+
             elif self.state == "IN_TAG":
                 nl_idx = self.buffer.find("\n")
                 if nl_idx != -1:
@@ -900,7 +920,7 @@ def process(self, chunk: str) -> str:
                     break
 
             elif self.state == "IN_BLOCK":
-                end_idx = self.buffer.find(self.TAG_END)
+                end_idx = buf_low.find(self.TAG_END.lower())
                 if end_idx != -1:
                     content = self.buffer[:end_idx]
                     if self.current_role != "tool":
@@ -909,7 +929,6 @@ def process(self, chunk: str) -> str:
                     self.state = "NORMAL"
                     self.current_role = ""
                 else:
-                    # Yield safe part and keep potential split TAG_END
                     keep_len = len(self.TAG_END) - 1
                     if self.current_role != "tool":
                         if len(self.buffer) > keep_len:
@@ -926,8 +945,10 @@ def process(self, chunk: str) -> str:
     def flush(self) -> str:
         res = ""
         if self.state == "IN_TOOL":
-            if self.TOOL_PREFIX not in self.block_buffer.lower():
+            if self.ORPHAN_START.lower() not in self.block_buffer.lower():
                 res = f"{self.TOOL_START}{self.block_buffer}"
+        elif self.state == "IN_ORPHAN":
+            res = f"{self.ORPHAN_START}{self.block_buffer}"
         elif self.state == "IN_BLOCK" and self.current_role != "tool":
             res = self.buffer
         elif self.state in ("IN_RESP", "IN_HINT"):

From c81c2cefd8bd76e83c0edf84ebd2a0a11ad28ab8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 10 Feb 2026 12:48:20 +0700
Subject: [PATCH 092/236] Update required dependencies

---
 pyproject.toml | 2 +-
 uv.lock        | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 47cd86f..58391ff 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = "==3.12.*"
 dependencies = [
     "fastapi>=0.128.6",
-    "gemini-webapi>=1.19.0",
+    "gemini-webapi>=1.19.1",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
     "orjson>=3.11.7",
diff --git a/uv.lock b/uv.lock
index ea28c0e..34b5cc8 100644
--- a/uv.lock
+++ b/uv.lock
@@ -123,7 +123,7 @@ dev = [{ name = "ruff", specifier = ">=0.15.0" }]
 
 [[package]]
 name = "gemini-webapi"
-version = "1.19.0"
+version = "1.19.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "httpx", extra = ["http2"] },
@@ -131,9 +131,9 @@ dependencies = [
     { name = "orjson" },
     { name = "pydantic" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/5f/1f/8314b620db12855e6aa9c55e05428fa30eb7f00fb61b1de7db42734ef2b2/gemini_webapi-1.19.0.tar.gz", hash = "sha256:1f65e6a2e4a461f4ed4fb01dc76c2de4ed517af549f6ce34b96b9986c11af5dd", size = 266822, upload-time = "2026-02-09T23:16:34.446Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e5/d1/c61ee05ca6e20c70caa25a3f0f12e2a810bbc6b39e588ff937821de43690/gemini_webapi-1.19.1.tar.gz", hash = "sha256:a52afdfc2d9f6e87a6ae8cd926fb2ce5c562a0a99dc75ce97d8d50ffc2a3e133", size = 266761, upload-time = "2026-02-10T05:44:29.195Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/88/3b/71567ce13357d1139dfa0578c4073d6a8c523c4a28f3843194b639bf9d1e/gemini_webapi-1.19.0-py3-none-any.whl", hash = "sha256:47ab49f018cc01bf4b772910f7843af895f5e43d5a18b5ec7063b6f61e535921", size = 63498, upload-time = "2026-02-09T23:16:33.328Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/0b/7a73919ee8621f6559ae679a20d754959b989a3f09cf20478d89971f40b4/gemini_webapi-1.19.1-py3-none-any.whl", hash = "sha256:0dc4c7daa58d281722d52d6acf520f2e850c6c3c6020080fdbc5f77736c8be9a", size = 63500, upload-time = "2026-02-10T05:44:27.692Z" },
 ]
 
 [[package]]

From a17082532189d852b61e8b791a49844b4bb922f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 10 Feb 2026 12:53:10 +0700
Subject: [PATCH 093/236] Add dependabot

---
 .github/dependabot.yaml | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 .github/dependabot.yaml

diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml
new file mode 100644
index 0000000..5ace460
--- /dev/null
+++ b/.github/dependabot.yaml
@@ -0,0 +1,6 @@
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"

From a0136af06d4e43bd6ac4a9ba7a2b05509358e187 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 11 Feb 2026 10:37:50 +0700
Subject: [PATCH 094/236] Refactor: Implement the logic changes recommended by
 Copilot

- Remove orphaned tool calls to prevent leaking internal tool-call information.
- Define limits for the `timeout`, `watchdog_timeout`, and `refresh_interval` ranges.
- Revise the fuzzy match logic to prevent accidental session reuse and avoid any possible content leakage between requests.
---
 app/server/chat.py   |   4 +-
 app/services/lmdb.py | 252 ++++++++++++++++++++++++-------------------
 app/utils/config.py  |  10 +-
 config/config.yaml   |   6 +-
 4 files changed, 151 insertions(+), 121 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 414349a..30a6b3a 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -947,11 +947,9 @@ def flush(self) -> str:
         if self.state == "IN_TOOL":
             if self.ORPHAN_START.lower() not in self.block_buffer.lower():
                 res = f"{self.TOOL_START}{self.block_buffer}"
-        elif self.state == "IN_ORPHAN":
-            res = f"{self.ORPHAN_START}{self.block_buffer}"
         elif self.state == "IN_BLOCK" and self.current_role != "tool":
             res = self.buffer
-        elif self.state in ("IN_RESP", "IN_HINT"):
+        elif self.state in ("IN_ORPHAN", "IN_RESP", "IN_HINT"):
             res = ""
         elif self.state == "NORMAL":
             res = self.buffer
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index a94e090..c90f537 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -25,6 +25,7 @@
 def _fuzzy_normalize(text: str | None) -> str | None:
     """
     Lowercase and remove all whitespace and punctuation.
+    Used as a fallback for complex/malformed contents matching.
     """
     if text is None:
         return None
@@ -38,7 +39,7 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None:
     if text is None:
         return None
 
-    # Unicode normalization
+    # Unicode normalization to NFC
     text = unicodedata.normalize("NFC", text)
 
     # Basic cleaning
@@ -49,7 +50,8 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None:
     if fuzzy:
         return _fuzzy_normalize(text)
 
-    return text if text else None
+    # Always strip to ensure trailing newlines/spaces don't break exact matches
+    return text.strip() if text.strip() else None
 
 
 def _hash_message(message: Message, fuzzy: bool = False) -> str:
@@ -157,7 +159,6 @@ def __init__(
             max_db_size: Maximum database size in bytes (default: 256 MB)
             retention_days: Number of days to retain conversations (default: 14, 0 disables cleanup)
         """
-
         if db_path is None:
             db_path = g_config.storage.path
         if max_db_size is None:
@@ -174,9 +175,11 @@ def __init__(
         self._init_environment()
 
     def _ensure_db_path(self) -> None:
+        """Create database directory if it doesn't exist."""
         self.db_path.parent.mkdir(parents=True, exist_ok=True)
 
     def _init_environment(self) -> None:
+        """Initialize LMDB environment."""
         try:
             self._env = lmdb.open(
                 str(self.db_path),
@@ -187,12 +190,18 @@ def _init_environment(self) -> None:
                 meminit=False,
             )
             logger.info(f"LMDB environment initialized at {self.db_path}")
-        except Exception as e:
+        except lmdb.Error as e:
             logger.error(f"Failed to initialize LMDB environment: {e}")
             raise
 
     @contextmanager
     def _get_transaction(self, write: bool = False):
+        """
+        Context manager for LMDB transactions.
+
+        Args:
+            write: Whether the transaction should be writable.
+        """
         if not self._env:
             raise RuntimeError("LMDB environment not initialized")
 
@@ -201,12 +210,57 @@ def _get_transaction(self, write: bool = False):
             yield txn
             if write:
                 txn.commit()
-        except Exception:
+        except lmdb.Error:
+            if write:
+                txn.abort()
+            raise
+        except Exception as e:
+            logger.error(f"Unexpected error in LMDB transaction: {e}")
             if write:
                 txn.abort()
             raise
-        finally:
-            pass  # Transaction is automatically cleaned up
+
+    @staticmethod
+    def _decode_index_value(data: bytes) -> List[str]:
+        """Decode index value, handling both legacy single-string and new list-of-strings formats."""
+        if not data:
+            return []
+        if data.startswith(b"["):
+            try:
+                val = orjson.loads(data)
+                if isinstance(val, list):
+                    return [str(v) for v in val]
+            except orjson.JSONDecodeError:
+                pass
+        try:
+            return [data.decode("utf-8")]
+        except UnicodeDecodeError:
+            return []
+
+    @staticmethod
+    def _update_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str):
+        """Add a storage key to the index for a given hash, avoiding duplicates."""
+        idx_key = f"{prefix}{hash_val}".encode("utf-8")
+        existing = txn.get(idx_key)
+        keys = LMDBConversationStore._decode_index_value(existing) if existing else []
+        if storage_key not in keys:
+            keys.append(storage_key)
+            txn.put(idx_key, orjson.dumps(keys))
+
+    @staticmethod
+    def _remove_from_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str):
+        """Remove a specific storage key from the index for a given hash."""
+        idx_key = f"{prefix}{hash_val}".encode("utf-8")
+        existing = txn.get(idx_key)
+        if not existing:
+            return
+        keys = LMDBConversationStore._decode_index_value(existing)
+        if storage_key in keys:
+            keys.remove(storage_key)
+            if keys:
+                txn.put(idx_key, orjson.dumps(keys))
+            else:
+                txn.delete(idx_key)
 
     def store(
         self,
@@ -226,12 +280,10 @@ def store(
         if not conv:
             raise ValueError("Messages list cannot be empty")
 
-        # Sanitize messages before computing hash and storing to ensure consistency
-        # with the search (find) logic, which also sanitizes its prefix.
+        # Ensure consistent sanitization before hashing and storage
         sanitized_messages = self.sanitize_assistant_messages(conv.messages)
         conv.messages = sanitized_messages
 
-        # Generate hash for the message list
         message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages)
         fuzzy_hash = _hash_conversation(conv.client_id, conv.model, conv.messages, fuzzy=True)
         storage_key = custom_key or message_hash
@@ -247,21 +299,19 @@ def store(
             with self._get_transaction(write=True) as txn:
                 txn.put(storage_key.encode("utf-8"), value, overwrite=True)
 
-                txn.put(
-                    f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"),
-                    storage_key.encode("utf-8"),
-                )
-
-                txn.put(
-                    f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8"),
-                    storage_key.encode("utf-8"),
-                )
+                self._update_index(txn, self.HASH_LOOKUP_PREFIX, message_hash, storage_key)
+                self._update_index(txn, self.FUZZY_LOOKUP_PREFIX, fuzzy_hash, storage_key)
 
                 logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key[:12]}")
                 return storage_key
 
+        except lmdb.Error as e:
+            logger.error(f"LMDB error while storing messages with key {storage_key[:12]}: {e}")
+            raise
         except Exception as e:
-            logger.error(f"Failed to store messages with key {storage_key[:12]}: {e}")
+            logger.error(
+                f"Unexpected error while storing messages with key {storage_key[:12]}: {e}"
+            )
             raise
 
     def get(self, key: str) -> Optional[ConversationInStore]:
@@ -280,29 +330,37 @@ def get(self, key: str) -> Optional[ConversationInStore]:
                 if not data:
                     return None
 
-                storage_data = orjson.loads(data)  # type: ignore
+                storage_data = orjson.loads(data)
                 conv = ConversationInStore.model_validate(storage_data)
 
                 logger.debug(f"Retrieved {len(conv.messages)} messages with key: {key[:12]}")
                 return conv
-
+        except (lmdb.Error, orjson.JSONDecodeError) as e:
+            logger.error(f"Failed to retrieve/parse messages with key {key[:12]}: {e}")
+            return None
         except Exception as e:
-            logger.error(f"Failed to retrieve messages with key {key[:12]}: {e}")
+            logger.error(f"Unexpected error retrieving messages with key {key[:12]}: {e}")
             return None
 
     def find(self, model: str, messages: List[Message]) -> Optional[ConversationInStore]:
         """
         Search conversation data by message list.
+        Tries raw matching, then sanitized matching, and finally fuzzy matching.
+
+        Args:
+            model: Model name
+            messages: List of messages to match
+
+        Returns:
+            ConversationInStore or None if not found
         """
         if not messages:
             return None
 
-        # --- Find with raw messages ---
         if conv := self._find_by_message_list(model, messages):
             logger.debug(f"Session found for '{model}' with {len(messages)} raw messages.")
             return conv
 
-        # --- Find with cleaned messages ---
         cleaned_messages = self.sanitize_assistant_messages(messages)
         if cleaned_messages != messages:
             if conv := self._find_by_message_list(model, cleaned_messages):
@@ -311,7 +369,6 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt
                 )
                 return conv
 
-        # --- Find with fuzzy matching ---
         if conv := self._find_by_message_list(model, messages, fuzzy=True):
             logger.debug(f"Session found for '{model}' with fuzzy matching.")
             return conv
@@ -325,18 +382,49 @@ def _find_by_message_list(
         messages: List[Message],
         fuzzy: bool = False,
     ) -> Optional[ConversationInStore]:
-        """Internal find implementation based on a message list."""
+        """
+        Internal find implementation based on a message list.
+
+        Args:
+            model: Model name
+            messages: Message list to hash
+            fuzzy: Whether to use fuzzy hashing
+
+        Returns:
+            ConversationInStore or None if not found
+        """
         prefix = self.FUZZY_LOOKUP_PREFIX if fuzzy else self.HASH_LOOKUP_PREFIX
+        target_len = len(messages)
+
         for c in g_config.gemini.clients:
             message_hash = _hash_conversation(c.id, model, messages, fuzzy=fuzzy)
             key = f"{prefix}{message_hash}"
             try:
                 with self._get_transaction(write=False) as txn:
-                    if mapped := txn.get(key.encode("utf-8")):  # type: ignore
-                        return self.get(mapped.decode("utf-8"))  # type: ignore
-            except Exception as e:
+                    if mapped := txn.get(key.encode("utf-8")):
+                        candidate_keys = self._decode_index_value(mapped)
+                        # Try candidates from newest to oldest
+                        for ck in reversed(candidate_keys):
+                            if conv := self.get(ck):
+                                if len(conv.messages) != target_len:
+                                    continue
+
+                                if fuzzy:
+                                    # For fuzzy matching, verify each message hash individually
+                                    # to prevent semantic collisions (e.g., "1.2" vs "12")
+                                    match_found = True
+                                    for i in range(target_len):
+                                        if _hash_message(
+                                            conv.messages[i], fuzzy=True
+                                        ) != _hash_message(messages[i], fuzzy=True):
+                                            match_found = False
+                                            break
+                                    if not match_found:
+                                        continue
+                                return conv
+            except lmdb.Error as e:
                 logger.error(
-                    f"Failed to retrieve messages by message list for hash {message_hash} and client {c.id}: {e}"
+                    f"LMDB error while searching for hash {message_hash} and client {c.id}: {e}"
                 )
                 continue
 
@@ -345,74 +433,42 @@ def _find_by_message_list(
         return None
 
     def exists(self, key: str) -> bool:
-        """
-        Check if a key exists in the store.
-
-        Args:
-            key: Storage key to check
-
-        Returns:
-            bool: True if key exists, False otherwise
-        """
+        """Check if a key exists in the store."""
         try:
             with self._get_transaction(write=False) as txn:
                 return txn.get(key.encode("utf-8")) is not None
-        except Exception as e:
+        except lmdb.Error as e:
             logger.error(f"Failed to check existence of key {key}: {e}")
             return False
 
     def delete(self, key: str) -> Optional[ConversationInStore]:
-        """
-        Delete conversation model by key.
-
-        Args:
-            key: Storage key to delete
-
-        Returns:
-            ConversationInStore: The deleted conversation data, or None if not found
-        """
+        """Delete conversation model by key."""
         try:
             with self._get_transaction(write=True) as txn:
-                # Get data first to clean up hash mapping
                 data = txn.get(key.encode("utf-8"))
                 if not data:
                     return None
 
-                storage_data = orjson.loads(data)  # type: ignore
+                storage_data = orjson.loads(data)
                 conv = ConversationInStore.model_validate(storage_data)
                 message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages)
                 fuzzy_hash = _hash_conversation(
                     conv.client_id, conv.model, conv.messages, fuzzy=True
                 )
 
-                # Delete main data
                 txn.delete(key.encode("utf-8"))
 
-                # Clean up hash mapping if it exists
-                if message_hash and key != message_hash:
-                    txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"))
-
-                # Always clean up fuzzy mapping
-                txn.delete(f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8"))
+                self._remove_from_index(txn, self.HASH_LOOKUP_PREFIX, message_hash, key)
+                self._remove_from_index(txn, self.FUZZY_LOOKUP_PREFIX, fuzzy_hash, key)
 
                 logger.debug(f"Deleted messages with key: {key[:12]}")
                 return conv
-
-        except Exception as e:
+        except (lmdb.Error, orjson.JSONDecodeError) as e:
             logger.error(f"Failed to delete messages with key {key[:12]}: {e}")
             return None
 
     def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]:
-        """
-        List all keys in the store, optionally filtered by prefix.
-
-        Args:
-            prefix: Optional prefix to filter keys
-            limit: Optional limit on number of keys returned
-
-        Returns:
-            List of keys
-        """
+        """List all keys in the store, optionally filtered by prefix."""
         keys = []
         try:
             with self._get_transaction(write=False) as txn:
@@ -422,7 +478,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]:
                 count = 0
                 for key, _ in cursor:
                     key_str = key.decode("utf-8")
-                    # Skip internal hash mappings
+                    # Skip internal index mappings
                     if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith(
                         self.FUZZY_LOOKUP_PREFIX
                     ):
@@ -431,25 +487,14 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]:
                     if not prefix or key_str.startswith(prefix):
                         keys.append(key_str)
                         count += 1
-
                         if limit and count >= limit:
                             break
-
-        except Exception as e:
+        except lmdb.Error as e:
             logger.error(f"Failed to list keys: {e}")
-
         return keys
 
     def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
-        """
-        Delete conversations older than the given retention period.
-
-        Args:
-            retention_days: Optional override for retention period in days.
-
-        Returns:
-            Number of conversations removed.
-        """
+        """Delete conversations older than the given retention period."""
         retention_value = (
             self.retention_days if retention_days is None else max(0, int(retention_days))
         )
@@ -463,7 +508,6 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
         try:
             with self._get_transaction(write=False) as txn:
                 cursor = txn.cursor()
-
                 for key_bytes, value_bytes in cursor:
                     key_str = key_bytes.decode("utf-8")
                     if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith(
@@ -472,9 +516,9 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
                         continue
 
                     try:
-                        storage_data = orjson.loads(value_bytes)  # type: ignore[arg-type]
+                        storage_data = orjson.loads(value_bytes)
                         conv = ConversationInStore.model_validate(storage_data)
-                    except Exception as exc:
+                    except (orjson.JSONDecodeError, Exception) as exc:
                         logger.warning(f"Failed to decode record for key {key_str}: {exc}")
                         continue
 
@@ -484,7 +528,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
 
                     if timestamp < cutoff:
                         expired_entries.append((key_str, conv))
-        except Exception as exc:
+        except lmdb.Error as exc:
             logger.error(f"Failed to scan LMDB for retention cleanup: {exc}")
             raise
 
@@ -501,15 +545,13 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
 
                     message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages)
                     if message_hash:
-                        if key_str != message_hash:
-                            txn.delete(f"{self.HASH_LOOKUP_PREFIX}{message_hash}".encode("utf-8"))
-
+                        self._remove_from_index(txn, self.HASH_LOOKUP_PREFIX, message_hash, key_str)
                         fuzzy_hash = _hash_conversation(
                             conv.client_id, conv.model, conv.messages, fuzzy=True
                         )
-                        txn.delete(f"{self.FUZZY_LOOKUP_PREFIX}{fuzzy_hash}".encode("utf-8"))
+                        self._remove_from_index(txn, self.FUZZY_LOOKUP_PREFIX, fuzzy_hash, key_str)
                     removed += 1
-        except Exception as exc:
+        except lmdb.Error as exc:
             logger.error(f"Failed to delete expired conversations: {exc}")
             raise
 
@@ -521,19 +563,13 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
         return removed
 
     def stats(self) -> Dict[str, Any]:
-        """
-        Get database statistics.
-
-        Returns:
-            Dict with database statistics
-        """
+        """Get database statistics."""
         if not self._env:
             logger.error("LMDB environment not initialized")
             return {}
-
         try:
             return self._env.stat()
-        except Exception as e:
+        except lmdb.Error as e:
             logger.error(f"Failed to get database stats: {e}")
             return {}
 
@@ -550,21 +586,15 @@ def __del__(self):
 
     @staticmethod
     def remove_think_tags(text: str) -> str:
-        """
-        Remove all <think>...</think> tags and strip whitespace.
-        """
+        """Remove all <think>...</think> tags and strip whitespace."""
         if not text:
             return text
-        # Remove all think blocks anywhere in the text
         cleaned_content = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
         return cleaned_content.strip()
 
     @staticmethod
     def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
-        """
-        Produce a canonical history where assistant messages are cleaned of
-        internal markers and tool call blocks are moved to metadata.
-        """
+        """Clean assistant messages of internal markers and move tool calls to metadata."""
         cleaned_messages = []
         for msg in messages:
             if msg.role == "assistant":
@@ -596,7 +626,6 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
                     for item in msg.content:
                         if isinstance(item, ContentItem) and item.type == "text" and item.text:
                             text = LMDBConversationStore.remove_think_tags(item.text)
-
                             if not msg.tool_calls:
                                 text, extracted = extract_tool_calls(text)
                                 if extracted:
@@ -625,5 +654,4 @@ def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
                     cleaned_messages.append(msg)
             else:
                 cleaned_messages.append(msg)
-
         return cleaned_messages
diff --git a/app/utils/config.py b/app/utils/config.py
index e62832d..3b24931 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -83,11 +83,15 @@ class GeminiConfig(BaseModel):
         default="append",
         description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom",
     )
-    timeout: int = Field(default=300, ge=1, description="Init timeout")
-    watchdog_timeout: int = Field(default=60, ge=1, description="Watchdog timeout")
+    timeout: int = Field(default=300, ge=30, description="Init timeout")
+    watchdog_timeout: int = Field(
+        default=60, ge=10, le=75, description="Watchdog timeout in seconds (Not more than 75s)"
+    )
     auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies")
     refresh_interval: int = Field(
-        default=540, ge=1, description="Interval in seconds to refresh Gemini cookies"
+        default=540,
+        ge=60,
+        description="Interval in seconds to refresh Gemini cookies (Not less than 60s)",
     )
     verbose: bool = Field(False, description="Enable verbose logging for Gemini API requests")
     max_chars_per_request: int = Field(
diff --git a/config/config.yaml b/config/config.yaml
index 2873d48..3d5e6f4 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -22,10 +22,10 @@ gemini:
       secure_1psid: "YOUR_SECURE_1PSID_HERE"
       secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"
       proxy: null          # Optional proxy URL (null/empty means direct connection)
-  timeout: 300             # Init timeout in seconds
-  watchdog_timeout: 60     # Watchdog timeout in seconds (No longer than 75 seconds)
+  timeout: 300             # Init timeout in seconds (Not less than 30s)
+  watchdog_timeout: 60     # Watchdog timeout in seconds (Not more than 75s)
   auto_refresh: true       # Auto-refresh session cookies
-  refresh_interval: 540    # Refresh interval in seconds
+  refresh_interval: 540    # Refresh interval in seconds (Not less than 60s)
   verbose: false           # Enable verbose logging for Gemini requests
   max_chars_per_request: 1000000     # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit
   model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only)

From 5eb9f509d451d76739da1f34b5003b1d7628279b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 11 Feb 2026 12:24:06 +0700
Subject: [PATCH 095/236] Refactor: Optimize fuzzy matching logic

---
 app/services/lmdb.py | 31 ++++++++++++++++---------------
 app/utils/config.py  |  2 +-
 scripts/dump_lmdb.py |  2 +-
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index c90f537..4b57f60 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -370,7 +370,9 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt
                 return conv
 
         if conv := self._find_by_message_list(model, messages, fuzzy=True):
-            logger.debug(f"Session found for '{model}' with fuzzy matching.")
+            logger.debug(
+                f"Session found for '{model}' with {len(messages)} fuzzy matching messages."
+            )
             return conv
 
         logger.debug(f"No session found for '{model}' with {len(messages)} messages.")
@@ -396,6 +398,8 @@ def _find_by_message_list(
         prefix = self.FUZZY_LOOKUP_PREFIX if fuzzy else self.HASH_LOOKUP_PREFIX
         target_len = len(messages)
 
+        target_hashes = [_hash_message(m, fuzzy=fuzzy) for m in messages]
+
         for c in g_config.gemini.clients:
             message_hash = _hash_conversation(c.id, model, messages, fuzzy=fuzzy)
             key = f"{prefix}{message_hash}"
@@ -403,25 +407,22 @@ def _find_by_message_list(
                 with self._get_transaction(write=False) as txn:
                     if mapped := txn.get(key.encode("utf-8")):
                         candidate_keys = self._decode_index_value(mapped)
-                        # Try candidates from newest to oldest
                         for ck in reversed(candidate_keys):
                             if conv := self.get(ck):
                                 if len(conv.messages) != target_len:
                                     continue
 
-                                if fuzzy:
-                                    # For fuzzy matching, verify each message hash individually
-                                    # to prevent semantic collisions (e.g., "1.2" vs "12")
-                                    match_found = True
-                                    for i in range(target_len):
-                                        if _hash_message(
-                                            conv.messages[i], fuzzy=True
-                                        ) != _hash_message(messages[i], fuzzy=True):
-                                            match_found = False
-                                            break
-                                    if not match_found:
-                                        continue
-                                return conv
+                                match_found = True
+                                for i in range(target_len):
+                                    if (
+                                        _hash_message(conv.messages[i], fuzzy=fuzzy)
+                                        != target_hashes[i]
+                                    ):
+                                        match_found = False
+                                        break
+
+                                if match_found:
+                                    return conv
             except lmdb.Error as e:
                 logger.error(
                     f"LMDB error while searching for hash {message_hash} and client {c.id}: {e}"
diff --git a/app/utils/config.py b/app/utils/config.py
index 3b24931..4c1709f 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -83,7 +83,7 @@ class GeminiConfig(BaseModel):
         default="append",
         description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom",
     )
-    timeout: int = Field(default=300, ge=30, description="Init timeout")
+    timeout: int = Field(default=300, ge=30, description="Init timeout in seconds")
     watchdog_timeout: int = Field(
         default=60, ge=10, le=75, description="Watchdog timeout in seconds (Not more than 75s)"
     )
diff --git a/scripts/dump_lmdb.py b/scripts/dump_lmdb.py
index b06b1b4..a331325 100644
--- a/scripts/dump_lmdb.py
+++ b/scripts/dump_lmdb.py
@@ -42,7 +42,7 @@ def dump_lmdb(path: Path, keys: Iterable[str] | None = None) -> None:
             records = _dump_all(txn)
     env.close()
 
-    print(orjson.dumps(records, option=orjson.OPT_INDENT_2).decode())
+    print(orjson.dumps(records, option=orjson.OPT_INDENT_2).decode("utf-8"))
 
 
 def main() -> None:

From 971f2c70f81ac82640cb6a9f3c800be0d7c1143a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 11 Feb 2026 12:28:11 +0700
Subject: [PATCH 096/236] Update dependencies

---
 pyproject.toml |  2 +-
 uv.lock        | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 58391ff..d3a1aaf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ description = "FastAPI Server built on Gemini Web API"
 readme = "README.md"
 requires-python = "==3.12.*"
 dependencies = [
-    "fastapi>=0.128.6",
+    "fastapi>=0.128.7",
     "gemini-webapi>=1.19.1",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
diff --git a/uv.lock b/uv.lock
index 34b5cc8..c038f53 100644
--- a/uv.lock
+++ b/uv.lock
@@ -65,7 +65,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.128.6"
+version = "0.128.7"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -74,9 +74,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/83/d1/195005b5e45b443e305136df47ee7df4493d782e0c039dd0d97065580324/fastapi-0.128.6.tar.gz", hash = "sha256:0cb3946557e792d731b26a42b04912f16367e3c3135ea8290f620e234f2b604f", size = 374757, upload-time = "2026-02-09T17:27:03.541Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a0/fc/af386750b3fd8d8828167e4c82b787a8eeca2eca5c5429c9db8bb7c70e04/fastapi-0.128.7.tar.gz", hash = "sha256:783c273416995486c155ad2c0e2b45905dedfaf20b9ef8d9f6a9124670639a24", size = 375325, upload-time = "2026-02-10T12:26:40.968Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/24/58/a2c4f6b240eeb148fb88cdac48f50a194aba760c1ca4988c6031c66a20ee/fastapi-0.128.6-py3-none-any.whl", hash = "sha256:bb1c1ef87d6086a7132d0ab60869d6f1ee67283b20fbf84ec0003bd335099509", size = 103674, upload-time = "2026-02-09T17:27:02.355Z" },
+    { url = "https://files.pythonhosted.org/packages/af/1a/f983b45661c79c31be575c570d46c437a5409b67a939c1b3d8d6b3ed7a7f/fastapi-0.128.7-py3-none-any.whl", hash = "sha256:6bd9bd31cb7047465f2d3fa3ba3f33b0870b17d4eaf7cdb36d1576ab060ad662", size = 103630, upload-time = "2026-02-10T12:26:39.414Z" },
 ]
 
 [[package]]
@@ -106,8 +106,8 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "fastapi", specifier = ">=0.128.6" },
-    { name = "gemini-webapi", specifier = ">=1.19.0" },
+    { name = "fastapi", specifier = ">=0.128.7" },
+    { name = "gemini-webapi", specifier = ">=1.19.1" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "orjson", specifier = ">=3.11.7" },

From cad23795e41a97fd7cb7e5dd371c03d1bdbec607 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 11 Feb 2026 20:46:42 +0700
Subject: [PATCH 097/236] Refactor: Update Markdown unescape helpers to prevent
 impacting clients like Roo Code

---
 app/services/client.py |  6 ++++++
 app/utils/helper.py    | 44 ++++++++++++++++--------------------------
 2 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 3cdd839..5d248c2 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -14,6 +14,10 @@
     save_url_to_tempfile,
 )
 
+COMMONMARK_UNESCAPE_RE = re.compile(
+    r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])"
+)  # See: https://spec.commonmark.org/current/#backslash-escapes
+
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
     re.IGNORECASE,
@@ -194,6 +198,8 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str:
         else:
             text += str(response)
 
+        text = COMMONMARK_UNESCAPE_RE.sub(r"\1", text)
+
         def extract_file_path_from_display_text(text_content: str) -> str | None:
             match = re.match(FILE_PATH_PATTERN, text_content)
             if match:
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 384f5cd..67bfa55 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -191,7 +191,6 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[
     if not text:
         return text, []
 
-    # Clean hints FIRST so they don't interfere with tool call regexes (e.g. example calls in hint)
     cleaned = strip_system_hints(text)
 
     tool_calls: list[ToolCall] = []
@@ -237,33 +236,24 @@ def _create_tool_call(name: str, raw_args: str) -> None:
             )
         )
 
-    def _replace_block(match: re.Match[str]) -> str:
-        block_content = match.group(1)
-        if not block_content:
-            return match.group(0)
-
-        is_tool_block = bool(TOOL_CALL_RE.search(block_content))
-
-        if is_tool_block:
-            if extract:
-                for call_match in TOOL_CALL_RE.finditer(block_content):
-                    name = (call_match.group(1) or "").strip()
-                    raw_args = (call_match.group(2) or "").strip()
-                    _create_tool_call(name, raw_args)
-            return ""
-        else:
-            return match.group(0)
-
-    def _replace_orphan(match: re.Match[str]) -> str:
-        if extract:
-            name = (match.group(1) or "").strip()
-            raw_args = (match.group(2) or "").strip()
-            _create_tool_call(name, raw_args)
-        return ""
-
-    cleaned = TOOL_BLOCK_RE.sub(_replace_block, cleaned)
-    cleaned = TOOL_CALL_RE.sub(_replace_orphan, cleaned)
+    all_calls = []
+    for match in TOOL_CALL_RE.finditer(cleaned):
+        all_calls.append(
+            {
+                "start": match.start(),
+                "name": (match.group(1) or "").strip(),
+                "args": (match.group(2) or "").strip(),
+            }
+        )
+
+    all_calls.sort(key=lambda x: x["start"])
+
+    if extract:
+        for call in all_calls:
+            _create_tool_call(call["name"], call["args"])
 
+    cleaned = TOOL_BLOCK_RE.sub("", cleaned)
+    cleaned = TOOL_CALL_RE.sub("", cleaned)
     cleaned = RESPONSE_BLOCK_RE.sub("", cleaned)
     cleaned = RESPONSE_ITEM_RE.sub("", cleaned)
 

From 795b8d88a3cfb29e46c4e369c277e810308cc8e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 11 Feb 2026 21:20:11 +0700
Subject: [PATCH 098/236] Refactor: Update Markdown unescape helpers to prevent
 impacting clients like Roo Code

---
 app/services/client.py |  6 ------
 app/utils/helper.py    | 37 +++++++++++++++++++++++++++++--------
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 5d248c2..3cdd839 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -14,10 +14,6 @@
     save_url_to_tempfile,
 )
 
-COMMONMARK_UNESCAPE_RE = re.compile(
-    r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])"
-)  # See: https://spec.commonmark.org/current/#backslash-escapes
-
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
     re.IGNORECASE,
@@ -198,8 +194,6 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str:
         else:
             text += str(response)
 
-        text = COMMONMARK_UNESCAPE_RE.sub(r"\1", text)
-
         def extract_file_path_from_display_text(text_content: str) -> str | None:
             match = re.match(FILE_PATH_PATTERN, text_content)
             if match:
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 67bfa55..ce781bd 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -34,6 +34,9 @@
 RESPONSE_ITEM_RE = re.compile(
     r"\[response:([^]]+)]\s*(.*?)\s*\[/response]", re.DOTALL | re.IGNORECASE
 )
+COMMONMARK_UNESCAPE_RE = re.compile(
+    r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])"
+)  # See: https://spec.commonmark.org/current/#backslash-escapes
 CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>")
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
@@ -192,9 +195,12 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[
         return text, []
 
     cleaned = strip_system_hints(text)
-
     tool_calls: list[ToolCall] = []
 
+    def _unescape_markdown(s: str) -> str:
+        """Restores characters escaped for Markdown rendering."""
+        return COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
+
     def _create_tool_call(name: str, raw_args: str) -> None:
         if not extract:
             return
@@ -202,20 +208,33 @@ def _create_tool_call(name: str, raw_args: str) -> None:
             logger.warning("Encountered tool_call without a function name.")
             return
 
+        prev_name = ""
+        while name != prev_name:
+            prev_name = name
+            name = _unescape_markdown(name)
+
+        def _try_parse_json(s: str) -> dict | None:
+            try:
+                return orjson.loads(s)
+            except orjson.JSONDecodeError:
+                try:
+                    return orjson.loads(_unescape_markdown(s))
+                except orjson.JSONDecodeError:
+                    return None
+
         arguments = raw_args
-        try:
-            parsed_args = orjson.loads(raw_args)
-            arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8")
-        except orjson.JSONDecodeError:
+        parsed_args = _try_parse_json(raw_args)
+
+        if parsed_args is None:
             json_match = re.search(r"({.*})", raw_args, re.DOTALL)
             if json_match:
                 potential_json = json_match.group(1)
-                try:
-                    parsed_args = orjson.loads(potential_json)
+                parsed_args = _try_parse_json(potential_json)
+                if parsed_args is not None:
                     arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode(
                         "utf-8"
                     )
-                except orjson.JSONDecodeError:
+                else:
                     logger.warning(
                         f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(potential_json)}"
                     )
@@ -223,6 +242,8 @@ def _create_tool_call(name: str, raw_args: str) -> None:
                 logger.warning(
                     f"Failed to parse tool call arguments for '{name}'. Passing raw string: {reprlib.repr(raw_args)}"
                 )
+        else:
+            arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8")
 
         index = len(tool_calls)
         seed = f"{name}:{arguments}:{index}".encode("utf-8")

From e85252a3c6000af5f5094560ad95aa8a8e78c184 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 12 Feb 2026 07:59:28 +0700
Subject: [PATCH 099/236] Revert "Refactor: Update Markdown unescape helpers to
 prevent impacting clients like Roo Code"

This reverts commit 795b8d88a3cfb29e46c4e369c277e810308cc8e8.
---
 app/services/client.py |  6 ++++++
 app/utils/helper.py    | 37 ++++++++-----------------------------
 2 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 3cdd839..5d248c2 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -14,6 +14,10 @@
     save_url_to_tempfile,
 )
 
+COMMONMARK_UNESCAPE_RE = re.compile(
+    r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])"
+)  # See: https://spec.commonmark.org/current/#backslash-escapes
+
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
     re.IGNORECASE,
@@ -194,6 +198,8 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str:
         else:
             text += str(response)
 
+        text = COMMONMARK_UNESCAPE_RE.sub(r"\1", text)
+
         def extract_file_path_from_display_text(text_content: str) -> str | None:
             match = re.match(FILE_PATH_PATTERN, text_content)
             if match:
diff --git a/app/utils/helper.py b/app/utils/helper.py
index ce781bd..67bfa55 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -34,9 +34,6 @@
 RESPONSE_ITEM_RE = re.compile(
     r"\[response:([^]]+)]\s*(.*?)\s*\[/response]", re.DOTALL | re.IGNORECASE
 )
-COMMONMARK_UNESCAPE_RE = re.compile(
-    r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])"
-)  # See: https://spec.commonmark.org/current/#backslash-escapes
 CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>")
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
@@ -195,11 +192,8 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[
         return text, []
 
     cleaned = strip_system_hints(text)
-    tool_calls: list[ToolCall] = []
 
-    def _unescape_markdown(s: str) -> str:
-        """Restores characters escaped for Markdown rendering."""
-        return COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
+    tool_calls: list[ToolCall] = []
 
     def _create_tool_call(name: str, raw_args: str) -> None:
         if not extract:
@@ -208,33 +202,20 @@ def _create_tool_call(name: str, raw_args: str) -> None:
             logger.warning("Encountered tool_call without a function name.")
             return
 
-        prev_name = ""
-        while name != prev_name:
-            prev_name = name
-            name = _unescape_markdown(name)
-
-        def _try_parse_json(s: str) -> dict | None:
-            try:
-                return orjson.loads(s)
-            except orjson.JSONDecodeError:
-                try:
-                    return orjson.loads(_unescape_markdown(s))
-                except orjson.JSONDecodeError:
-                    return None
-
         arguments = raw_args
-        parsed_args = _try_parse_json(raw_args)
-
-        if parsed_args is None:
+        try:
+            parsed_args = orjson.loads(raw_args)
+            arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8")
+        except orjson.JSONDecodeError:
             json_match = re.search(r"({.*})", raw_args, re.DOTALL)
             if json_match:
                 potential_json = json_match.group(1)
-                parsed_args = _try_parse_json(potential_json)
-                if parsed_args is not None:
+                try:
+                    parsed_args = orjson.loads(potential_json)
                     arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode(
                         "utf-8"
                     )
-                else:
+                except orjson.JSONDecodeError:
                     logger.warning(
                         f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(potential_json)}"
                     )
@@ -242,8 +223,6 @@ def _try_parse_json(s: str) -> dict | None:
                 logger.warning(
                     f"Failed to parse tool call arguments for '{name}'. Passing raw string: {reprlib.repr(raw_args)}"
                 )
-        else:
-            arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8")
 
         index = len(tool_calls)
         seed = f"{name}:{arguments}:{index}".encode("utf-8")

From 4be41506c95673bf7a747f8ff2629a45c99b8309 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 12 Feb 2026 12:17:12 +0700
Subject: [PATCH 100/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure, such as in Roo Code.

---
 app/server/chat.py     | 259 +++++++++++++++++++++++++----------------
 app/services/client.py |  39 ++++---
 app/services/lmdb.py   |   7 +-
 app/utils/helper.py    |  89 +++++++++-----
 4 files changed, 245 insertions(+), 149 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 30a6b3a..080d015 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -382,11 +382,16 @@ def _build_tool_prompt(
     )
     lines.append("[function_calls]")
     lines.append("[call:tool_name]")
-    lines.append('{"argument": "value"}')
+    lines.append("@args")
+    lines.append("")
+    lines.append("<<<ARG:arg_name>>>")
+    lines.append("value")
+    lines.append("<<<END:arg_name>>>")
+    lines.append("")
     lines.append("[/call]")
     lines.append("[/function_calls]")
     lines.append(
-        "CRITICAL: Every [call:...] MUST have a raw JSON object followed by a mandatory [/call] closing tag. DO NOT use markdown blocks or add text inside the block."
+        "CRITICAL: Arguments MUST use <<<ARG:name>>>...<<<END:name>>> tags. Content inside tags can be any format."
     )
     lines.append(
         "If multiple tools are needed, list them sequentially within the same [function_calls] block."
@@ -394,7 +399,9 @@ def _build_tool_prompt(
     lines.append(
         "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag."
     )
-    lines.append("Note: Tool results are returned in a [function_responses] block.")
+    lines.append(
+        "Note: Tool results are returned in a [function_responses] block using @results and <<<RESULT>>> tags."
+    )
 
     return "\n".join(lines)
 
@@ -774,26 +781,44 @@ def __init__(self):
         self.current_role = ""
         self.block_buffer = ""
 
-        self.TOOL_START = "[function_calls]"
-        self.TOOL_END = "[/function_calls]"
-        self.ORPHAN_START = "[call:"
-        self.ORPHAN_END = "[/call]"
-        self.RESPONSE_START = "[function_responses]"
-        self.RESPONSE_END = "[/function_responses]"
-        self.TAG_START = "<|im_start|>"
-        self.TAG_END = "<|im_end|>"
-        self.HINT_START = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else ""
-        self.HINT_END = TOOL_HINT_LINE_END
-
-        self.WATCH_MARKERS = [
-            self.TOOL_START,
-            self.ORPHAN_START,
-            self.RESPONSE_START,
-            self.TAG_START,
-            self.TAG_END,
-        ]
-        if self.HINT_START:
-            self.WATCH_MARKERS.append(self.HINT_START)
+        self.STATE_MARKERS = {
+            "TOOL": {
+                "starts": ["[function_calls]", "\\[function_calls\\]"],
+                "ends": ["[/function_calls]", "\\[/function_calls\\]"],
+            },
+            "ORPHAN": {
+                "starts": ["[call:", "\\[call:"],
+                "ends": ["[/call]", "\\[/call\\]"],
+            },
+            "RESP": {
+                "starts": ["[function_responses]", "\\[function_responses\\]"],
+                "ends": ["[/function_responses]", "\\[/function_responses\\]"],
+            },
+            "ARG": {
+                "starts": ["<<<ARG:", "\\<\\<\\<ARG:"],
+                "ends": ["<<<END:", "\\<\\<\\<END:"],
+            },
+            "RESULT": {
+                "starts": ["<<<RESULT>>>", "\\<\\<\\<RESULT\\>\\>\\>"],
+                "ends": ["<<<END:RESULT>>>", "\\<\\<\\<END:RESULT\\>\\>\\>"],
+            },
+            "TAG": {
+                "starts": ["<|im_start|>", "\\<|im_start|\\>"],
+                "ends": ["<|im_end|>", "\\<|im_end|\\>"],
+            },
+        }
+
+        hint_start = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else ""
+        if hint_start:
+            self.STATE_MARKERS["HINT"] = {
+                "starts": [hint_start],
+                "ends": [TOOL_HINT_LINE_END],
+            }
+
+        self.WATCH_MARKERS = []
+        for cfg in self.STATE_MARKERS.values():
+            self.WATCH_MARKERS.extend(cfg["starts"])
+            self.WATCH_MARKERS.extend(cfg.get("ends", []))
 
     def process(self, chunk: str) -> str:
         self.buffer += chunk
@@ -802,25 +827,12 @@ def process(self, chunk: str) -> str:
         while self.buffer:
             buf_low = self.buffer.lower()
             if self.state == "NORMAL":
-                tool_idx = buf_low.find(self.TOOL_START)
-                orphan_idx = buf_low.find(self.ORPHAN_START)
-                resp_idx = buf_low.find(self.RESPONSE_START)
-                tag_idx = buf_low.find(self.TAG_START)
-                end_idx = buf_low.find(self.TAG_END)
-                hint_idx = buf_low.find(self.HINT_START) if self.HINT_START else -1
-
-                indices = [
-                    (i, t)
-                    for i, t in [
-                        (tool_idx, "TOOL"),
-                        (orphan_idx, "ORPHAN"),
-                        (resp_idx, "RESP"),
-                        (tag_idx, "TAG"),
-                        (end_idx, "END"),
-                        (hint_idx, "HINT"),
-                    ]
-                    if i != -1
-                ]
+                indices = []
+                for m_type, cfg in self.STATE_MARKERS.items():
+                    for p in cfg["starts"]:
+                        idx = buf_low.find(p.lower())
+                        if idx != -1:
+                            indices.append((idx, m_type, len(p)))
 
                 if not indices:
                     # Guard against split markers (case-insensitive)
@@ -838,76 +850,111 @@ def process(self, chunk: str) -> str:
                     break
 
                 indices.sort()
-                idx, m_type = indices[0]
+                idx, m_type, m_len = indices[0]
                 output.append(self.buffer[:idx])
                 self.buffer = self.buffer[idx:]
 
-                if m_type == "TOOL":
-                    self.state = "IN_TOOL"
-                    self.block_buffer = ""
-                    self.buffer = self.buffer[len(self.TOOL_START) :]
-                elif m_type == "ORPHAN":
-                    self.state = "IN_ORPHAN"
+                self.state = f"IN_{m_type}"
+                if m_type in ("TOOL", "ORPHAN"):
                     self.block_buffer = ""
-                    self.buffer = self.buffer[len(self.ORPHAN_START) :]
-                elif m_type == "RESP":
-                    self.state = "IN_RESP"
-                    self.buffer = self.buffer[len(self.RESPONSE_START) :]
-                elif m_type == "TAG":
-                    self.state = "IN_TAG"
-                    self.buffer = self.buffer[len(self.TAG_START) :]
-                elif m_type == "END":
-                    self.buffer = self.buffer[len(self.TAG_END) :]
-                elif m_type == "HINT":
-                    self.state = "IN_HINT"
-                    self.buffer = self.buffer[len(self.HINT_START) :]
+
+                self.buffer = self.buffer[m_len:]
 
             elif self.state == "IN_HINT":
-                end_idx = buf_low.find(self.HINT_END.lower())
-                if end_idx != -1:
-                    self.buffer = self.buffer[end_idx + len(self.HINT_END) :]
+                cfg = self.STATE_MARKERS["HINT"]
+                found_idx, found_len = -1, 0
+                for p in cfg["ends"]:
+                    idx = buf_low.find(p.lower())
+                    if idx != -1 and (found_idx == -1 or idx < found_idx):
+                        found_idx, found_len = idx, len(p)
+
+                if found_idx != -1:
+                    self.buffer = self.buffer[found_idx + found_len :]
+                    self.state = "NORMAL"
+                else:
+                    max_end_len = max(len(p) for p in cfg["ends"])
+                    if len(self.buffer) > max_end_len:
+                        self.buffer = self.buffer[-max_end_len:]
+                    break
+
+            elif self.state == "IN_ARG":
+                cfg = self.STATE_MARKERS["ARG"]
+                found_idx, found_len = -1, 0
+                for p in cfg["ends"]:
+                    idx = buf_low.find(p.lower())
+                    if idx != -1 and (found_idx == -1 or idx < found_idx):
+                        found_idx, found_len = idx, len(p)
+
+                if found_idx != -1:
+                    bracket_idx = self.buffer.find(">", found_idx + found_len)
+                    if bracket_idx != -1:
+                        end_pos = bracket_idx + 1
+                        while end_pos < len(self.buffer) and self.buffer[end_pos] == ">":
+                            end_pos += 1
+
+                        self.buffer = self.buffer[end_pos:]
+                        self.state = "NORMAL"
+                    else:
+                        break
+                else:
+                    break
+
+            elif self.state == "IN_RESULT":
+                cfg = self.STATE_MARKERS["RESULT"]
+                found_idx, found_len = -1, 0
+                for p in cfg["ends"]:
+                    idx = buf_low.find(p.lower())
+                    if idx != -1 and (found_idx == -1 or idx < found_idx):
+                        found_idx, found_len = idx, len(p)
+
+                if found_idx != -1:
+                    self.buffer = self.buffer[found_idx + found_len :]
                     self.state = "NORMAL"
                 else:
-                    keep_len = len(self.HINT_END) - 1
-                    if len(self.buffer) > keep_len:
-                        self.buffer = self.buffer[-keep_len:]
                     break
 
             elif self.state == "IN_RESP":
-                end_idx = buf_low.find(self.RESPONSE_END.lower())
-                if end_idx != -1:
-                    self.buffer = self.buffer[end_idx + len(self.RESPONSE_END) :]
+                cfg = self.STATE_MARKERS["RESP"]
+                found_idx, found_len = -1, 0
+                for p in cfg["ends"]:
+                    idx = buf_low.find(p.lower())
+                    if idx != -1 and (found_idx == -1 or idx < found_idx):
+                        found_idx, found_len = idx, len(p)
+
+                if found_idx != -1:
+                    self.buffer = self.buffer[found_idx + found_len :]
                     self.state = "NORMAL"
                 else:
-                    keep_len = len(self.RESPONSE_END) - 1
-                    if len(self.buffer) > keep_len:
-                        self.buffer = self.buffer[-keep_len:]
                     break
 
             elif self.state == "IN_TOOL":
-                end_idx = buf_low.find(self.TOOL_END.lower())
-                if end_idx != -1:
-                    self.block_buffer += self.buffer[:end_idx]
-                    self.buffer = self.buffer[end_idx + len(self.TOOL_END) :]
+                cfg = self.STATE_MARKERS["TOOL"]
+                found_idx, found_len = -1, 0
+                for p in cfg["ends"]:
+                    idx = buf_low.find(p.lower())
+                    if idx != -1 and (found_idx == -1 or idx < found_idx):
+                        found_idx, found_len = idx, len(p)
+
+                if found_idx != -1:
+                    self.block_buffer += self.buffer[:found_idx]
+                    self.buffer = self.buffer[found_idx + found_len :]
                     self.state = "NORMAL"
                 else:
-                    keep_len = len(self.TOOL_END) - 1
-                    if len(self.buffer) > keep_len:
-                        self.block_buffer += self.buffer[:-keep_len]
-                        self.buffer = self.buffer[-keep_len:]
                     break
 
             elif self.state == "IN_ORPHAN":
-                end_idx = buf_low.find(self.ORPHAN_END.lower())
-                if end_idx != -1:
-                    self.block_buffer += self.buffer[:end_idx]
-                    self.buffer = self.buffer[end_idx + len(self.ORPHAN_END) :]
+                cfg = self.STATE_MARKERS["ORPHAN"]
+                found_idx, found_len = -1, 0
+                for p in cfg["ends"]:
+                    idx = buf_low.find(p.lower())
+                    if idx != -1 and (found_idx == -1 or idx < found_idx):
+                        found_idx, found_len = idx, len(p)
+
+                if found_idx != -1:
+                    self.block_buffer += self.buffer[:found_idx]
+                    self.buffer = self.buffer[found_idx + found_len :]
                     self.state = "NORMAL"
                 else:
-                    keep_len = len(self.ORPHAN_END) - 1
-                    if len(self.buffer) > keep_len:
-                        self.block_buffer += self.buffer[:-keep_len]
-                        self.buffer = self.buffer[-keep_len:]
                     break
 
             elif self.state == "IN_TAG":
@@ -920,24 +967,30 @@ def process(self, chunk: str) -> str:
                     break
 
             elif self.state == "IN_BLOCK":
-                end_idx = buf_low.find(self.TAG_END.lower())
-                if end_idx != -1:
-                    content = self.buffer[:end_idx]
+                cfg = self.STATE_MARKERS["TAG"]
+                found_idx, found_len = -1, 0
+                for p in cfg["ends"]:
+                    idx = buf_low.find(p.lower())
+                    if idx != -1 and (found_idx == -1 or idx < found_idx):
+                        found_idx, found_len = idx, len(p)
+
+                if found_idx != -1:
+                    content = self.buffer[:found_idx]
                     if self.current_role != "tool":
                         output.append(content)
-                    self.buffer = self.buffer[end_idx + len(self.TAG_END) :]
+                    self.buffer = self.buffer[found_idx + found_len :]
                     self.state = "NORMAL"
                     self.current_role = ""
                 else:
-                    keep_len = len(self.TAG_END) - 1
+                    max_end_len = max(len(p) for p in cfg["ends"])
                     if self.current_role != "tool":
-                        if len(self.buffer) > keep_len:
-                            output.append(self.buffer[:-keep_len])
-                            self.buffer = self.buffer[-keep_len:]
+                        if len(self.buffer) > max_end_len:
+                            output.append(self.buffer[:-max_end_len])
+                            self.buffer = self.buffer[-max_end_len:]
                         break
                     else:
-                        if len(self.buffer) > keep_len:
-                            self.buffer = self.buffer[-keep_len:]
+                        if len(self.buffer) > max_end_len:
+                            self.buffer = self.buffer[-max_end_len:]
                         break
 
         return "".join(output)
@@ -945,11 +998,13 @@ def process(self, chunk: str) -> str:
     def flush(self) -> str:
         res = ""
         if self.state == "IN_TOOL":
-            if self.ORPHAN_START.lower() not in self.block_buffer.lower():
-                res = f"{self.TOOL_START}{self.block_buffer}"
+            orphan_starts = self.STATE_MARKERS["ORPHAN"]["starts"]
+            is_orphan = any(p.lower() in self.block_buffer.lower() for p in orphan_starts)
+            if not is_orphan:
+                res = f"{self.STATE_MARKERS['TOOL']['starts'][0]}{self.block_buffer}"
         elif self.state == "IN_BLOCK" and self.current_role != "tool":
             res = self.buffer
-        elif self.state in ("IN_ORPHAN", "IN_RESP", "IN_HINT"):
+        elif self.state in ("IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"):
             res = ""
         elif self.state == "NORMAL":
             res = self.buffer
diff --git a/app/services/client.py b/app/services/client.py
index 5d248c2..c955456 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -10,14 +10,11 @@
 from ..utils import g_config
 from ..utils.helper import (
     add_tag,
+    normalize_llm_text,
     save_file_to_tempfile,
     save_url_to_tempfile,
 )
 
-COMMONMARK_UNESCAPE_RE = re.compile(
-    r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])"
-)  # See: https://spec.commonmark.org/current/#backslash-escapes
-
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
     re.IGNORECASE,
@@ -89,12 +86,12 @@ async def process_message(
 
         if isinstance(message.content, str):
             if message.content or message.role == "tool":
-                text_fragments.append(message.content or "{}")
+                text_fragments.append(message.content or "")
         elif isinstance(message.content, list):
             for item in message.content:
                 if item.type == "text":
                     if item.text or message.role == "tool":
-                        text_fragments.append(item.text or "{}")
+                        text_fragments.append(item.text or "")
                 elif item.type == "image_url":
                     if not item.image_url:
                         raise ValueError("Image URL cannot be empty")
@@ -113,14 +110,19 @@ async def process_message(
                     else:
                         raise ValueError("File must contain 'file_data' or 'url' key")
         elif message.content is None and message.role == "tool":
-            text_fragments.append("{}")
+            text_fragments.append("")
         elif message.content is not None:
             raise ValueError("Unsupported message content type.")
 
         if message.role == "tool":
             tool_name = message.name or "unknown"
-            combined_content = "\n".join(text_fragments).strip() or "{}"
-            res_block = f"[response:{tool_name}]\n{combined_content}\n[/response]"
+            combined_content = "\n".join(text_fragments).strip()
+            res_block = (
+                f"[response:{tool_name}]\n"
+                f"@results\n\n"
+                f"<<<RESULT>>>\n{combined_content}\n<<<END:RESULT>>>\n\n"
+                f"[/response]"
+            )
             if wrap_tool:
                 text_fragments = [f"[function_responses]\n{res_block}\n[/function_responses]"]
             else:
@@ -130,17 +132,22 @@ async def process_message(
             tool_blocks: list[str] = []
             for call in message.tool_calls:
                 args_text = call.function.arguments.strip()
+                formatted_args = "\n@args\n"
                 try:
                     parsed_args = orjson.loads(args_text)
-                    args_text = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode(
-                        "utf-8"
-                    )
+                    if isinstance(parsed_args, dict):
+                        for k, v in parsed_args.items():
+                            val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8")
+                            formatted_args += f"\n<<<ARG:{k}>>>\n{val_str}\n<<<END:{k}>>>\n"
+                    else:
+                        formatted_args += args_text
                 except orjson.JSONDecodeError:
-                    pass
-                tool_blocks.append(f"[call:{call.function.name}]{args_text}[/call]")
+                    formatted_args += args_text
+
+                tool_blocks.append(f"[call:{call.function.name}]{formatted_args}\n[/call]")
 
             if tool_blocks:
-                tool_section = "[function_calls]\n" + "".join(tool_blocks) + "\n[/function_calls]"
+                tool_section = "[function_calls]\n" + "\n".join(tool_blocks) + "\n[/function_calls]"
                 text_fragments.append(tool_section)
 
         model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
@@ -198,7 +205,7 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str:
         else:
             text += str(response)
 
-        text = COMMONMARK_UNESCAPE_RE.sub(r"\1", text)
+        text = normalize_llm_text(text)
 
         def extract_file_path_from_display_text(text_content: str) -> str | None:
             match = re.match(FILE_PATH_PATTERN, text_content)
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 4b57f60..a90c684 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -1,7 +1,6 @@
 import hashlib
 import re
 import string
-import unicodedata
 from contextlib import contextmanager
 from datetime import datetime, timedelta
 from pathlib import Path
@@ -15,6 +14,7 @@
 from ..utils import g_config
 from ..utils.helper import (
     extract_tool_calls,
+    normalize_llm_text,
     remove_tool_call_blocks,
 )
 from ..utils.singleton import Singleton
@@ -39,11 +39,8 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None:
     if text is None:
         return None
 
-    # Unicode normalization to NFC
-    text = unicodedata.normalize("NFC", text)
+    text = normalize_llm_text(text)
 
-    # Basic cleaning
-    text = text.replace("\r\n", "\n").replace("\r", "\n")
     text = LMDBConversationStore.remove_think_tags(text)
     text = remove_tool_call_blocks(text)
 
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 67bfa55..dfb4abd 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -1,10 +1,12 @@
 import base64
 import hashlib
+import html
 import mimetypes
 import re
 import reprlib
 import struct
 import tempfile
+import unicodedata
 from pathlib import Path
 from urllib.parse import urlparse
 
@@ -19,22 +21,39 @@
     "\nWhen you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:\n"
     "[function_calls]\n"
     "[call:tool_name]\n"
-    '{"argument": "value"}\n'
+    "@args\n"
+    "\n<<<ARG:arg_name>>>\n"
+    "value\n"
+    "<<<END:arg_name>>>\n"
     "[/call]\n"
     "[/function_calls]\n"
-    "CRITICAL: Every [call:...] MUST have a raw JSON object followed by a mandatory [/call] closing tag. DO NOT use markdown blocks or add text inside the block.\n"
+    "CRITICAL: Arguments MUST use <<<ARG:name>>>...<<<END:name>>> tags. Content inside tags can be any format.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"\[function_calls]\s*(.*?)\s*\[/function_calls]", re.DOTALL | re.IGNORECASE
+    r"\\?\[function_calls\\?]\s*(.*?)\s*\\?\[/function_calls\\?]", re.DOTALL | re.IGNORECASE
+)
+TOOL_CALL_RE = re.compile(
+    r"\\?\[call:([^]\\]+)\\?]\s*(.*?)\s*\\?\[/call\\?]", re.DOTALL | re.IGNORECASE
 )
-TOOL_CALL_RE = re.compile(r"\[call:([^]]+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE)
 RESPONSE_BLOCK_RE = re.compile(
-    r"\[function_responses]\s*(.*?)\s*\[/function_responses]", re.DOTALL | re.IGNORECASE
+    r"\\?\[function_responses\\?]\s*(.*?)\s*\\?\[/function_responses\\?]",
+    re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"\[response:([^]]+)]\s*(.*?)\s*\[/response]", re.DOTALL | re.IGNORECASE
+    r"\\?\[response:([^]\\]+)\\?]\s*(.*?)\s*\\?\[/response\\?]", re.DOTALL | re.IGNORECASE
+)
+TAGGED_ARG_RE = re.compile(
+    r"(?:\\?<){3}ARG:([^>\\]+)(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}END:\1(?:\\?>){3}",
+    re.DOTALL | re.IGNORECASE,
+)
+TAGGED_RESULT_RE = re.compile(
+    r"(?:\\?<){3}RESULT(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}END:RESULT(?:\\?>){3}",
+    re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>")
+CONTROL_TOKEN_RE = re.compile(r"\\?<\|im_(?:start|end)\|\\?>")
+COMMONMARK_UNESCAPE_RE = re.compile(
+    r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])"
+)  # See: https://spec.commonmark.org/current/#backslash-escapes
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
 TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
@@ -50,6 +69,26 @@ def add_tag(role: str, content: str, unclose: bool = False) -> str:
     return f"<|im_start|>{role}\n{content}" + ("\n<|im_end|>" if not unclose else "")
 
 
+def normalize_llm_text(s: str) -> str:
+    """
+    Safely normalize LLM-generated text for both display and hashing.
+    Includes: HTML unescaping, NFC normalization, and line ending standardization.
+    """
+    if not s:
+        return ""
+
+    s = html.unescape(s)
+    s = unicodedata.normalize("NFC", s)
+    s = s.replace("\r\n", "\n").replace("\r", "\n")
+
+    return s
+
+
+def unescape_llm_text(s: str) -> str:
+    r"""Unescape characters escaped by Gemini Web's post-processing."""
+    return COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
+
+
 def estimate_tokens(text: str | None) -> int:
     """Estimate the number of tokens heuristically based on character count"""
     if not text:
@@ -202,27 +241,23 @@ def _create_tool_call(name: str, raw_args: str) -> None:
             logger.warning("Encountered tool_call without a function name.")
             return
 
-        arguments = raw_args
-        try:
-            parsed_args = orjson.loads(raw_args)
-            arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8")
-        except orjson.JSONDecodeError:
-            json_match = re.search(r"({.*})", raw_args, re.DOTALL)
-            if json_match:
-                potential_json = json_match.group(1)
-                try:
-                    parsed_args = orjson.loads(potential_json)
-                    arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode(
-                        "utf-8"
-                    )
-                except orjson.JSONDecodeError:
-                    logger.warning(
-                        f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(potential_json)}"
-                    )
+        name = unescape_llm_text(name.strip())
+        raw_args = unescape_llm_text(raw_args)
+
+        arg_matches = TAGGED_ARG_RE.findall(raw_args)
+        if arg_matches:
+            args_dict = {arg_name.strip(): arg_value.strip() for arg_name, arg_value in arg_matches}
+            arguments = orjson.dumps(args_dict).decode("utf-8")
+            logger.debug(f"Successfully parsed {len(args_dict)} tagged arguments for tool: {name}")
+        else:
+            cleaned_raw = raw_args.replace("@args", "").strip()
+            if not cleaned_raw:
+                logger.debug(f"Tool '{name}' called without arguments.")
             else:
                 logger.warning(
-                    f"Failed to parse tool call arguments for '{name}'. Passing raw string: {reprlib.repr(raw_args)}"
+                    f"Malformed arguments for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}"
                 )
+            arguments = "{}"
 
         index = len(tool_calls)
         seed = f"{name}:{arguments}:{index}".encode("utf-8")
@@ -241,7 +276,7 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         all_calls.append(
             {
                 "start": match.start(),
-                "name": (match.group(1) or "").strip(),
+                "name": unescape_llm_text((match.group(1) or "").strip()),
                 "args": (match.group(2) or "").strip(),
             }
         )
@@ -256,6 +291,8 @@ def _create_tool_call(name: str, raw_args: str) -> None:
     cleaned = TOOL_CALL_RE.sub("", cleaned)
     cleaned = RESPONSE_BLOCK_RE.sub("", cleaned)
     cleaned = RESPONSE_ITEM_RE.sub("", cleaned)
+    cleaned = TAGGED_ARG_RE.sub("", cleaned)
+    cleaned = TAGGED_RESULT_RE.sub("", cleaned)
 
     return cleaned, tool_calls
 

From d86798bc360b5ba76f3fb778c3b7e86b736400f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 12 Feb 2026 15:05:48 +0700
Subject: [PATCH 101/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure, such as in Roo Code.

---
 app/server/chat.py     | 114 ++++++++++++++++++-------------
 app/services/client.py |  24 ++++---
 app/services/lmdb.py   |   4 +-
 app/utils/helper.py    | 150 ++++++++++++++++-------------------------
 4 files changed, 141 insertions(+), 151 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 080d015..4262d0d 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -342,7 +342,7 @@ def _build_tool_prompt(
     tools: list[Tool],
     tool_choice: str | ToolChoiceFunction | None,
 ) -> str:
-    """Generate a system prompt chunk describing available tools."""
+    """Generate a system prompt describing available tools and the PascalCase protocol."""
     if not tools:
         return ""
 
@@ -378,29 +378,27 @@ def _build_tool_prompt(
         )
 
     lines.append(
-        "When you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:"
+        "When you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:"
     )
-    lines.append("[function_calls]")
-    lines.append("[call:tool_name]")
+    lines.append("[ToolCalls]")
+    lines.append("[Call:tool_name]")
     lines.append("@args")
-    lines.append("")
-    lines.append("<<<ARG:arg_name>>>")
+    lines.append("<<<CallParameter:arg_name>>>")
     lines.append("value")
-    lines.append("<<<END:arg_name>>>")
-    lines.append("")
-    lines.append("[/call]")
-    lines.append("[/function_calls]")
+    lines.append("<<<EndCallParameter>>>")
+    lines.append("[/Call]")
+    lines.append("[/ToolCalls]")
     lines.append(
-        "CRITICAL: Arguments MUST use <<<ARG:name>>>...<<<END:name>>> tags. Content inside tags can be any format."
+        "CRITICAL: Every argument MUST be enclosed in <<<CallParameter:arg_name>>>...<<<EndCallParameter>>>. Output as RAW text. Content inside tags can be any format."
     )
     lines.append(
-        "If multiple tools are needed, list them sequentially within the same [function_calls] block."
+        "If multiple tools are needed, list them sequentially within the same [ToolCalls] block."
     )
     lines.append(
-        "If no tool call is needed, provide a normal response and NEVER use the [function_calls] tag."
+        "If no tool call is needed, provide a normal response and NEVER use the [ToolCalls] tag."
     )
     lines.append(
-        "Note: Tool results are returned in a [function_responses] block using @results and <<<RESULT>>> tags."
+        "Note: Tool results are returned in a [ToolResults] block using @results and <<<ToolResult>>> tags."
     )
 
     return "\n".join(lines)
@@ -771,8 +769,8 @@ async def _send_with_split(
 
 class StreamingOutputFilter:
     """
-    State Machine filter to suppress technical markers, tool calls, and system hints.
-    Handles fragmentation where markers are split across multiple chunks.
+    Filter to suppress technical protocol markers, tool calls, and system hints from the stream.
+    Uses a state machine to handle fragmentation where markers are split across multiple chunks.
     """
 
     def __init__(self):
@@ -783,28 +781,32 @@ def __init__(self):
 
         self.STATE_MARKERS = {
             "TOOL": {
-                "starts": ["[function_calls]", "\\[function_calls\\]"],
-                "ends": ["[/function_calls]", "\\[/function_calls\\]"],
+                "starts": ["[ToolCalls]", "\\[ToolCalls\\]"],
+                "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"],
             },
             "ORPHAN": {
-                "starts": ["[call:", "\\[call:"],
-                "ends": ["[/call]", "\\[/call\\]"],
+                "starts": ["[Call:", "\\[Call:", "\\[Call\\:"],
+                "ends": ["[/Call]", "\\[/Call\\]"],
             },
             "RESP": {
-                "starts": ["[function_responses]", "\\[function_responses\\]"],
-                "ends": ["[/function_responses]", "\\[/function_responses\\]"],
+                "starts": ["[ToolResults]", "\\[ToolResults\\]"],
+                "ends": ["[/ToolResults]", "\\[/ToolResults\\]"],
             },
             "ARG": {
-                "starts": ["<<<ARG:", "\\<\\<\\<ARG:"],
-                "ends": ["<<<END:", "\\<\\<\\<END:"],
+                "starts": [
+                    "<<<CallParameter:",
+                    "\\<\\<\\<CallParameter:",
+                    "\\<\\<\\<CallParameter\\:",
+                ],
+                "ends": ["<<<EndCallParameter>>>", "\\<\\<\\<EndCallParameter\\>\\>\\>"],
             },
             "RESULT": {
-                "starts": ["<<<RESULT>>>", "\\<\\<\\<RESULT\\>\\>\\>"],
-                "ends": ["<<<END:RESULT>>>", "\\<\\<\\<END:RESULT\\>\\>\\>"],
+                "starts": ["<<<ToolResult>>>", "\\<\\<\\<ToolResult\\>\\>\\>"],
+                "ends": ["<<<EndToolResult>>>", "\\<\\<\\<EndToolResult\\>\\>\\>"],
             },
             "TAG": {
-                "starts": ["<|im_start|>", "\\<|im_start|\\>"],
-                "ends": ["<|im_end|>", "\\<|im_end|\\>"],
+                "starts": ["<|im_start|>", "\\<|im\\_start|\\>"],
+                "ends": ["<|im_end|>", "\\<|im\\_end|\\>"],
             },
         }
 
@@ -815,10 +817,20 @@ def __init__(self):
                 "ends": [TOOL_HINT_LINE_END],
             }
 
+        self.ORPHAN_ENDS = [
+            "<|im_end|>",
+            "\\<|im\\_end|\\>",
+            "[/Call]",
+            "\\[/Call\\]",
+            "[/ToolCalls]",
+            "\\[/ToolCalls\\]",
+        ]
+
         self.WATCH_MARKERS = []
         for cfg in self.STATE_MARKERS.values():
             self.WATCH_MARKERS.extend(cfg["starts"])
             self.WATCH_MARKERS.extend(cfg.get("ends", []))
+        self.WATCH_MARKERS.extend(self.ORPHAN_ENDS)
 
     def process(self, chunk: str) -> str:
         self.buffer += chunk
@@ -834,8 +846,12 @@ def process(self, chunk: str) -> str:
                         if idx != -1:
                             indices.append((idx, m_type, len(p)))
 
+                for p in self.ORPHAN_ENDS:
+                    idx = buf_low.find(p.lower())
+                    if idx != -1:
+                        indices.append((idx, "SKIP", len(p)))
+
                 if not indices:
-                    # Guard against split markers (case-insensitive)
                     keep_len = 0
                     for marker in self.WATCH_MARKERS:
                         m_low = marker.lower()
@@ -854,6 +870,10 @@ def process(self, chunk: str) -> str:
                 output.append(self.buffer[:idx])
                 self.buffer = self.buffer[idx:]
 
+                if m_type == "SKIP":
+                    self.buffer = self.buffer[m_len:]
+                    continue
+
                 self.state = f"IN_{m_type}"
                 if m_type in ("TOOL", "ORPHAN"):
                     self.block_buffer = ""
@@ -886,17 +906,12 @@ def process(self, chunk: str) -> str:
                         found_idx, found_len = idx, len(p)
 
                 if found_idx != -1:
-                    bracket_idx = self.buffer.find(">", found_idx + found_len)
-                    if bracket_idx != -1:
-                        end_pos = bracket_idx + 1
-                        while end_pos < len(self.buffer) and self.buffer[end_pos] == ">":
-                            end_pos += 1
-
-                        self.buffer = self.buffer[end_pos:]
-                        self.state = "NORMAL"
-                    else:
-                        break
+                    self.buffer = self.buffer[found_idx + found_len :]
+                    self.state = "NORMAL"
                 else:
+                    max_end_len = max(len(p) for p in cfg["ends"])
+                    if len(self.buffer) > max_end_len:
+                        self.buffer = self.buffer[-max_end_len:]
                     break
 
             elif self.state == "IN_RESULT":
@@ -911,6 +926,9 @@ def process(self, chunk: str) -> str:
                     self.buffer = self.buffer[found_idx + found_len :]
                     self.state = "NORMAL"
                 else:
+                    max_end_len = max(len(p) for p in cfg["ends"])
+                    if len(self.buffer) > max_end_len:
+                        self.buffer = self.buffer[-max_end_len:]
                     break
 
             elif self.state == "IN_RESP":
@@ -940,6 +958,10 @@ def process(self, chunk: str) -> str:
                     self.buffer = self.buffer[found_idx + found_len :]
                     self.state = "NORMAL"
                 else:
+                    max_end_len = max(len(p) for p in cfg["ends"])
+                    if len(self.buffer) > max_end_len:
+                        self.block_buffer += self.buffer[:-max_end_len]
+                        self.buffer = self.buffer[-max_end_len:]
                     break
 
             elif self.state == "IN_ORPHAN":
@@ -955,6 +977,10 @@ def process(self, chunk: str) -> str:
                     self.buffer = self.buffer[found_idx + found_len :]
                     self.state = "NORMAL"
                 else:
+                    max_end_len = max(len(p) for p in cfg["ends"])
+                    if len(self.buffer) > max_end_len:
+                        self.block_buffer += self.buffer[:-max_end_len]
+                        self.buffer = self.buffer[-max_end_len:]
                     break
 
             elif self.state == "IN_TAG":
@@ -996,16 +1022,12 @@ def process(self, chunk: str) -> str:
         return "".join(output)
 
     def flush(self) -> str:
+        """Release remaining buffer content and perform final cleanup at stream end."""
         res = ""
-        if self.state == "IN_TOOL":
-            orphan_starts = self.STATE_MARKERS["ORPHAN"]["starts"]
-            is_orphan = any(p.lower() in self.block_buffer.lower() for p in orphan_starts)
-            if not is_orphan:
-                res = f"{self.STATE_MARKERS['TOOL']['starts'][0]}{self.block_buffer}"
+        if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"):
+            res = ""
         elif self.state == "IN_BLOCK" and self.current_role != "tool":
             res = self.buffer
-        elif self.state in ("IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"):
-            res = ""
         elif self.state == "NORMAL":
             res = self.buffer
 
diff --git a/app/services/client.py b/app/services/client.py
index c955456..ba203d9 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -78,8 +78,8 @@ async def process_message(
         message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True
     ) -> tuple[str, list[Path | str]]:
         """
-        Process a single Message object into a format suitable for the Gemini API.
-        Extracts text fragments, handles images and files, and appends tool call blocks if present.
+        Process a Message into Gemini API format using the PascalCase technical protocol.
+        Extracts text, handles files, and appends ToolCalls/ToolResults blocks.
         """
         files: list[Path | str] = []
         text_fragments: list[str] = []
@@ -118,13 +118,13 @@ async def process_message(
             tool_name = message.name or "unknown"
             combined_content = "\n".join(text_fragments).strip()
             res_block = (
-                f"[response:{tool_name}]\n"
-                f"@results\n\n"
-                f"<<<RESULT>>>\n{combined_content}\n<<<END:RESULT>>>\n\n"
-                f"[/response]"
+                f"[Result:{tool_name}]\n"
+                f"@results\n"
+                f"<<<ToolResult>>>\n{combined_content}\n<<<EndToolResult>>>\n"
+                f"[/Result]"
             )
             if wrap_tool:
-                text_fragments = [f"[function_responses]\n{res_block}\n[/function_responses]"]
+                text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"]
             else:
                 text_fragments = [res_block]
 
@@ -132,22 +132,24 @@ async def process_message(
             tool_blocks: list[str] = []
             for call in message.tool_calls:
                 args_text = call.function.arguments.strip()
-                formatted_args = "\n@args\n"
+                formatted_args = "@args\n"
                 try:
                     parsed_args = orjson.loads(args_text)
                     if isinstance(parsed_args, dict):
                         for k, v in parsed_args.items():
                             val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8")
-                            formatted_args += f"\n<<<ARG:{k}>>>\n{val_str}\n<<<END:{k}>>>\n"
+                            formatted_args += (
+                                f"<<<CallParameter:{k}>>>\n{val_str}\n<<<EndCallParameter>>>\n"
+                            )
                     else:
                         formatted_args += args_text
                 except orjson.JSONDecodeError:
                     formatted_args += args_text
 
-                tool_blocks.append(f"[call:{call.function.name}]{formatted_args}\n[/call]")
+                tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_args}[/Call]")
 
             if tool_blocks:
-                tool_section = "[function_calls]\n" + "\n".join(tool_blocks) + "\n[/function_calls]"
+                tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]"
                 text_fragments.append(tool_section)
 
         model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index a90c684..ad92bbf 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -33,9 +33,7 @@ def _fuzzy_normalize(text: str | None) -> str | None:
 
 
 def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None:
-    """
-    Perform semantic normalization for hashing.
-    """
+    """Perform safe semantic normalization for hashing using helper utilities."""
     if text is None:
         return None
 
diff --git a/app/utils/helper.py b/app/utils/helper.py
index dfb4abd..25f9c9b 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -18,42 +18,43 @@
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
-    "\nWhen you decide to call tools, you MUST respond ONLY with a single [function_calls] block using this EXACT syntax:\n"
-    "[function_calls]\n"
-    "[call:tool_name]\n"
+    "\nWhen you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n"
+    "[ToolCalls]\n"
+    "[Call:tool_name]\n"
     "@args\n"
-    "\n<<<ARG:arg_name>>>\n"
+    "<<<CallParameter:arg_name>>>\n"
     "value\n"
-    "<<<END:arg_name>>>\n"
-    "[/call]\n"
-    "[/function_calls]\n"
-    "CRITICAL: Arguments MUST use <<<ARG:name>>>...<<<END:name>>> tags. Content inside tags can be any format.\n"
+    "<<<EndCallParameter>>>\n"
+    "[/Call]\n"
+    "[/ToolCalls]\n"
+    "CRITICAL: Every argument MUST be enclosed in <<<CallParameter:arg_name>>>...<<<EndCallParameter>>>. Output as RAW text. Content inside tags can be any format.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"\\?\[function_calls\\?]\s*(.*?)\s*\\?\[/function_calls\\?]", re.DOTALL | re.IGNORECASE
+    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
 )
 TOOL_CALL_RE = re.compile(
-    r"\\?\[call:([^]\\]+)\\?]\s*(.*?)\s*\\?\[/call\\?]", re.DOTALL | re.IGNORECASE
+    r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"\\?\[function_responses\\?]\s*(.*?)\s*\\?\[/function_responses\\?]",
+    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"\\?\[response:([^]\\]+)\\?]\s*(.*?)\s*\\?\[/response\\?]", re.DOTALL | re.IGNORECASE
+    r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]",
+    re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"(?:\\?<){3}ARG:([^>\\]+)(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}END:\1(?:\\?>){3}",
+    r"(?:\\?<){3}CallParameter\\?:((?:[^>\\]|\\.)+)(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}EndCallParameter(?:\\?>){3}",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"(?:\\?<){3}RESULT(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}END:RESULT(?:\\?>){3}",
+    r"(?:\\?<){3}ToolResult(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}EndToolResult(?:\\?>){3}",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(r"\\?<\|im_(?:start|end)\|\\?>")
-COMMONMARK_UNESCAPE_RE = re.compile(
-    r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])"
-)  # See: https://spec.commonmark.org/current/#backslash-escapes
+CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE)
+CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
+CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE)
+COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
 TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
@@ -61,7 +62,7 @@
 
 
 def add_tag(role: str, content: str, unclose: bool = False) -> str:
-    """Surround content with role tags"""
+    """Surround content with ChatML role tags."""
     if role not in VALID_TAG_ROLES:
         logger.warning(f"Unknown role: {role}, returning content without tags")
         return content
@@ -85,12 +86,12 @@ def normalize_llm_text(s: str) -> str:
 
 
 def unescape_llm_text(s: str) -> str:
-    r"""Unescape characters escaped by Gemini Web's post-processing."""
+    """Unescape characters escaped by Gemini Web's post-processing (e.g., \\_ to _)."""
     return COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
 
 
 def estimate_tokens(text: str | None) -> int:
-    """Estimate the number of tokens heuristically based on character count"""
+    """Estimate the number of tokens heuristically based on character count."""
     if not text:
         return 0
     return int(len(text) / 3)
@@ -99,6 +100,7 @@ def estimate_tokens(text: str | None) -> int:
 async def save_file_to_tempfile(
     file_in_base64: str, file_name: str = "", tempdir: Path | None = None
 ) -> Path:
+    """Decode base64 file data and save to a temporary file."""
     data = base64.b64decode(file_in_base64)
     suffix = Path(file_name).suffix if file_name else ".bin"
 
@@ -110,6 +112,7 @@ async def save_file_to_tempfile(
 
 
 async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
+    """Download content from a URL and save to a temporary file."""
     data: bytes | None = None
     suffix: str | None = None
     if url.startswith("data:image/"):
@@ -148,67 +151,48 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
 
 
 def strip_tagged_blocks(text: str) -> str:
-    """Remove <|im_start|>role ... <|im_end|> sections.
-    - tool blocks are removed entirely (including content).
-    - other roles: remove markers and role, keep inner content.
+    """
+    Remove ChatML role blocks (<|im_start|>role...<|im_end|>).
+    Role 'tool' blocks are removed entirely; others have markers stripped but content preserved.
+    Handles both raw and escaped markers consistently.
     """
     if not text:
         return text
 
-    result: list[str] = []
+    result = []
     idx = 0
-    length = len(text)
-    start_marker = "<|im_start|>"
-    end_marker = "<|im_end|>"
-
-    while idx < length:
-        start = text.find(start_marker, idx)
-        if start == -1:
+    while idx < len(text):
+        match_start = CHATML_START_RE.search(text, idx)
+        if not match_start:
             result.append(text[idx:])
             break
 
-        result.append(text[idx:start])
+        result.append(text[idx : match_start.start()])
+        role = match_start.group(1).lower()
+        content_start = match_start.end()
 
-        role_start = start + len(start_marker)
-        newline = text.find("\n", role_start)
-        if newline == -1:
-            result.append(text[start:])
+        match_end = CHATML_END_RE.search(text, content_start)
+        if not match_end:
+            if role != "tool":
+                result.append(text[content_start:])
             break
 
-        role = text[role_start:newline].strip().lower()
-
-        end = text.find(end_marker, newline + 1)
-        if end == -1:
-            if role == "tool":
-                break
-            else:
-                result.append(text[newline + 1 :])
-                break
-
-        block_end = end + len(end_marker)
+        if role != "tool":
+            result.append(text[content_start : match_end.start()])
 
-        if role == "tool":
-            idx = block_end
-            continue
-
-        content = text[newline + 1 : end]
-        result.append(content)
-        idx = block_end
+        idx = match_end.end()
 
     return "".join(result)
 
 
 def strip_system_hints(text: str) -> str:
-    """Remove system-level hint text from a given string."""
+    """Remove system hints, ChatML tags, and technical protocol markers from text."""
     if not text:
         return text
 
-    # Remove the full hints first
     cleaned = text.replace(TOOL_WRAP_HINT, "").replace(TOOL_HINT_STRIPPED, "")
 
-    # Remove fragments or multi-line blocks using derived constants
     if TOOL_HINT_LINE_START and TOOL_HINT_LINE_END:
-        # Match from the start line to the end line, inclusive, handling internal modifications
         pattern = rf"\n?{re.escape(TOOL_HINT_LINE_START)}.*?{re.escape(TOOL_HINT_LINE_END)}\.?\n?"
         cleaned = re.sub(pattern, "", cleaned, flags=re.DOTALL)
 
@@ -218,20 +202,26 @@ def strip_system_hints(text: str) -> str:
         cleaned = re.sub(rf"\s*{re.escape(TOOL_HINT_LINE_END)}\.?\n?", "", cleaned)
 
     cleaned = strip_tagged_blocks(cleaned)
+
     cleaned = CONTROL_TOKEN_RE.sub("", cleaned)
+    cleaned = TOOL_BLOCK_RE.sub("", cleaned)
+    cleaned = TOOL_CALL_RE.sub("", cleaned)
+    cleaned = RESPONSE_BLOCK_RE.sub("", cleaned)
+    cleaned = RESPONSE_ITEM_RE.sub("", cleaned)
+    cleaned = TAGGED_ARG_RE.sub("", cleaned)
+    cleaned = TAGGED_RESULT_RE.sub("", cleaned)
+
     return cleaned
 
 
 def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]:
     """
-    Unified engine for stripping tool call blocks and extracting tool metadata.
-    If extract=True, parses JSON arguments and assigns deterministic call IDs.
+    Extract tool metadata and return text stripped of technical markers.
+    Arguments are parsed into JSON and assigned deterministic call IDs.
     """
     if not text:
         return text, []
 
-    cleaned = strip_system_hints(text)
-
     tool_calls: list[ToolCall] = []
 
     def _create_tool_call(name: str, raw_args: str) -> None:
@@ -271,45 +261,27 @@ def _create_tool_call(name: str, raw_args: str) -> None:
             )
         )
 
-    all_calls = []
-    for match in TOOL_CALL_RE.finditer(cleaned):
-        all_calls.append(
-            {
-                "start": match.start(),
-                "name": unescape_llm_text((match.group(1) or "").strip()),
-                "args": (match.group(2) or "").strip(),
-            }
-        )
-
-    all_calls.sort(key=lambda x: x["start"])
-
-    if extract:
-        for call in all_calls:
-            _create_tool_call(call["name"], call["args"])
+    for match in TOOL_CALL_RE.finditer(text):
+        _create_tool_call(match.group(1), match.group(2))
 
-    cleaned = TOOL_BLOCK_RE.sub("", cleaned)
-    cleaned = TOOL_CALL_RE.sub("", cleaned)
-    cleaned = RESPONSE_BLOCK_RE.sub("", cleaned)
-    cleaned = RESPONSE_ITEM_RE.sub("", cleaned)
-    cleaned = TAGGED_ARG_RE.sub("", cleaned)
-    cleaned = TAGGED_RESULT_RE.sub("", cleaned)
+    cleaned = strip_system_hints(text)
 
     return cleaned, tool_calls
 
 
 def remove_tool_call_blocks(text: str) -> str:
-    """Strip tool call code blocks from text."""
+    """Strip tool call blocks from text for display."""
     cleaned, _ = _process_tools_internal(text, extract=False)
     return cleaned
 
 
 def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
-    """Extract tool call definitions and return cleaned text."""
+    """Extract tool calls and return cleaned text."""
     return _process_tools_internal(text, extract=True)
 
 
 def text_from_message(message: Message) -> str:
-    """Return text content from a message for token estimation."""
+    """Concatenate text and tool arguments from a message for token estimation."""
     base_text = ""
     if isinstance(message.content, str):
         base_text = message.content
@@ -329,7 +301,6 @@ def text_from_message(message: Message) -> str:
 
 def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
     """Return image dimensions (width, height) if PNG or JPEG headers are present."""
-    # PNG: dimensions stored in bytes 16..24 of the IHDR chunk
     if len(data) >= 24 and data.startswith(b"\x89PNG\r\n\x1a\n"):
         try:
             width, height = struct.unpack(">II", data[16:24])
@@ -337,7 +308,6 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
         except struct.error:
             return None, None
 
-    # JPEG: dimensions stored in SOF segment; iterate through markers to locate it
     if len(data) >= 4 and data[0:2] == b"\xff\xd8":
         idx = 2
         length = len(data)
@@ -357,7 +327,6 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
             0xCF,
         }
         while idx < length:
-            # Find marker alignment (markers are prefixed with 0xFF bytes)
             if data[idx] != 0xFF:
                 idx += 1
                 continue
@@ -380,7 +349,6 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
 
             if marker in sof_markers:
                 if idx + 4 < length:
-                    # Skip precision byte at idx, then read height/width (big-endian)
                     height = (data[idx + 1] << 8) + data[idx + 2]
                     width = (data[idx + 3] << 8) + data[idx + 4]
                     return int(width), int(height)

From 0d18e9e84525c346bf4cf5fb3b545f7884f2157f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 12 Feb 2026 16:55:17 +0700
Subject: [PATCH 102/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure, such as in Roo Code.

---
 app/server/chat.py     | 25 +++++++++++--------------
 app/services/client.py | 14 +++-----------
 app/utils/helper.py    | 15 ++++++---------
 3 files changed, 20 insertions(+), 34 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 4262d0d..66c6d11 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -382,14 +382,11 @@ def _build_tool_prompt(
     )
     lines.append("[ToolCalls]")
     lines.append("[Call:tool_name]")
-    lines.append("@args")
-    lines.append("<<<CallParameter:arg_name>>>")
-    lines.append("value")
-    lines.append("<<<EndCallParameter>>>")
+    lines.append("[CallParameter:arg_name]value[/CallParameter]")
     lines.append("[/Call]")
     lines.append("[/ToolCalls]")
     lines.append(
-        "CRITICAL: Every argument MUST be enclosed in <<<CallParameter:arg_name>>>...<<<EndCallParameter>>>. Output as RAW text. Content inside tags can be any format."
+        "CRITICAL: Every argument MUST be enclosed in [CallParameter:arg_name]...[/CallParameter]. Output as RAW text. Content inside tags can be any format."
     )
     lines.append(
         "If multiple tools are needed, list them sequentially within the same [ToolCalls] block."
@@ -398,7 +395,7 @@ def _build_tool_prompt(
         "If no tool call is needed, provide a normal response and NEVER use the [ToolCalls] tag."
     )
     lines.append(
-        "Note: Tool results are returned in a [ToolResults] block using @results and <<<ToolResult>>> tags."
+        "Note: Tool results are returned in a [ToolResults] block using [ToolResult] tags."
     )
 
     return "\n".join(lines)
@@ -793,16 +790,12 @@ def __init__(self):
                 "ends": ["[/ToolResults]", "\\[/ToolResults\\]"],
             },
             "ARG": {
-                "starts": [
-                    "<<<CallParameter:",
-                    "\\<\\<\\<CallParameter:",
-                    "\\<\\<\\<CallParameter\\:",
-                ],
-                "ends": ["<<<EndCallParameter>>>", "\\<\\<\\<EndCallParameter\\>\\>\\>"],
+                "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"],
+                "ends": ["[/CallParameter]", "\\[/CallParameter\\]"],
             },
             "RESULT": {
-                "starts": ["<<<ToolResult>>>", "\\<\\<\\<ToolResult\\>\\>\\>"],
-                "ends": ["<<<EndToolResult>>>", "\\<\\<\\<EndToolResult\\>\\>\\>"],
+                "starts": ["[ToolResult]", "\\[ToolResult\\]"],
+                "ends": ["[/ToolResult]", "\\[/ToolResult\\]"],
             },
             "TAG": {
                 "starts": ["<|im_start|>", "\\<|im\\_start|\\>"],
@@ -824,6 +817,10 @@ def __init__(self):
             "\\[/Call\\]",
             "[/ToolCalls]",
             "\\[/ToolCalls\\]",
+            "[/CallParameter]",
+            "\\[/CallParameter\\]",
+            "[/ToolResult]",
+            "\\[/ToolResult\\]",
         ]
 
         self.WATCH_MARKERS = []
diff --git a/app/services/client.py b/app/services/client.py
index ba203d9..9f9ac0f 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -118,10 +118,7 @@ async def process_message(
             tool_name = message.name or "unknown"
             combined_content = "\n".join(text_fragments).strip()
             res_block = (
-                f"[Result:{tool_name}]\n"
-                f"@results\n"
-                f"<<<ToolResult>>>\n{combined_content}\n<<<EndToolResult>>>\n"
-                f"[/Result]"
+                f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]"
             )
             if wrap_tool:
                 text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"]
@@ -138,9 +135,7 @@ async def process_message(
                     if isinstance(parsed_args, dict):
                         for k, v in parsed_args.items():
                             val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8")
-                            formatted_args += (
-                                f"<<<CallParameter:{k}>>>\n{val_str}\n<<<EndCallParameter>>>\n"
-                            )
+                            formatted_args += f"[CallParameter:{k}]{val_str}[/CallParameter]\n"
                     else:
                         formatted_args += args_text
                 except orjson.JSONDecodeError:
@@ -171,7 +166,6 @@ async def process_conversation(
         while i < len(messages):
             msg = messages[i]
             if msg.role == "tool":
-                # Group consecutive tool messages
                 tool_blocks: list[str] = []
                 while i < len(messages) and messages[i].role == "tool":
                     part, part_files = await GeminiClientWrapper.process_message(
@@ -182,9 +176,7 @@ async def process_conversation(
                     i += 1
 
                 combined_tool_content = "\n".join(tool_blocks)
-                wrapped_content = (
-                    f"[function_responses]\n{combined_tool_content}\n[/function_responses]"
-                )
+                wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]"
                 conversation.append(add_tag("tool", wrapped_content))
             else:
                 input_part, files_part = await GeminiClientWrapper.process_message(
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 25f9c9b..4172154 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -21,13 +21,10 @@
     "\nWhen you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n"
     "[ToolCalls]\n"
     "[Call:tool_name]\n"
-    "@args\n"
-    "<<<CallParameter:arg_name>>>\n"
-    "value\n"
-    "<<<EndCallParameter>>>\n"
+    "[CallParameter:arg_name]value[/CallParameter]\n"
     "[/Call]\n"
     "[/ToolCalls]\n"
-    "CRITICAL: Every argument MUST be enclosed in <<<CallParameter:arg_name>>>...<<<EndCallParameter>>>. Output as RAW text. Content inside tags can be any format.\n"
+    "CRITICAL: Every argument MUST be enclosed in [CallParameter:arg_name]...[/CallParameter]. Output as RAW text. Content inside tags can be any format.\n"
 )
 TOOL_BLOCK_RE = re.compile(
     r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
@@ -44,11 +41,11 @@
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"(?:\\?<){3}CallParameter\\?:((?:[^>\\]|\\.)+)(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}EndCallParameter(?:\\?>){3}",
+    r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"(?:\\?<){3}ToolResult(?:\\?>){3}\s*(.*?)\s*(?:\\?<){3}EndToolResult(?:\\?>){3}",
+    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE)
@@ -86,7 +83,7 @@ def normalize_llm_text(s: str) -> str:
 
 
 def unescape_llm_text(s: str) -> str:
-    """Unescape characters escaped by Gemini Web's post-processing (e.g., \\_ to _)."""
+    """Unescape characters escaped by Gemini Web's post-processing."""
     return COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
 
 
@@ -240,7 +237,7 @@ def _create_tool_call(name: str, raw_args: str) -> None:
             arguments = orjson.dumps(args_dict).decode("utf-8")
             logger.debug(f"Successfully parsed {len(args_dict)} tagged arguments for tool: {name}")
         else:
-            cleaned_raw = raw_args.replace("@args", "").strip()
+            cleaned_raw = raw_args.strip()
             if not cleaned_raw:
                 logger.debug(f"Tool '{name}' called without arguments.")
             else:

From 8fa4329c5a1483784876630e0a891e7dba781fdd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 12 Feb 2026 17:11:30 +0700
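Subject: [PATCH 103/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure, such as in Roo Code.

Build the stored output in `_process_llm_output` with
`remove_tool_call_blocks` instead of running `extract_tool_calls` a second
time and discarding the tool calls it returns.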

---
 app/server/chat.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 66c6d11..c31a079 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -49,6 +49,7 @@
     estimate_tokens,
     extract_image_dimensions,
     extract_tool_calls,
+    remove_tool_call_blocks,
     strip_system_hints,
     text_from_message,
 )
@@ -221,7 +222,7 @@ def _process_llm_output(
     structured_requirement: StructuredOutputRequirement | None,
 ) -> tuple[str, str, list[Any]]:
     """
-    Common post-processing logic for Gemini output.
+    Post-process Gemini output to extract tool calls and prepare clean text for display and storage.
     Returns: (visible_text, storage_output, tool_calls)
     """
     visible_with_think, tool_calls = extract_tool_calls(raw_output_with_think)
@@ -230,7 +231,7 @@ def _process_llm_output(
 
     visible_output = visible_with_think.strip()
 
-    storage_output, _ = extract_tool_calls(raw_output_clean)
+    storage_output = remove_tool_call_blocks(raw_output_clean)
     storage_output = storage_output.strip()
 
     if structured_requirement:

From dcd7276ee41a202eb840494c986074da3a53499d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 12 Feb 2026 20:36:20 +0700
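Subject: [PATCH 104/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure, such as in Roo Code.

Move the Google Search link cleanup (`FILE_PATH_PATTERN`,
`GOOGLE_SEARCH_LINK_PATTERN`) out of `extract_output` in
`app/services/client.py` and into `unescape_llm_text` in
`app/utils/helper.py`, and normalize spaced-out conflict markers
(`<<<<<<<`, `=======`, `>>>>>>>`) in the same function.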

---
 app/services/client.py | 33 +--------------------------------
 app/utils/helper.py    | 35 +++++++++++++++++++++++++++++++----
 2 files changed, 32 insertions(+), 36 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 9f9ac0f..6ab80cd 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -1,4 +1,3 @@
-import re
 from pathlib import Path
 from typing import Any, cast
 
@@ -15,13 +14,6 @@
     save_url_to_tempfile,
 )
 
-FILE_PATH_PATTERN = re.compile(
-    r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
-    re.IGNORECASE,
-)
-GOOGLE_SEARCH_LINK_PATTERN = re.compile(
-    r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?"
-)
 _UNSET = object()
 
 
@@ -199,27 +191,4 @@ def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str:
         else:
             text += str(response)
 
-        text = normalize_llm_text(text)
-
-        def extract_file_path_from_display_text(text_content: str) -> str | None:
-            match = re.match(FILE_PATH_PATTERN, text_content)
-            if match:
-                return match.group(1)
-            return None
-
-        def replacer(match: re.Match) -> str:
-            display_text = str(match.group(1)).strip()
-            google_search_prefix = match.group(2)
-            query_part = match.group(3)
-
-            file_path = extract_file_path_from_display_text(display_text)
-
-            if file_path:
-                # If it's a file path, transform it into a self-referencing Markdown link
-                return f"[`{file_path}`]({file_path})"
-            else:
-                # Otherwise, reconstruct the original Google search link with the display_text
-                original_google_search_url = f"{google_search_prefix}{query_part}"
-                return f"[`{display_text}`]({original_google_search_url})"
-
-        return re.sub(GOOGLE_SEARCH_LINK_PATTERN, replacer, text)
+        return normalize_llm_text(text)
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 4172154..ec39ebc 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -52,6 +52,16 @@
 CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
 CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
+FILE_PATH_PATTERN = re.compile(
+    r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
+    re.IGNORECASE,
+)
+GOOGLE_SEARCH_LINK_PATTERN = re.compile(
+    r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?"
+)
+CONFLICT_START_RE = re.compile(r"<(?:\s*<){6,}")
+CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}")
+CONFLICT_END_RE = re.compile(r">(?:\s*>){6,}")
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
 TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
@@ -82,9 +92,26 @@ def normalize_llm_text(s: str) -> str:
     return s
 
 
+def _strip_google_search_links(match: re.Match) -> str:
+    """Extract local Markdown link from Google Search links if applicable."""
+    display_text = str(match.group(1)).strip()
+    if FILE_PATH_PATTERN.match(display_text):
+        return f"[`{display_text}`]({display_text})"
+    return match.group(0)
+
+
 def unescape_llm_text(s: str) -> str:
-    """Unescape characters escaped by Gemini Web's post-processing."""
-    return COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
+    """Unescape and mend text fragments broken by Gemini Web's post-processing."""
+    if not s:
+        return ""
+
+    s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
+
+    s = CONFLICT_START_RE.sub("<<<<<<<", s)
+    s = CONFLICT_SEP_RE.sub("=======", s)
+    s = CONFLICT_END_RE.sub(">>>>>>>", s)
+
+    return GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s)
 
 
 def estimate_tokens(text: str | None) -> int:
@@ -235,11 +262,11 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         if arg_matches:
             args_dict = {arg_name.strip(): arg_value.strip() for arg_name, arg_value in arg_matches}
             arguments = orjson.dumps(args_dict).decode("utf-8")
-            logger.debug(f"Successfully parsed {len(args_dict)} tagged arguments for tool: {name}")
+            logger.debug(f"Successfully parsed {len(args_dict)} arguments for tool: {name}")
         else:
             cleaned_raw = raw_args.strip()
             if not cleaned_raw:
-                logger.debug(f"Tool '{name}' called without arguments.")
+                logger.debug(f"Successfully parsed 0 arguments for tool: {name}")
             else:
                 logger.warning(
                     f"Malformed arguments for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}"

From 737aa3aa4ac51a377ab3687e73623c7943c14872 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 12 Feb 2026 23:24:44 +0700
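Subject: [PATCH 105/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure, such as in Roo Code.

Add `_remove_injected_fences`, which strips anonymous Markdown code fences
while keeping named ones, and call it from `unescape_llm_text`.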

---
 app/utils/helper.py | 66 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 63 insertions(+), 3 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index ec39ebc..0a83f8d 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -100,18 +100,78 @@ def _strip_google_search_links(match: re.Match) -> str:
     return match.group(0)
 
 
+def _remove_injected_fences(s: str) -> str:
+    """
+    Strip anonymous Markdown code fences often injected by LLMs around
+    responses or tool calls, while preserving named blocks and all internal content.
+    """
+    if not s:
+        return ""
+
+    lines = s.splitlines()
+    out = []
+    in_fence = False
+    fence_len = 0
+    is_anonymous = False
+
+    for line in lines:
+        stripped = line.strip()
+        if stripped.startswith("```"):
+            count = 0
+            for char in stripped:
+                if char == "`":
+                    count += 1
+                else:
+                    break
+
+            lang = stripped[count:].strip()
+
+            if not in_fence:
+                in_fence = True
+                fence_len = count
+                is_anonymous = not lang
+                if not is_anonymous:
+                    out.append(line)
+                continue
+
+            if count >= fence_len:
+                if is_anonymous and lang:
+                    out.append(line)
+                    continue
+
+                if not is_anonymous:
+                    out.append(line)
+                in_fence = False
+                is_anonymous = False
+                fence_len = 0
+                continue
+
+        out.append(line)
+
+    return "\n".join(out)
+
+
 def unescape_llm_text(s: str) -> str:
-    """Unescape and mend text fragments broken by Gemini Web's post-processing."""
+    """
+    Standardize and repair LLM-generated text fragments.
+
+    Sequence:
+    1. Reverse CommonMark escapes.
+    2. Restore git conflict markers broken by web processing.
+    3. Strip injected anonymous code fences.
+    4. Process and normalize Google Search links.
+    """
     if not s:
         return ""
 
     s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
-
     s = CONFLICT_START_RE.sub("<<<<<<<", s)
     s = CONFLICT_SEP_RE.sub("=======", s)
     s = CONFLICT_END_RE.sub(">>>>>>>", s)
+    s = _remove_injected_fences(s)
+    s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s)
 
-    return GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s)
+    return s
 
 
 def estimate_tokens(text: str | None) -> int:

From 2c808955f8f761e1ab062c4d2cd5aab705ac050c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 08:55:58 +0700
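Subject: [PATCH 106/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure, such as in Roo Code.

Add `CONFLICT_SEP_DASH_RE` so that runs of seven or more hyphens or em
dashes (optionally separated by whitespace) are normalized to `-------`.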

---
 app/utils/helper.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 0a83f8d..2367b72 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -61,6 +61,7 @@
 )
 CONFLICT_START_RE = re.compile(r"<(?:\s*<){6,}")
 CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}")
+CONFLICT_SEP_DASH_RE = re.compile(r"[-—](?:\s*[-—]){6,}")
 CONFLICT_END_RE = re.compile(r">(?:\s*>){6,}")
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
@@ -167,6 +168,7 @@ def unescape_llm_text(s: str) -> str:
     s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
     s = CONFLICT_START_RE.sub("<<<<<<<", s)
     s = CONFLICT_SEP_RE.sub("=======", s)
+    s = CONFLICT_SEP_DASH_RE.sub("-------", s)
     s = CONFLICT_END_RE.sub(">>>>>>>", s)
     s = _remove_injected_fences(s)
     s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s)

From 9b0e1d5365ba323a406f77c7452283ef7a9879a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 09:23:08 +0700
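Subject: [PATCH 107/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure, such as in Roo Code.

Let the conflict-marker patterns capture an optional leading backslash and
apply them before the CommonMark unescape step, doubling the captured
backslash so that one copy survives the unescape.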

---
 app/utils/helper.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 2367b72..e612252 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -59,10 +59,10 @@
 GOOGLE_SEARCH_LINK_PATTERN = re.compile(
     r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?"
 )
-CONFLICT_START_RE = re.compile(r"<(?:\s*<){6,}")
-CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}")
-CONFLICT_SEP_DASH_RE = re.compile(r"[-—](?:\s*[-—]){6,}")
-CONFLICT_END_RE = re.compile(r">(?:\s*>){6,}")
+CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}")
+CONFLICT_SEP_RE = re.compile(r"(\\?)\s*=\s*(?:=\s*){6,}")
+CONFLICT_SEP_DASH_RE = re.compile(r"(\\?)\s*[-—]\s*(?:[-—]\s*){6,}")
+CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}")
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
 TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
@@ -155,21 +155,24 @@ def _remove_injected_fences(s: str) -> str:
 def unescape_llm_text(s: str) -> str:
     """
     Standardize and repair LLM-generated text fragments.
+    These patches are specifically designed for complex clients like Roo Code to ensure
+    compatibility with their specialized tool protocols (e.g., apply_diff) which may be
+    mangled by Gemini Web's interface or browser auto-formatting.
 
     Sequence:
-    1. Reverse CommonMark escapes.
-    2. Restore git conflict markers broken by web processing.
+    1. Restore git conflict markers and DOUBLE any leading backslash to protect it.
+    2. Reverse CommonMark escapes (consuming one level of doubled backslashes).
     3. Strip injected anonymous code fences.
     4. Process and normalize Google Search links.
     """
     if not s:
         return ""
 
+    s = CONFLICT_START_RE.sub(r"\1\1<<<<<<<", s)
+    s = CONFLICT_SEP_RE.sub(r"\1\1=======", s)
+    s = CONFLICT_SEP_DASH_RE.sub(r"\1\1-------", s)
+    s = CONFLICT_END_RE.sub(r"\1\1>>>>>>>", s)
     s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
-    s = CONFLICT_START_RE.sub("<<<<<<<", s)
-    s = CONFLICT_SEP_RE.sub("=======", s)
-    s = CONFLICT_SEP_DASH_RE.sub("-------", s)
-    s = CONFLICT_END_RE.sub(">>>>>>>", s)
     s = _remove_injected_fences(s)
     s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s)
 

From 95f87f6d905cdbb1352698e03f29e0821a224d31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 09:38:08 +0700
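Subject: [PATCH 108/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure, such as in Roo Code.

Revert the `=======` and `-------` separator patterns to their
backslash-free form; only the `<<<<<<<` and `>>>>>>>` patterns keep the
captured leading backslash.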

---
 app/utils/helper.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index e612252..8262b85 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -60,8 +60,8 @@
     r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?"
 )
 CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}")
-CONFLICT_SEP_RE = re.compile(r"(\\?)\s*=\s*(?:=\s*){6,}")
-CONFLICT_SEP_DASH_RE = re.compile(r"(\\?)\s*[-—]\s*(?:[-—]\s*){6,}")
+CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}")
+CONFLICT_SEP_DASH_RE = re.compile(r"[-—](?:\s*[-—]){6,}")
 CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}")
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
@@ -169,8 +169,8 @@ def unescape_llm_text(s: str) -> str:
         return ""
 
     s = CONFLICT_START_RE.sub(r"\1\1<<<<<<<", s)
-    s = CONFLICT_SEP_RE.sub(r"\1\1=======", s)
-    s = CONFLICT_SEP_DASH_RE.sub(r"\1\1-------", s)
+    s = CONFLICT_SEP_RE.sub("=======", s)
+    s = CONFLICT_SEP_DASH_RE.sub("-------", s)
     s = CONFLICT_END_RE.sub(r"\1\1>>>>>>>", s)
     s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
     s = _remove_injected_fences(s)

From 4569689ad1280aa2808927345c1a2b29e170f997 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 10:47:56 +0700
Subject: [PATCH 109/236] Refactor: Update `unescape_llm_text` to standardize
 and repair LLM-generated text fragments.

- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols.
---
 app/utils/helper.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 8262b85..89429c9 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -59,10 +59,10 @@
 GOOGLE_SEARCH_LINK_PATTERN = re.compile(
     r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?"
 )
-CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}")
-CONFLICT_SEP_RE = re.compile(r"=(?:\s*=){6,}")
-CONFLICT_SEP_DASH_RE = re.compile(r"[-—](?:\s*[-—]){6,}")
-CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}")
+CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}(?:\s*(SEARCH)\b)?", re.IGNORECASE)
+CONFLICT_SEP_RE = re.compile(r"(\\?)\s*=(?:\s*=){6,}")
+CONFLICT_SEP_DASH_RE = re.compile(r"(\\?)\s*[-—](?:\s*[-—]){6,}")
+CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}(?:\s*(REPLACE)\b)?", re.IGNORECASE)
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
 TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
@@ -168,10 +168,14 @@ def unescape_llm_text(s: str) -> str:
     if not s:
         return ""
 
-    s = CONFLICT_START_RE.sub(r"\1\1<<<<<<<", s)
-    s = CONFLICT_SEP_RE.sub("=======", s)
-    s = CONFLICT_SEP_DASH_RE.sub("-------", s)
-    s = CONFLICT_END_RE.sub(r"\1\1>>>>>>>", s)
+    s = CONFLICT_START_RE.sub(
+        lambda m: (m.group(1) or "") + "<<<<<<<" + (" SEARCH" if m.group(2) else ""), s
+    )
+    s = CONFLICT_SEP_RE.sub(lambda m: (m.group(1) or "") + "=======", s)
+    s = CONFLICT_SEP_DASH_RE.sub(lambda m: (m.group(1) or "") + "-------", s)
+    s = CONFLICT_END_RE.sub(
+        lambda m: (m.group(1) or "") + ">>>>>>>" + (" REPLACE" if m.group(2) else ""), s
+    )
     s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
     s = _remove_injected_fences(s)
     s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s)

From 5a65cb6a9ebb6a51a21016f5d977bbef76f23f55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 10:56:50 +0700
Subject: [PATCH 110/236] Refactor: Update `unescape_llm_text` to standardize
 and repair LLM-generated text fragments.

- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols.
---
 app/utils/helper.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 89429c9..ff339e1 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -154,28 +154,29 @@ def _remove_injected_fences(s: str) -> str:
 
 def unescape_llm_text(s: str) -> str:
     """
-    Standardize and repair LLM-generated text fragments.
-    These patches are specifically designed for complex clients like Roo Code to ensure
-    compatibility with their specialized tool protocols (e.g., apply_diff) which may be
-    mangled by Gemini Web's interface or browser auto-formatting.
+    Standardize and repair LLM-generated text fragments for specialized client protocols.
+    Designed to ensure compatibility with clients like Roo Code by fixing
+    mangled conflict markers, escaping issues, and injected Markdown formatting.
 
     Sequence:
-    1. Restore git conflict markers and DOUBLE any leading backslash to protect it.
-    2. Reverse CommonMark escapes (consuming one level of doubled backslashes).
-    3. Strip injected anonymous code fences.
+    1. Normalize git conflict markers (handles mangled spacing and keyword standardization).
+    2. Reverse CommonMark escapes (removes leading backslashes from structural markers).
+    3. Strip injected anonymous Markdown code fences.
     4. Process and normalize Google Search links.
     """
     if not s:
         return ""
 
-    s = CONFLICT_START_RE.sub(
-        lambda m: (m.group(1) or "") + "<<<<<<<" + (" SEARCH" if m.group(2) else ""), s
-    )
-    s = CONFLICT_SEP_RE.sub(lambda m: (m.group(1) or "") + "=======", s)
-    s = CONFLICT_SEP_DASH_RE.sub(lambda m: (m.group(1) or "") + "-------", s)
-    s = CONFLICT_END_RE.sub(
-        lambda m: (m.group(1) or "") + ">>>>>>>" + (" REPLACE" if m.group(2) else ""), s
-    )
+    if any(c in s for c in ("<", "=", ">", "-", "—")):
+        s = CONFLICT_START_RE.sub(
+            lambda m: (m.group(1) or "") + "<<<<<<<" + (" SEARCH" if m.group(2) else ""), s
+        )
+        s = CONFLICT_SEP_RE.sub(lambda m: (m.group(1) or "") + "=======", s)
+        s = CONFLICT_SEP_DASH_RE.sub(lambda m: (m.group(1) or "") + "-------", s)
+        s = CONFLICT_END_RE.sub(
+            lambda m: (m.group(1) or "") + ">>>>>>>" + (" REPLACE" if m.group(2) else ""), s
+        )
+
     s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
     s = _remove_injected_fences(s)
     s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s)

From 8a03c3347bce1e41bb0b694b42095aec62a9e6cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 12:18:06 +0700
Subject: [PATCH 111/236] Refactor: Update `unescape_llm_text` to standardize
 and repair LLM-generated text fragments.

- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols.
---
 app/utils/helper.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index ff339e1..fba276a 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -8,7 +8,7 @@
 import tempfile
 import unicodedata
 from pathlib import Path
-from urllib.parse import urlparse
+from urllib.parse import unquote, urlparse
 
 import httpx
 import orjson
@@ -56,8 +56,13 @@
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
     re.IGNORECASE,
 )
-GOOGLE_SEARCH_LINK_PATTERN = re.compile(
-    r"`?\[`?(.+?)`?`?]\((https://www\.google\.com/search\?q=)([^)]*)\)`?"
+GOOGLE_SEARCH_PATTERN = re.compile(
+    r"(?P<md_start>`?\[`?)?"
+    r"(?P<text>[^]]+)?"
+    r"(?(md_start)`?]\()?"
+    r"https://www\.google\.com/search\?q=(?P<query>[^&\s\"'<>)]+)"
+    r"(?(md_start)\)?`?)",
+    re.IGNORECASE,
 )
 CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}(?:\s*(SEARCH)\b)?", re.IGNORECASE)
 CONFLICT_SEP_RE = re.compile(r"(\\?)\s*=(?:\s*=){6,}")
@@ -93,11 +98,13 @@ def normalize_llm_text(s: str) -> str:
     return s
 
 
-def _strip_google_search_links(match: re.Match) -> str:
-    """Extract local Markdown link from Google Search links if applicable."""
-    display_text = str(match.group(1)).strip()
-    if FILE_PATH_PATTERN.match(display_text):
-        return f"[`{display_text}`]({display_text})"
+def _strip_google_search(match: re.Match) -> str:
+    """Extract raw text from Google Search links if it looks like a file path."""
+    text_to_check = match.group("text") if match.group("text") else unquote(match.group("query"))
+    text_to_check = unquote(text_to_check.strip())
+
+    if FILE_PATH_PATTERN.match(text_to_check):
+        return text_to_check
     return match.group(0)
 
 
@@ -157,12 +164,6 @@ def unescape_llm_text(s: str) -> str:
     Standardize and repair LLM-generated text fragments for specialized client protocols.
     Designed to ensure compatibility with clients like Roo Code by fixing
     mangled conflict markers, escaping issues, and injected Markdown formatting.
-
-    Sequence:
-    1. Normalize git conflict markers (handles mangled spacing and keyword standardization).
-    2. Reverse CommonMark escapes (removes leading backslashes from structural markers).
-    3. Strip injected anonymous Markdown code fences.
-    4. Process and normalize Google Search links.
     """
     if not s:
         return ""
@@ -179,7 +180,7 @@ def unescape_llm_text(s: str) -> str:
 
     s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
     s = _remove_injected_fences(s)
-    s = GOOGLE_SEARCH_LINK_PATTERN.sub(_strip_google_search_links, s)
+    s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s)
 
     return s
 
@@ -413,7 +414,6 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
             0xC5,
             0xC6,
             0xC7,
-            0xC9,
             0xCA,
             0xCB,
             0xCD,

From 7ed21323a8b51798c3b02c1ce625e94a31b603cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 12:22:21 +0700
Subject: [PATCH 112/236] Refactor: Update `unescape_llm_text` to standardize
 and repair LLM-generated text fragments.

- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols.
- Restore the `0xC9` entry in `extract_image_dimensions` that the previous patch removed.
---
 app/utils/helper.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index fba276a..8faa8b9 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -414,6 +414,7 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
             0xC5,
             0xC6,
             0xC7,
+            0xC9,
             0xCA,
             0xCB,
             0xCD,

From d92bc1c43a051c818fd0810be94710580994b40f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 15:28:52 +0700
Subject: [PATCH 113/236] Refactor: Update `unescape_llm_text` to standardize
 and repair LLM-generated text fragments.

- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols.
---
 app/server/chat.py  |  12 +---
 app/utils/helper.py | 139 +++++++++-----------------------------------
 2 files changed, 30 insertions(+), 121 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index c31a079..3f8e0cd 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -378,17 +378,7 @@ def _build_tool_prompt(
             f"You are required to call the tool named `{target}`. Do not call any other tool."
         )
 
-    lines.append(
-        "When you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:"
-    )
-    lines.append("[ToolCalls]")
-    lines.append("[Call:tool_name]")
-    lines.append("[CallParameter:arg_name]value[/CallParameter]")
-    lines.append("[/Call]")
-    lines.append("[/ToolCalls]")
-    lines.append(
-        "CRITICAL: Every argument MUST be enclosed in [CallParameter:arg_name]...[/CallParameter]. Output as RAW text. Content inside tags can be any format."
-    )
+    lines.append(TOOL_WRAP_HINT.strip())
     lines.append(
         "If multiple tools are needed, list them sequentially within the same [ToolCalls] block."
     )
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 8faa8b9..28260c3 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -18,13 +18,13 @@
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
-    "\nWhen you decide to call tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n"
+    "\nWhen calling tools, use this EXACT protocol:\n"
     "[ToolCalls]\n"
     "[Call:tool_name]\n"
     "[CallParameter:arg_name]value[/CallParameter]\n"
     "[/Call]\n"
     "[/ToolCalls]\n"
-    "CRITICAL: Every argument MUST be enclosed in [CallParameter:arg_name]...[/CallParameter]. Output as RAW text. Content inside tags can be any format.\n"
+    "CRITICAL: Wrap ALL multi-line or complex values in a markdown code block (e.g., [CallParameter:arg_name]```\nvalue\n```[/CallParameter]) to prevent rendering corruption.\n"
 )
 TOOL_BLOCK_RE = re.compile(
     r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
@@ -64,10 +64,6 @@
     r"(?(md_start)\)?`?)",
     re.IGNORECASE,
 )
-CONFLICT_START_RE = re.compile(r"(\\?)\s*<\s*(?:<\s*){6,}(?:\s*(SEARCH)\b)?", re.IGNORECASE)
-CONFLICT_SEP_RE = re.compile(r"(\\?)\s*=(?:\s*=){6,}")
-CONFLICT_SEP_DASH_RE = re.compile(r"(\\?)\s*[-—](?:\s*[-—]){6,}")
-CONFLICT_END_RE = re.compile(r"(\\?)\s*>\s*(?:>\s*){6,}(?:\s*(REPLACE)\b)?", re.IGNORECASE)
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
 TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
@@ -108,78 +104,36 @@ def _strip_google_search(match: re.Match) -> str:
     return match.group(0)
 
 
-def _remove_injected_fences(s: str) -> str:
+def _strip_param_fences(s: str) -> str:
     """
-    Strip anonymous Markdown code fences often injected by LLMs around
-    responses or tool calls, while preserving named blocks and all internal content.
+    Remove one layer of outermost Markdown code fences,
+    supporting nested blocks by detecting variable fence lengths.
     """
+    s = s.strip()
     if not s:
         return ""
 
-    lines = s.splitlines()
-    out = []
-    in_fence = False
-    fence_len = 0
-    is_anonymous = False
-
-    for line in lines:
-        stripped = line.strip()
-        if stripped.startswith("```"):
-            count = 0
-            for char in stripped:
-                if char == "`":
-                    count += 1
-                else:
-                    break
-
-            lang = stripped[count:].strip()
-
-            if not in_fence:
-                in_fence = True
-                fence_len = count
-                is_anonymous = not lang
-                if not is_anonymous:
-                    out.append(line)
-                continue
+    match = re.match(r"^(?P<fence>`{3,})", s)
+    if not match or not s.endswith(match.group("fence")):
+        return s
 
-            if count >= fence_len:
-                if is_anonymous and lang:
-                    out.append(line)
-                    continue
-
-                if not is_anonymous:
-                    out.append(line)
-                in_fence = False
-                is_anonymous = False
-                fence_len = 0
-                continue
-
-        out.append(line)
+    lines = s.splitlines()
+    if len(lines) >= 2:
+        return "\n".join(lines[1:-1])
 
-    return "\n".join(out)
+    n = len(match.group("fence"))
+    return s[n:-n].strip()
 
 
 def unescape_llm_text(s: str) -> str:
     """
-    Standardize and repair LLM-generated text fragments for specialized client protocols.
-    Designed to ensure compatibility with clients like Roo Code by fixing
-    mangled conflict markers, escaping issues, and injected Markdown formatting.
+    Standardize and repair LLM-generated text fragments (unescaping, link normalization)
+    to ensure compatibility with specialized clients like Roo Code.
     """
     if not s:
         return ""
 
-    if any(c in s for c in ("<", "=", ">", "-", "—")):
-        s = CONFLICT_START_RE.sub(
-            lambda m: (m.group(1) or "") + "<<<<<<<" + (" SEARCH" if m.group(2) else ""), s
-        )
-        s = CONFLICT_SEP_RE.sub(lambda m: (m.group(1) or "") + "=======", s)
-        s = CONFLICT_SEP_DASH_RE.sub(lambda m: (m.group(1) or "") + "-------", s)
-        s = CONFLICT_END_RE.sub(
-            lambda m: (m.group(1) or "") + ">>>>>>>" + (" REPLACE" if m.group(2) else ""), s
-        )
-
     s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
-    s = _remove_injected_fences(s)
     s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s)
 
     return s
@@ -196,13 +150,11 @@ async def save_file_to_tempfile(
     file_in_base64: str, file_name: str = "", tempdir: Path | None = None
 ) -> Path:
     """Decode base64 file data and save to a temporary file."""
-    data = base64.b64decode(file_in_base64)
-    suffix = Path(file_name).suffix if file_name else ".bin"
-
-    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=tempdir) as tmp:
-        tmp.write(data)
+    with tempfile.NamedTemporaryFile(
+        delete=False, suffix=Path(file_name).suffix if file_name else ".bin", dir=tempdir
+    ) as tmp:
+        tmp.write(base64.b64decode(file_in_base64))
         path = Path(tmp.name)
-
     return path
 
 
@@ -213,35 +165,22 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
     if url.startswith("data:image/"):
         metadata_part = url.split(",")[0]
         mime_type = metadata_part.split(":")[1].split(";")[0]
-
-        base64_data = url.split(",")[1]
-        data = base64.b64decode(base64_data)
-
-        suffix = mimetypes.guess_extension(mime_type)
-        if not suffix:
-            suffix = f".{mime_type.split('/')[1]}"
+        data = base64.b64decode(url.split(",")[1])
+        suffix = mimetypes.guess_extension(mime_type) or f".{mime_type.split('/')[1]}"
     else:
         async with httpx.AsyncClient(follow_redirects=True) as client:
             resp = await client.get(url)
             resp.raise_for_status()
             data = resp.content
             content_type = resp.headers.get("content-type")
-
             if content_type:
-                mime_type = content_type.split(";")[0].strip()
-                suffix = mimetypes.guess_extension(mime_type)
-
+                suffix = mimetypes.guess_extension(content_type.split(";")[0].strip())
             if not suffix:
-                path_url = urlparse(url).path
-                suffix = Path(path_url).suffix
-
-            if not suffix:
-                suffix = ".bin"
+                suffix = Path(urlparse(url).path).suffix or ".bin"
 
     with tempfile.NamedTemporaryFile(delete=False, suffix=suffix, dir=tempdir) as tmp:
         tmp.write(data)
         path = Path(tmp.name)
-
     return path
 
 
@@ -249,7 +188,6 @@ def strip_tagged_blocks(text: str) -> str:
     """
     Remove ChatML role blocks (<|im_start|>role...<|im_end|>).
     Role 'tool' blocks are removed entirely; others have markers stripped but content preserved.
-    Handles both raw and escaped markers consistently.
     """
     if not text:
         return text
@@ -274,7 +212,6 @@ def strip_tagged_blocks(text: str) -> str:
 
         if role != "tool":
             result.append(text[content_start : match_end.start()])
-
         idx = match_end.end()
 
     return "".join(result)
@@ -297,7 +234,6 @@ def strip_system_hints(text: str) -> str:
         cleaned = re.sub(rf"\s*{re.escape(TOOL_HINT_LINE_END)}\.?\n?", "", cleaned)
 
     cleaned = strip_tagged_blocks(cleaned)
-
     cleaned = CONTROL_TOKEN_RE.sub("", cleaned)
     cleaned = TOOL_BLOCK_RE.sub("", cleaned)
     cleaned = TOOL_CALL_RE.sub("", cleaned)
@@ -331,7 +267,10 @@ def _create_tool_call(name: str, raw_args: str) -> None:
 
         arg_matches = TAGGED_ARG_RE.findall(raw_args)
         if arg_matches:
-            args_dict = {arg_name.strip(): arg_value.strip() for arg_name, arg_value in arg_matches}
+            args_dict = {
+                arg_name.strip(): _strip_param_fences(arg_value)
+                for arg_name, arg_value in arg_matches
+            }
             arguments = orjson.dumps(args_dict).decode("utf-8")
             logger.debug(f"Successfully parsed {len(args_dict)} arguments for tool: {name}")
         else:
@@ -360,7 +299,6 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         _create_tool_call(match.group(1), match.group(2))
 
     cleaned = strip_system_hints(text)
-
     return cleaned, tool_calls
 
 
@@ -406,21 +344,7 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
     if len(data) >= 4 and data[0:2] == b"\xff\xd8":
         idx = 2
         length = len(data)
-        sof_markers = {
-            0xC0,
-            0xC1,
-            0xC2,
-            0xC3,
-            0xC5,
-            0xC6,
-            0xC7,
-            0xC9,
-            0xCA,
-            0xCB,
-            0xCD,
-            0xCE,
-            0xCF,
-        }
+        sof_markers = {0xC0, 0xC1, 0xC2, 0xC3, 0xC5, 0xC6, 0xC7, 0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF}
         while idx < length:
             if data[idx] != 0xFF:
                 idx += 1
@@ -431,26 +355,21 @@ def extract_image_dimensions(data: bytes) -> tuple[int | None, int | None]:
                 break
             marker = data[idx]
             idx += 1
-
             if marker in (0xD8, 0xD9, 0x01) or 0xD0 <= marker <= 0xD7:
                 continue
-
             if idx + 1 >= length:
                 break
             segment_length = (data[idx] << 8) + data[idx + 1]
             idx += 2
             if segment_length < 2:
                 break
-
             if marker in sof_markers:
                 if idx + 4 < length:
                     height = (data[idx + 1] << 8) + data[idx + 2]
                     width = (data[idx + 3] << 8) + data[idx + 4]
                     return int(width), int(height)
                 break
-
             idx += segment_length - 2
-
     return None, None
 
 

From 4ecad566edbe7cb1c37ad50ecc6da034c88e209a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 16:18:28 +0700
Subject: [PATCH 114/236] Refactor: Update `unescape_llm_text` to standardize
 and repair LLM-generated text fragments.

- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols.
---
 app/utils/helper.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 28260c3..99af0a5 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -18,13 +18,15 @@
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
-    "\nWhen calling tools, use this EXACT protocol:\n"
+    "When calling tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n"
     "[ToolCalls]\n"
     "[Call:tool_name]\n"
-    "[CallParameter:arg_name]value[/CallParameter]\n"
+    "[CallParameter:arg_name]\n"
+    "value\n"
+    "[/CallParameter]\n"
     "[/Call]\n"
     "[/ToolCalls]\n"
-    "CRITICAL: Wrap ALL multi-line or complex values in a markdown code block (e.g., [CallParameter:arg_name]```\nvalue\n```[/CallParameter]) to prevent rendering corruption.\n"
+    "CRITICAL: If 'value' is multi-line or complex, you MUST wrap it in a markdown code block within the tags (use a fence longer than any backtick sequence in the content) to prevent rendering corruption.\n"
 )
 TOOL_BLOCK_RE = re.compile(
     r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE

From 28378b4125e8900284814040d86f3139f2ddd644 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 16:19:54 +0700
Subject: [PATCH 115/236] Update dependencies

---
 pyproject.toml |  4 ++--
 uv.lock        | 48 ++++++++++++++++++++++++------------------------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index d3a1aaf..93dabab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ description = "FastAPI Server built on Gemini Web API"
 readme = "README.md"
 requires-python = "==3.12.*"
 dependencies = [
-    "fastapi>=0.128.7",
+    "fastapi>=0.129.0",
     "gemini-webapi>=1.19.1",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
@@ -31,5 +31,5 @@ indent-style = "space"
 
 [dependency-groups]
 dev = [
-    "ruff>=0.15.0",
+    "ruff>=0.15.1",
 ]
diff --git a/uv.lock b/uv.lock
index c038f53..249e84b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -65,7 +65,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.128.7"
+version = "0.129.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -74,9 +74,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/a0/fc/af386750b3fd8d8828167e4c82b787a8eeca2eca5c5429c9db8bb7c70e04/fastapi-0.128.7.tar.gz", hash = "sha256:783c273416995486c155ad2c0e2b45905dedfaf20b9ef8d9f6a9124670639a24", size = 375325, upload-time = "2026-02-10T12:26:40.968Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/48/47/75f6bea02e797abff1bca968d5997793898032d9923c1935ae2efdece642/fastapi-0.129.0.tar.gz", hash = "sha256:61315cebd2e65df5f97ec298c888f9de30430dd0612d59d6480beafbc10655af", size = 375450, upload-time = "2026-02-12T13:54:52.541Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/af/1a/f983b45661c79c31be575c570d46c437a5409b67a939c1b3d8d6b3ed7a7f/fastapi-0.128.7-py3-none-any.whl", hash = "sha256:6bd9bd31cb7047465f2d3fa3ba3f33b0870b17d4eaf7cdb36d1576ab060ad662", size = 103630, upload-time = "2026-02-10T12:26:39.414Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/dd/d0ee25348ac58245ee9f90b6f3cbb666bf01f69be7e0911f9851bddbda16/fastapi-0.129.0-py3-none-any.whl", hash = "sha256:b4946880e48f462692b31c083be0432275cbfb6e2274566b1be91479cc1a84ec", size = 102950, upload-time = "2026-02-12T13:54:54.528Z" },
 ]
 
 [[package]]
@@ -106,7 +106,7 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "fastapi", specifier = ">=0.128.7" },
+    { name = "fastapi", specifier = ">=0.129.0" },
     { name = "gemini-webapi", specifier = ">=1.19.1" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
@@ -119,7 +119,7 @@ requires-dist = [
 provides-extras = ["dev"]
 
 [package.metadata.requires-dev]
-dev = [{ name = "ruff", specifier = ">=0.15.0" }]
+dev = [{ name = "ruff", specifier = ">=0.15.1" }]
 
 [[package]]
 name = "gemini-webapi"
@@ -361,27 +361,27 @@ wheels = [
 
 [[package]]
 name = "ruff"
-version = "0.15.0"
+version = "0.15.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/c8/39/5cee96809fbca590abea6b46c6d1c586b49663d1d2830a751cc8fc42c666/ruff-0.15.0.tar.gz", hash = "sha256:6bdea47cdbea30d40f8f8d7d69c0854ba7c15420ec75a26f463290949d7f7e9a", size = 4524893, upload-time = "2026-02-03T17:53:35.357Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/04/dc/4e6ac71b511b141cf626357a3946679abeba4cf67bc7cc5a17920f31e10d/ruff-0.15.1.tar.gz", hash = "sha256:c590fe13fb57c97141ae975c03a1aedb3d3156030cabd740d6ff0b0d601e203f", size = 4540855, upload-time = "2026-02-12T23:09:09.998Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/bc/88/3fd1b0aa4b6330d6aaa63a285bc96c9f71970351579152d231ed90914586/ruff-0.15.0-py3-none-linux_armv6l.whl", hash = "sha256:aac4ebaa612a82b23d45964586f24ae9bc23ca101919f5590bdb368d74ad5455", size = 10354332, upload-time = "2026-02-03T17:52:54.892Z" },
-    { url = "https://files.pythonhosted.org/packages/72/f6/62e173fbb7eb75cc29fe2576a1e20f0a46f671a2587b5f604bfb0eaf5f6f/ruff-0.15.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:dcd4be7cc75cfbbca24a98d04d0b9b36a270d0833241f776b788d59f4142b14d", size = 10767189, upload-time = "2026-02-03T17:53:19.778Z" },
-    { url = "https://files.pythonhosted.org/packages/99/e4/968ae17b676d1d2ff101d56dc69cf333e3a4c985e1ec23803df84fc7bf9e/ruff-0.15.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d747e3319b2bce179c7c1eaad3d884dc0a199b5f4d5187620530adf9105268ce", size = 10075384, upload-time = "2026-02-03T17:53:29.241Z" },
-    { url = "https://files.pythonhosted.org/packages/a2/bf/9843c6044ab9e20af879c751487e61333ca79a2c8c3058b15722386b8cae/ruff-0.15.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:650bd9c56ae03102c51a5e4b554d74d825ff3abe4db22b90fd32d816c2e90621", size = 10481363, upload-time = "2026-02-03T17:52:43.332Z" },
-    { url = "https://files.pythonhosted.org/packages/55/d9/4ada5ccf4cd1f532db1c8d44b6f664f2208d3d93acbeec18f82315e15193/ruff-0.15.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a6664b7eac559e3048223a2da77769c2f92b43a6dfd4720cef42654299a599c9", size = 10187736, upload-time = "2026-02-03T17:53:00.522Z" },
-    { url = "https://files.pythonhosted.org/packages/86/e2/f25eaecd446af7bb132af0a1d5b135a62971a41f5366ff41d06d25e77a91/ruff-0.15.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f811f97b0f092b35320d1556f3353bf238763420ade5d9e62ebd2b73f2ff179", size = 10968415, upload-time = "2026-02-03T17:53:15.705Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/dc/f06a8558d06333bf79b497d29a50c3a673d9251214e0d7ec78f90b30aa79/ruff-0.15.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:761ec0a66680fab6454236635a39abaf14198818c8cdf691e036f4bc0f406b2d", size = 11809643, upload-time = "2026-02-03T17:53:23.031Z" },
-    { url = "https://files.pythonhosted.org/packages/dd/45/0ece8db2c474ad7df13af3a6d50f76e22a09d078af63078f005057ca59eb/ruff-0.15.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:940f11c2604d317e797b289f4f9f3fa5555ffe4fb574b55ed006c3d9b6f0eb78", size = 11234787, upload-time = "2026-02-03T17:52:46.432Z" },
-    { url = "https://files.pythonhosted.org/packages/8a/d9/0e3a81467a120fd265658d127db648e4d3acfe3e4f6f5d4ea79fac47e587/ruff-0.15.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcbca3d40558789126da91d7ef9a7c87772ee107033db7191edefa34e2c7f1b4", size = 11112797, upload-time = "2026-02-03T17:52:49.274Z" },
-    { url = "https://files.pythonhosted.org/packages/b2/cb/8c0b3b0c692683f8ff31351dfb6241047fa873a4481a76df4335a8bff716/ruff-0.15.0-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9a121a96db1d75fa3eb39c4539e607f628920dd72ff1f7c5ee4f1b768ac62d6e", size = 11033133, upload-time = "2026-02-03T17:53:33.105Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/5e/23b87370cf0f9081a8c89a753e69a4e8778805b8802ccfe175cc410e50b9/ruff-0.15.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5298d518e493061f2eabd4abd067c7e4fb89e2f63291c94332e35631c07c3662", size = 10442646, upload-time = "2026-02-03T17:53:06.278Z" },
-    { url = "https://files.pythonhosted.org/packages/e1/9a/3c94de5ce642830167e6d00b5c75aacd73e6347b4c7fc6828699b150a5ee/ruff-0.15.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:afb6e603d6375ff0d6b0cee563fa21ab570fd15e65c852cb24922cef25050cf1", size = 10195750, upload-time = "2026-02-03T17:53:26.084Z" },
-    { url = "https://files.pythonhosted.org/packages/30/15/e396325080d600b436acc970848d69df9c13977942fb62bb8722d729bee8/ruff-0.15.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:77e515f6b15f828b94dc17d2b4ace334c9ddb7d9468c54b2f9ed2b9c1593ef16", size = 10676120, upload-time = "2026-02-03T17:53:09.363Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/c9/229a23d52a2983de1ad0fb0ee37d36e0257e6f28bfd6b498ee2c76361874/ruff-0.15.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6f6e80850a01eb13b3e42ee0ebdf6e4497151b48c35051aab51c101266d187a3", size = 11201636, upload-time = "2026-02-03T17:52:57.281Z" },
-    { url = "https://files.pythonhosted.org/packages/6f/b0/69adf22f4e24f3677208adb715c578266842e6e6a3cc77483f48dd999ede/ruff-0.15.0-py3-none-win32.whl", hash = "sha256:238a717ef803e501b6d51e0bdd0d2c6e8513fe9eec14002445134d3907cd46c3", size = 10465945, upload-time = "2026-02-03T17:53:12.591Z" },
-    { url = "https://files.pythonhosted.org/packages/51/ad/f813b6e2c97e9b4598be25e94a9147b9af7e60523b0cb5d94d307c15229d/ruff-0.15.0-py3-none-win_amd64.whl", hash = "sha256:dd5e4d3301dc01de614da3cdffc33d4b1b96fb89e45721f1598e5532ccf78b18", size = 11564657, upload-time = "2026-02-03T17:52:51.893Z" },
-    { url = "https://files.pythonhosted.org/packages/f6/b0/2d823f6e77ebe560f4e397d078487e8d52c1516b331e3521bc75db4272ca/ruff-0.15.0-py3-none-win_arm64.whl", hash = "sha256:c480d632cc0ca3f0727acac8b7d053542d9e114a462a145d0b00e7cd658c515a", size = 10865753, upload-time = "2026-02-03T17:53:03.014Z" },
+    { url = "https://files.pythonhosted.org/packages/23/bf/e6e4324238c17f9d9120a9d60aa99a7daaa21204c07fcd84e2ef03bb5fd1/ruff-0.15.1-py3-none-linux_armv6l.whl", hash = "sha256:b101ed7cf4615bda6ffe65bdb59f964e9f4a0d3f85cbf0e54f0ab76d7b90228a", size = 10367819, upload-time = "2026-02-12T23:09:03.598Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/ea/c8f89d32e7912269d38c58f3649e453ac32c528f93bb7f4219258be2e7ed/ruff-0.15.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:939c995e9277e63ea632cc8d3fae17aa758526f49a9a850d2e7e758bfef46602", size = 10798618, upload-time = "2026-02-12T23:09:22.928Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/0f/1d0d88bc862624247d82c20c10d4c0f6bb2f346559d8af281674cf327f15/ruff-0.15.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1d83466455fdefe60b8d9c8df81d3c1bbb2115cede53549d3b522ce2bc703899", size = 10148518, upload-time = "2026-02-12T23:08:58.339Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/c8/291c49cefaa4a9248e986256df2ade7add79388fe179e0691be06fae6f37/ruff-0.15.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9457e3c3291024866222b96108ab2d8265b477e5b1534c7ddb1810904858d16", size = 10518811, upload-time = "2026-02-12T23:09:31.865Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/1a/f5707440e5ae43ffa5365cac8bbb91e9665f4a883f560893829cf16a606b/ruff-0.15.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:92c92b003e9d4f7fbd33b1867bb15a1b785b1735069108dfc23821ba045b29bc", size = 10196169, upload-time = "2026-02-12T23:09:17.306Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/ff/26ddc8c4da04c8fd3ee65a89c9fb99eaa5c30394269d424461467be2271f/ruff-0.15.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fe5c41ab43e3a06778844c586251eb5a510f67125427625f9eb2b9526535779", size = 10990491, upload-time = "2026-02-12T23:09:25.503Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/00/50920cb385b89413f7cdb4bb9bc8fc59c1b0f30028d8bccc294189a54955/ruff-0.15.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66a6dd6df4d80dc382c6484f8ce1bcceb55c32e9f27a8b94c32f6c7331bf14fb", size = 11843280, upload-time = "2026-02-12T23:09:19.88Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/6d/2f5cad8380caf5632a15460c323ae326f1e1a2b5b90a6ee7519017a017ca/ruff-0.15.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a4a42cbb8af0bda9bcd7606b064d7c0bc311a88d141d02f78920be6acb5aa83", size = 11274336, upload-time = "2026-02-12T23:09:14.907Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/1d/5f56cae1d6c40b8a318513599b35ea4b075d7dc1cd1d04449578c29d1d75/ruff-0.15.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ab064052c31dddada35079901592dfba2e05f5b1e43af3954aafcbc1096a5b2", size = 11137288, upload-time = "2026-02-12T23:09:07.475Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/20/6f8d7d8f768c93b0382b33b9306b3b999918816da46537d5a61635514635/ruff-0.15.1-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:5631c940fe9fe91f817a4c2ea4e81f47bee3ca4aa646134a24374f3c19ad9454", size = 11070681, upload-time = "2026-02-12T23:08:55.43Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/67/d640ac76069f64cdea59dba02af2e00b1fa30e2103c7f8d049c0cff4cafd/ruff-0.15.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:68138a4ba184b4691ccdc39f7795c66b3c68160c586519e7e8444cf5a53e1b4c", size = 10486401, upload-time = "2026-02-12T23:09:27.927Z" },
+    { url = "https://files.pythonhosted.org/packages/65/3d/e1429f64a3ff89297497916b88c32a5cc88eeca7e9c787072d0e7f1d3e1e/ruff-0.15.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:518f9af03bfc33c03bdb4cb63fabc935341bb7f54af500f92ac309ecfbba6330", size = 10197452, upload-time = "2026-02-12T23:09:12.147Z" },
+    { url = "https://files.pythonhosted.org/packages/78/83/e2c3bade17dad63bf1e1c2ffaf11490603b760be149e1419b07049b36ef2/ruff-0.15.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:da79f4d6a826caaea95de0237a67e33b81e6ec2e25fc7e1993a4015dffca7c61", size = 10693900, upload-time = "2026-02-12T23:09:34.418Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/27/fdc0e11a813e6338e0706e8b39bb7a1d61ea5b36873b351acee7e524a72a/ruff-0.15.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3dd86dccb83cd7d4dcfac303ffc277e6048600dfc22e38158afa208e8bf94a1f", size = 11227302, upload-time = "2026-02-12T23:09:36.536Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/58/ac864a75067dcbd3b95be5ab4eb2b601d7fbc3d3d736a27e391a4f92a5c1/ruff-0.15.1-py3-none-win32.whl", hash = "sha256:660975d9cb49b5d5278b12b03bb9951d554543a90b74ed5d366b20e2c57c2098", size = 10462555, upload-time = "2026-02-12T23:09:29.899Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/5e/d4ccc8a27ecdb78116feac4935dfc39d1304536f4296168f91ed3ec00cd2/ruff-0.15.1-py3-none-win_amd64.whl", hash = "sha256:c820fef9dd5d4172a6570e5721704a96c6679b80cf7be41659ed439653f62336", size = 11599956, upload-time = "2026-02-12T23:09:01.157Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/07/5bda6a85b220c64c65686bc85bd0bbb23b29c62b3a9f9433fa55f17cda93/ruff-0.15.1-py3-none-win_arm64.whl", hash = "sha256:5ff7d5f0f88567850f45081fac8f4ec212be8d0b963e385c3f7d0d2eb4899416", size = 10874604, upload-time = "2026-02-12T23:09:05.515Z" },
 ]
 
 [[package]]

From 3fcd01ead021e9536f1cbdaf4c4f0c2b2a0047e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 17:28:44 +0700
Subject: [PATCH 116/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure

- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols.
---
 app/server/chat.py  |  9 ---------
 app/utils/helper.py | 10 +++++++---
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 3f8e0cd..114eedf 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -379,15 +379,6 @@ def _build_tool_prompt(
         )
 
     lines.append(TOOL_WRAP_HINT.strip())
-    lines.append(
-        "If multiple tools are needed, list them sequentially within the same [ToolCalls] block."
-    )
-    lines.append(
-        "If no tool call is needed, provide a normal response and NEVER use the [ToolCalls] tag."
-    )
-    lines.append(
-        "Note: Tool results are returned in a [ToolResults] block using [ToolResult] tags."
-    )
 
     return "\n".join(lines)
 
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 99af0a5..68066a3 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -18,15 +18,19 @@
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
-    "When calling tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n"
+    "When calling tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n\n"
     "[ToolCalls]\n"
     "[Call:tool_name]\n"
     "[CallParameter:arg_name]\n"
     "value\n"
     "[/CallParameter]\n"
     "[/Call]\n"
-    "[/ToolCalls]\n"
-    "CRITICAL: If 'value' is multi-line or complex, you MUST wrap it in a markdown code block within the tags (use a fence longer than any backtick sequence in the content) to prevent rendering corruption.\n"
+    "[/ToolCalls]\n\n"
+    "CRITICAL: If 'value' contains ANY newlines or special characters, you MUST wrap it in a markdown code block (triple backticks or longer) within the tags. "
+    "Failure to wrap multi-line content will result in a protocol rejection. Use a fence longer than any backtick sequence in the content.\n\n"
+    "Multiple tool calls MUST be listed sequentially within the same [ToolCalls] block.\n"
+    "If no tool is needed, respond naturally and NEVER use any [ToolCalls] or [Call] tags.\n"
+    "Note: Tool results are returned in [ToolResults] blocks."
 )
 TOOL_BLOCK_RE = re.compile(
     r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE

From ef24704b3d302f76233999002717a44d4a88c444 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 17:42:59 +0700
Subject: [PATCH 117/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure

- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols.
---
 app/utils/helper.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 68066a3..a2b4c23 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -18,7 +18,7 @@
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
-    "When calling tools, you MUST respond ONLY with a single [ToolCalls] block using this EXACT syntax:\n\n"
+    "When calling tools, respond ONLY with a single [ToolCalls] block. NO other text allowed. EXACT syntax:\n\n"
     "[ToolCalls]\n"
     "[Call:tool_name]\n"
     "[CallParameter:arg_name]\n"
@@ -26,11 +26,9 @@
     "[/CallParameter]\n"
     "[/Call]\n"
     "[/ToolCalls]\n\n"
-    "CRITICAL: If 'value' contains ANY newlines or special characters, you MUST wrap it in a markdown code block (triple backticks or longer) within the tags. "
-    "Failure to wrap multi-line content will result in a protocol rejection. Use a fence longer than any backtick sequence in the content.\n\n"
-    "Multiple tool calls MUST be listed sequentially within the same [ToolCalls] block.\n"
-    "If no tool is needed, respond naturally and NEVER use any [ToolCalls] or [Call] tags.\n"
-    "Note: Tool results are returned in [ToolResults] blocks."
+    "CRITICAL: If 'value' has ANY newline, you MUST wrap it in a code block (```). "
+    "The tags MUST contain ONLY the code block, no other text. Use a fence longer than any backticks in content.\n"
+    "Multiple calls: list [Call] blocks inside [ToolCalls]. No tools: respond naturally, NO tags."
 )
 TOOL_BLOCK_RE = re.compile(
     r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE

From 4edf4cdedc6c093a426da1554948fed074705edd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 18:08:52 +0700
Subject: [PATCH 118/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure

- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols.
---
 app/utils/helper.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index a2b4c23..0efa460 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -18,17 +18,21 @@
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
-    "When calling tools, respond ONLY with a single [ToolCalls] block. NO other text allowed. EXACT syntax:\n\n"
+    "SYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n"
+    "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n"
+    "2. For ALL parameters, the value MUST be wrapped in a markdown code block inside the tags to prevent rendering corruption.\n"
+    "3. Use a markdown fence (backticks) longer than any backtick sequence in the content (e.g., use ```` if content has ```).\n\n"
+    "EXACT SYNTAX TEMPLATE:\n"
     "[ToolCalls]\n"
     "[Call:tool_name]\n"
     "[CallParameter:arg_name]\n"
+    "```\n"
     "value\n"
+    "```\n"
     "[/CallParameter]\n"
     "[/Call]\n"
     "[/ToolCalls]\n\n"
-    "CRITICAL: If 'value' has ANY newline, you MUST wrap it in a code block (```). "
-    "The tags MUST contain ONLY the code block, no other text. Use a fence longer than any backticks in content.\n"
-    "Multiple calls: list [Call] blocks inside [ToolCalls]. No tools: respond naturally, NO tags."
+    "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags."
 )
 TOOL_BLOCK_RE = re.compile(
     r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE

From dcadabb8341a4e94cc9a44a6c9e9350e70365a7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 18:23:20 +0700
Subject: [PATCH 119/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure

- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols.
---
 app/server/chat.py  | 2 +-
 app/utils/helper.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 114eedf..881f7c9 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -378,7 +378,7 @@ def _build_tool_prompt(
             f"You are required to call the tool named `{target}`. Do not call any other tool."
         )
 
-    lines.append(TOOL_WRAP_HINT.strip())
+    lines.append(TOOL_WRAP_HINT)
 
     return "\n".join(lines)
 
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 0efa460..1e70c3b 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -18,7 +18,7 @@
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
-    "SYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n"
+    "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n"
     "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n"
     "2. For ALL parameters, the value MUST be wrapped in a markdown code block inside the tags to prevent rendering corruption.\n"
     "3. Use a markdown fence (backticks) longer than any backtick sequence in the content (e.g., use ```` if content has ```).\n\n"
@@ -32,7 +32,7 @@
     "[/CallParameter]\n"
     "[/Call]\n"
     "[/ToolCalls]\n\n"
-    "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags."
+    "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n"
 )
 TOOL_BLOCK_RE = re.compile(
     r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE

From 83904142bb97676f09d93725ee5d4ad19f5d7cd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 19:56:04 +0700
Subject: [PATCH 120/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure

- These patches are specifically designed for complex clients like Roo Code to ensure compatibility with their specialized tool protocols.
---
 app/utils/helper.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 1e70c3b..ae96f05 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -20,8 +20,8 @@
 TOOL_WRAP_HINT = (
     "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n"
     "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n"
-    "2. For ALL parameters, the value MUST be wrapped in a markdown code block inside the tags to prevent rendering corruption.\n"
-    "3. Use a markdown fence (backticks) longer than any backtick sequence in the content (e.g., use ```` if content has ```).\n\n"
+    "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n"
+    "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n"
     "EXACT SYNTAX TEMPLATE:\n"
     "[ToolCalls]\n"
     "[Call:tool_name]\n"
@@ -32,6 +32,7 @@
     "[/CallParameter]\n"
     "[/Call]\n"
     "[/ToolCalls]\n\n"
+    "CRITICAL: Every tag MUST be opened and closed accurately.\n\n"
     "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n"
 )
 TOOL_BLOCK_RE = re.compile(

From ce43d63ce2b2b6e042fa122d96d405c683407ef2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 22:26:00 +0700
Subject: [PATCH 121/236] Refactor: Remove all escape logic handlers.

- Change tool call tags to snake_case.
---
 app/server/chat.py     | 61 +++++++++++++++----------------
 app/services/client.py | 32 ++++++++---------
 app/services/lmdb.py   | 22 ++++++------
 app/utils/helper.py    | 81 ++++++++++++++++++++----------------------
 app/utils/logging.py   |  2 +-
 5 files changed, 93 insertions(+), 105 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 881f7c9..98277d6 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -121,18 +121,18 @@ def _calculate_usage(
 ) -> tuple[int, int, int]:
     """Calculate prompt, completion and total tokens consistently."""
     prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages)
-    tool_args_text = ""
+    tool_params_text = ""
     if tool_calls:
         for call in tool_calls:
             if hasattr(call, "function"):
-                tool_args_text += call.function.arguments or ""
+                tool_params_text += call.function.arguments or ""
             elif isinstance(call, dict):
-                tool_args_text += call.get("function", {}).get("arguments", "")
+                tool_params_text += call.get("function", {}).get("arguments", "")
 
     completion_basis = assistant_text or ""
-    if tool_args_text:
+    if tool_params_text:
         completion_basis = (
-            f"{completion_basis}\n{tool_args_text}" if completion_basis else tool_args_text
+            f"{completion_basis}\n{tool_params_text}" if completion_basis else tool_params_text
         )
 
     completion_tokens = estimate_tokens(completion_basis)
@@ -343,7 +343,7 @@ def _build_tool_prompt(
     tools: list[Tool],
     tool_choice: str | ToolChoiceFunction | None,
 ) -> str:
-    """Generate a system prompt describing available tools and the PascalCase protocol."""
+    """Generate a system prompt describing available tools and the snake_case protocol."""
     if not tools:
         return ""
 
@@ -359,10 +359,10 @@ def _build_tool_prompt(
             schema_text = orjson.dumps(function.parameters, option=orjson.OPT_SORT_KEYS).decode(
                 "utf-8"
             )
-            lines.append("Arguments JSON schema:")
+            lines.append("Parameters JSON schema:")
             lines.append(schema_text)
         else:
-            lines.append("Arguments JSON schema: {}")
+            lines.append("Parameters JSON schema: {}")
 
     if tool_choice == "none":
         lines.append(
@@ -760,28 +760,28 @@ def __init__(self):
 
         self.STATE_MARKERS = {
             "TOOL": {
-                "starts": ["[ToolCalls]", "\\[ToolCalls\\]"],
-                "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"],
+                "starts": ["[tool_calls]"],
+                "ends": ["[/tool_calls]"],
             },
             "ORPHAN": {
-                "starts": ["[Call:", "\\[Call:", "\\[Call\\:"],
-                "ends": ["[/Call]", "\\[/Call\\]"],
+                "starts": ["[call:"],
+                "ends": ["[/call]"],
             },
             "RESP": {
-                "starts": ["[ToolResults]", "\\[ToolResults\\]"],
-                "ends": ["[/ToolResults]", "\\[/ToolResults\\]"],
+                "starts": ["[tool_results]"],
+                "ends": ["[/tool_results]"],
             },
-            "ARG": {
-                "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"],
-                "ends": ["[/CallParameter]", "\\[/CallParameter\\]"],
+            "PARAM": {
+                "starts": ["[call_parameter:"],
+                "ends": ["[/call_parameter]"],
             },
             "RESULT": {
-                "starts": ["[ToolResult]", "\\[ToolResult\\]"],
-                "ends": ["[/ToolResult]", "\\[/ToolResult\\]"],
+                "starts": ["[tool_result]"],
+                "ends": ["[/tool_result]"],
             },
             "TAG": {
-                "starts": ["<|im_start|>", "\\<|im\\_start|\\>"],
-                "ends": ["<|im_end|>", "\\<|im\\_end|\\>"],
+                "starts": ["<|im_start|>"],
+                "ends": ["<|im_end|>"],
             },
         }
 
@@ -794,15 +794,10 @@ def __init__(self):
 
         self.ORPHAN_ENDS = [
             "<|im_end|>",
-            "\\<|im\\_end|\\>",
-            "[/Call]",
-            "\\[/Call\\]",
-            "[/ToolCalls]",
-            "\\[/ToolCalls\\]",
-            "[/CallParameter]",
-            "\\[/CallParameter\\]",
-            "[/ToolResult]",
-            "\\[/ToolResult\\]",
+            "[/call]",
+            "[/tool_calls]",
+            "[/call_parameter]",
+            "[/tool_result]",
         ]
 
         self.WATCH_MARKERS = []
@@ -876,8 +871,8 @@ def process(self, chunk: str) -> str:
                         self.buffer = self.buffer[-max_end_len:]
                     break
 
-            elif self.state == "IN_ARG":
-                cfg = self.STATE_MARKERS["ARG"]
+            elif self.state == "IN_PARAM":
+                cfg = self.STATE_MARKERS["PARAM"]
                 found_idx, found_len = -1, 0
                 for p in cfg["ends"]:
                     idx = buf_low.find(p.lower())
@@ -1003,7 +998,7 @@ def process(self, chunk: str) -> str:
     def flush(self) -> str:
         """Release remaining buffer content and perform final cleanup at stream end."""
         res = ""
-        if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"):
+        if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_PARAM", "IN_RESULT"):
             res = ""
         elif self.state == "IN_BLOCK" and self.current_role != "tool":
             res = self.buffer
diff --git a/app/services/client.py b/app/services/client.py
index 6ab80cd..1c826e5 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -70,8 +70,8 @@ async def process_message(
         message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True
     ) -> tuple[str, list[Path | str]]:
         """
-        Process a Message into Gemini API format using the PascalCase technical protocol.
-        Extracts text, handles files, and appends ToolCalls/ToolResults blocks.
+        Process a Message into Gemini API format using the snake_case technical protocol.
+        Extracts text, handles files, and appends tool_calls/tool_results blocks.
         """
         files: list[Path | str] = []
         text_fragments: list[str] = []
@@ -109,34 +109,32 @@ async def process_message(
         if message.role == "tool":
             tool_name = message.name or "unknown"
             combined_content = "\n".join(text_fragments).strip()
-            res_block = (
-                f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]"
-            )
+            res_block = f"[result:{tool_name}]\n[tool_result]\n{combined_content}\n[/tool_result]\n[/result]"
             if wrap_tool:
-                text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"]
+                text_fragments = [f"[tool_results]\n{res_block}\n[/tool_results]"]
             else:
                 text_fragments = [res_block]
 
         if message.tool_calls:
             tool_blocks: list[str] = []
             for call in message.tool_calls:
-                args_text = call.function.arguments.strip()
-                formatted_args = "@args\n"
+                params_text = call.function.arguments.strip()
+                formatted_params = ""
                 try:
-                    parsed_args = orjson.loads(args_text)
-                    if isinstance(parsed_args, dict):
-                        for k, v in parsed_args.items():
+                    parsed_params = orjson.loads(params_text)
+                    if isinstance(parsed_params, dict):
+                        for k, v in parsed_params.items():
                             val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8")
-                            formatted_args += f"[CallParameter:{k}]{val_str}[/CallParameter]\n"
+                            formatted_params += f"[call_parameter:{k}]{val_str}[/call_parameter]\n"
                     else:
-                        formatted_args += args_text
+                        formatted_params += params_text
                 except orjson.JSONDecodeError:
-                    formatted_args += args_text
+                    formatted_params += params_text
 
-                tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_args}[/Call]")
+                tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]")
 
             if tool_blocks:
-                tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]"
+                tool_section = "[tool_calls]\n" + "\n".join(tool_blocks) + "\n[/tool_calls]"
                 text_fragments.append(tool_section)
 
         model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
@@ -168,7 +166,7 @@ async def process_conversation(
                     i += 1
 
                 combined_tool_content = "\n".join(tool_blocks)
-                wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]"
+                wrapped_content = f"[tool_results]\n{combined_tool_content}\n[/tool_results]"
                 conversation.append(add_tag("tool", wrapped_content))
             else:
                 input_part, files_part = await GeminiClientWrapper.process_message(
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index ad92bbf..2f59662 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -99,17 +99,17 @@ def _hash_message(message: Message, fuzzy: bool = False) -> str:
     if message.tool_calls:
         calls_data = []
         for tc in message.tool_calls:
-            args = tc.function.arguments or "{}"
+            params = tc.function.arguments or "{}"
             try:
-                parsed = orjson.loads(args)
-                canon_args = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8")
+                parsed = orjson.loads(params)
+                canon_params = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8")
             except orjson.JSONDecodeError:
-                canon_args = args
+                canon_params = params
 
             calls_data.append(
                 {
                     "name": tc.function.name,
-                    "arguments": canon_args,
+                    "arguments": canon_params,
                 }
             )
         calls_data.sort(key=lambda x: (x["name"], x["arguments"]))
@@ -149,7 +149,7 @@ def __init__(
         """
         Initialize LMDB store.
 
-        Args:
+        Params:
             db_path: Path to LMDB database directory
             max_db_size: Maximum database size in bytes (default: 256 MB)
             retention_days: Number of days to retain conversations (default: 14, 0 disables cleanup)
@@ -194,7 +194,7 @@ def _get_transaction(self, write: bool = False):
         """
         Context manager for LMDB transactions.
 
-        Args:
+        Params:
             write: Whether the transaction should be writable.
         """
         if not self._env:
@@ -265,7 +265,7 @@ def store(
         """
         Store a conversation model in LMDB.
 
-        Args:
+        Params:
             conv: Conversation model to store
             custom_key: Optional custom key, if not provided, hash will be used
 
@@ -313,7 +313,7 @@ def get(self, key: str) -> Optional[ConversationInStore]:
         """
         Retrieve conversation data by key.
 
-        Args:
+        Params:
             key: Storage key (hash or custom key)
 
         Returns:
@@ -342,7 +342,7 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt
         Search conversation data by message list.
         Tries raw matching, then sanitized matching, and finally fuzzy matching.
 
-        Args:
+        Params:
             model: Model name
             messages: List of messages to match
 
@@ -382,7 +382,7 @@ def _find_by_message_list(
         """
         Internal find implementation based on a message list.
 
-        Args:
+        Params:
             model: Model name
             messages: Message list to hash
             fuzzy: Whether to use fuzzy hashing
diff --git a/app/utils/helper.py b/app/utils/helper.py
index ae96f05..752aed9 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -19,48 +19,45 @@
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
     "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n"
-    "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n"
+    "1. Respond ONLY with a single [tool_calls] block. NO conversational text, NO explanations, NO filler.\n"
     "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n"
     "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n"
     "EXACT SYNTAX TEMPLATE:\n"
-    "[ToolCalls]\n"
-    "[Call:tool_name]\n"
-    "[CallParameter:arg_name]\n"
+    "[tool_calls]\n"
+    "[call:tool_name]\n"
+    "[call_parameter:parameter_name]\n"
     "```\n"
     "value\n"
     "```\n"
-    "[/CallParameter]\n"
-    "[/Call]\n"
-    "[/ToolCalls]\n\n"
+    "[/call_parameter]\n"
+    "[/call]\n"
+    "[/tool_calls]\n\n"
     "CRITICAL: Every tag MUST be opened and closed accurately.\n\n"
-    "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n"
-)
-TOOL_BLOCK_RE = re.compile(
-    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
+    "Multiple tools: List them sequentially inside one [tool_calls] block. No tool: respond naturally, NEVER use protocol tags.\n"
 )
+TOOL_BLOCK_RE = re.compile(r"\[tool_calls]\s*(.*?)\s*\[/tool_calls]", re.DOTALL | re.IGNORECASE)
 TOOL_CALL_RE = re.compile(
-    r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE
+    r"\[call:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]",
+    r"\[tool_results]\s*(.*?)\s*\[/tool_results]",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]",
+    r"\[result:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/result]",
     re.DOTALL | re.IGNORECASE,
 )
-TAGGED_ARG_RE = re.compile(
-    r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]",
+TAGGED_PARAM_RE = re.compile(
+    r"\[call_parameter:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/call_parameter]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]",
+    r"\[tool_result]\s*(.*?)\s*\[/tool_result]",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE)
-CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
-CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE)
-COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
+CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>", re.IGNORECASE)
+CHATML_START_RE = re.compile(r"<\|im_start\|>\s*(\w+)\s*\n?", re.IGNORECASE)
+CHATML_END_RE = re.compile(r"<\|im_end\|>", re.IGNORECASE)
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
     re.IGNORECASE,
@@ -134,15 +131,14 @@ def _strip_param_fences(s: str) -> str:
     return s[n:-n].strip()
 
 
-def unescape_llm_text(s: str) -> str:
+def _repair_param_value(s: str) -> str:
     """
-    Standardize and repair LLM-generated text fragments (unescaping, link normalization)
+    Standardize and repair LLM-generated parameter values
     to ensure compatibility with specialized clients like Roo Code.
     """
     if not s:
         return ""
 
-    s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
     s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s)
 
     return s
@@ -248,7 +244,7 @@ def strip_system_hints(text: str) -> str:
     cleaned = TOOL_CALL_RE.sub("", cleaned)
     cleaned = RESPONSE_BLOCK_RE.sub("", cleaned)
     cleaned = RESPONSE_ITEM_RE.sub("", cleaned)
-    cleaned = TAGGED_ARG_RE.sub("", cleaned)
+    cleaned = TAGGED_PARAM_RE.sub("", cleaned)
     cleaned = TAGGED_RESULT_RE.sub("", cleaned)
 
     return cleaned
@@ -257,38 +253,37 @@ def strip_system_hints(text: str) -> str:
 def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]:
     """
     Extract tool metadata and return text stripped of technical markers.
-    Arguments are parsed into JSON and assigned deterministic call IDs.
+    Parameters are parsed into JSON and assigned deterministic call IDs.
     """
     if not text:
         return text, []
 
     tool_calls: list[ToolCall] = []
 
-    def _create_tool_call(name: str, raw_args: str) -> None:
+    def _create_tool_call(name: str, raw_params: str) -> None:
         if not extract:
             return
+
+        name = name.strip()
         if not name:
             logger.warning("Encountered tool_call without a function name.")
             return
 
-        name = unescape_llm_text(name.strip())
-        raw_args = unescape_llm_text(raw_args)
-
-        arg_matches = TAGGED_ARG_RE.findall(raw_args)
-        if arg_matches:
-            args_dict = {
-                arg_name.strip(): _strip_param_fences(arg_value)
-                for arg_name, arg_value in arg_matches
+        param_matches = TAGGED_PARAM_RE.findall(raw_params)
+        if param_matches:
+            params_dict = {
+                param_name.strip(): _repair_param_value(_strip_param_fences(param_value))
+                for param_name, param_value in param_matches
             }
-            arguments = orjson.dumps(args_dict).decode("utf-8")
-            logger.debug(f"Successfully parsed {len(args_dict)} arguments for tool: {name}")
+            arguments = orjson.dumps(params_dict).decode("utf-8")
+            logger.debug(f"Successfully parsed {len(params_dict)} parameters for tool: {name}")
         else:
-            cleaned_raw = raw_args.strip()
+            cleaned_raw = raw_params.strip()
             if not cleaned_raw:
-                logger.debug(f"Successfully parsed 0 arguments for tool: {name}")
+                logger.debug(f"Successfully parsed 0 parameters for tool: {name}")
             else:
                 logger.warning(
-                    f"Malformed arguments for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}"
+                    f"Malformed parameters for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}"
                 )
             arguments = "{}"
 
@@ -323,7 +318,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
 
 
 def text_from_message(message: Message) -> str:
-    """Concatenate text and tool arguments from a message for token estimation."""
+    """Concatenate text and tool parameters from a message for token estimation."""
     base_text = ""
     if isinstance(message.content, str):
         base_text = message.content
@@ -335,8 +330,8 @@ def text_from_message(message: Message) -> str:
         base_text = ""
 
     if message.tool_calls:
-        tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls)
-        base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text
+        tool_param_text = "".join(call.function.arguments or "" for call in message.tool_calls)
+        base_text = f"{base_text}\n{tool_param_text}" if base_text else tool_param_text
 
     return base_text
 
diff --git a/app/utils/logging.py b/app/utils/logging.py
index 87fcc7f..da417f1 100644
--- a/app/utils/logging.py
+++ b/app/utils/logging.py
@@ -15,7 +15,7 @@ def setup_logging(
     """
     Setup loguru logging configuration to unify all project logging output
 
-    Args:
+    Params:
         level: Log level
         diagnose: Whether to enable diagnostic information
         backtrace: Whether to enable backtrace information

From 8792948642f510ca294323c12e90232cb1926e06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 22:35:39 +0700
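Subject: [PATCH 122/236] Refactor: Wrap serialized tool call parameters in
 code fences.

- Emit each [call_parameter] value, and any raw non-dict parameter text,
  inside a triple-backtick block to match the required protocol hint.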
---
 app/services/client.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 1c826e5..90474ad 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -125,11 +125,14 @@ async def process_message(
                     if isinstance(parsed_params, dict):
                         for k, v in parsed_params.items():
                             val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8")
-                            formatted_params += f"[call_parameter:{k}]{val_str}[/call_parameter]\n"
+                            # Wrap value in triple backticks to match the required protocol hint
+                            formatted_params += (
+                                f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n"
+                            )
                     else:
-                        formatted_params += params_text
+                        formatted_params += f"```\n{params_text}\n```\n"
                 except orjson.JSONDecodeError:
-                    formatted_params += params_text
+                    formatted_params += f"```\n{params_text}\n```\n"
 
                 tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]")
 

From 5482e0cd51fa8051a92dafb061fa4677e108b10f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 22:56:47 +0700
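Subject: [PATCH 123/236] Refactor: Restore CommonMark unescaping for tool
 call parsing.

- Re-add COMMONMARK_UNESCAPE_RE and rename _repair_param_value to
  repair_param_value.
- Apply repair_param_value to the tool name and raw parameters before tag
  matching instead of to each extracted parameter value.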
---
 app/services/client.py |  1 -
 app/utils/helper.py    | 12 ++++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 90474ad..9a2742b 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -125,7 +125,6 @@ async def process_message(
                     if isinstance(parsed_params, dict):
                         for k, v in parsed_params.items():
                             val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8")
-                            # Wrap value in triple backticks to match the required protocol hint
                             formatted_params += (
                                 f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n"
                             )
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 752aed9..3576667 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -58,6 +58,7 @@
 CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>", re.IGNORECASE)
 CHATML_START_RE = re.compile(r"<\|im_start\|>\s*(\w+)\s*\n?", re.IGNORECASE)
 CHATML_END_RE = re.compile(r"<\|im_end\|>", re.IGNORECASE)
+COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
     re.IGNORECASE,
@@ -131,14 +132,15 @@ def _strip_param_fences(s: str) -> str:
     return s[n:-n].strip()
 
 
-def _repair_param_value(s: str) -> str:
+def repair_param_value(s: str) -> str:
     """
-    Standardize and repair LLM-generated parameter values
+    Standardize and repair LLM-generated values (unescaping, link normalization)
     to ensure compatibility with specialized clients like Roo Code.
     """
     if not s:
         return ""
 
+    s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
     s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s)
 
     return s
@@ -264,7 +266,9 @@ def _create_tool_call(name: str, raw_params: str) -> None:
         if not extract:
             return
 
-        name = name.strip()
+        name = repair_param_value(name.strip())
+        raw_params = repair_param_value(raw_params)
+
         if not name:
             logger.warning("Encountered tool_call without a function name.")
             return
@@ -272,7 +276,7 @@ def _create_tool_call(name: str, raw_params: str) -> None:
         param_matches = TAGGED_PARAM_RE.findall(raw_params)
         if param_matches:
             params_dict = {
-                param_name.strip(): _repair_param_value(_strip_param_fences(param_value))
+                param_name.strip(): _strip_param_fences(param_value)
                 for param_name, param_value in param_matches
             }
             arguments = orjson.dumps(params_dict).decode("utf-8")

From b324aef75d5a868d9ea409dffa2adc173f69c63b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 23:10:53 +0700
Subject: [PATCH 124/236] Revert "Refactor: Remove all escape logic handlers."

This reverts commit ce43d63c
---
 app/server/chat.py     | 61 ++++++++++++++++---------------
 app/services/client.py | 34 +++++++++---------
 app/services/lmdb.py   | 22 ++++++------
 app/utils/helper.py    | 81 +++++++++++++++++++++---------------------
 app/utils/logging.py   |  2 +-
 5 files changed, 103 insertions(+), 97 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 98277d6..881f7c9 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -121,18 +121,18 @@ def _calculate_usage(
 ) -> tuple[int, int, int]:
     """Calculate prompt, completion and total tokens consistently."""
     prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages)
-    tool_params_text = ""
+    tool_args_text = ""
     if tool_calls:
         for call in tool_calls:
             if hasattr(call, "function"):
-                tool_params_text += call.function.arguments or ""
+                tool_args_text += call.function.arguments or ""
             elif isinstance(call, dict):
-                tool_params_text += call.get("function", {}).get("arguments", "")
+                tool_args_text += call.get("function", {}).get("arguments", "")
 
     completion_basis = assistant_text or ""
-    if tool_params_text:
+    if tool_args_text:
         completion_basis = (
-            f"{completion_basis}\n{tool_params_text}" if completion_basis else tool_params_text
+            f"{completion_basis}\n{tool_args_text}" if completion_basis else tool_args_text
         )
 
     completion_tokens = estimate_tokens(completion_basis)
@@ -343,7 +343,7 @@ def _build_tool_prompt(
     tools: list[Tool],
     tool_choice: str | ToolChoiceFunction | None,
 ) -> str:
-    """Generate a system prompt describing available tools and the snake_case protocol."""
+    """Generate a system prompt describing available tools and the PascalCase protocol."""
     if not tools:
         return ""
 
@@ -359,10 +359,10 @@ def _build_tool_prompt(
             schema_text = orjson.dumps(function.parameters, option=orjson.OPT_SORT_KEYS).decode(
                 "utf-8"
             )
-            lines.append("Parameters JSON schema:")
+            lines.append("Arguments JSON schema:")
             lines.append(schema_text)
         else:
-            lines.append("Parameters JSON schema: {}")
+            lines.append("Arguments JSON schema: {}")
 
     if tool_choice == "none":
         lines.append(
@@ -760,28 +760,28 @@ def __init__(self):
 
         self.STATE_MARKERS = {
             "TOOL": {
-                "starts": ["[tool_calls]"],
-                "ends": ["[/tool_calls]"],
+                "starts": ["[ToolCalls]", "\\[ToolCalls\\]"],
+                "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"],
             },
             "ORPHAN": {
-                "starts": ["[call:"],
-                "ends": ["[/call]"],
+                "starts": ["[Call:", "\\[Call:", "\\[Call\\:"],
+                "ends": ["[/Call]", "\\[/Call\\]"],
             },
             "RESP": {
-                "starts": ["[tool_results]"],
-                "ends": ["[/tool_results]"],
+                "starts": ["[ToolResults]", "\\[ToolResults\\]"],
+                "ends": ["[/ToolResults]", "\\[/ToolResults\\]"],
             },
-            "PARAM": {
-                "starts": ["[call_parameter:"],
-                "ends": ["[/call_parameter]"],
+            "ARG": {
+                "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"],
+                "ends": ["[/CallParameter]", "\\[/CallParameter\\]"],
             },
             "RESULT": {
-                "starts": ["[tool_result]"],
-                "ends": ["[/tool_result]"],
+                "starts": ["[ToolResult]", "\\[ToolResult\\]"],
+                "ends": ["[/ToolResult]", "\\[/ToolResult\\]"],
             },
             "TAG": {
-                "starts": ["<|im_start|>"],
-                "ends": ["<|im_end|>"],
+                "starts": ["<|im_start|>", "\\<|im\\_start|\\>"],
+                "ends": ["<|im_end|>", "\\<|im\\_end|\\>"],
             },
         }
 
@@ -794,10 +794,15 @@ def __init__(self):
 
         self.ORPHAN_ENDS = [
             "<|im_end|>",
-            "[/call]",
-            "[/tool_calls]",
-            "[/call_parameter]",
-            "[/tool_result]",
+            "\\<|im\\_end|\\>",
+            "[/Call]",
+            "\\[/Call\\]",
+            "[/ToolCalls]",
+            "\\[/ToolCalls\\]",
+            "[/CallParameter]",
+            "\\[/CallParameter\\]",
+            "[/ToolResult]",
+            "\\[/ToolResult\\]",
         ]
 
         self.WATCH_MARKERS = []
@@ -871,8 +876,8 @@ def process(self, chunk: str) -> str:
                         self.buffer = self.buffer[-max_end_len:]
                     break
 
-            elif self.state == "IN_PARAM":
-                cfg = self.STATE_MARKERS["PARAM"]
+            elif self.state == "IN_ARG":
+                cfg = self.STATE_MARKERS["ARG"]
                 found_idx, found_len = -1, 0
                 for p in cfg["ends"]:
                     idx = buf_low.find(p.lower())
@@ -998,7 +1003,7 @@ def process(self, chunk: str) -> str:
     def flush(self) -> str:
         """Release remaining buffer content and perform final cleanup at stream end."""
         res = ""
-        if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_PARAM", "IN_RESULT"):
+        if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"):
             res = ""
         elif self.state == "IN_BLOCK" and self.current_role != "tool":
             res = self.buffer
diff --git a/app/services/client.py b/app/services/client.py
index 9a2742b..6ab80cd 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -70,8 +70,8 @@ async def process_message(
         message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True
     ) -> tuple[str, list[Path | str]]:
         """
-        Process a Message into Gemini API format using the snake_case technical protocol.
-        Extracts text, handles files, and appends tool_calls/tool_results blocks.
+        Process a Message into Gemini API format using the PascalCase technical protocol.
+        Extracts text, handles files, and appends ToolCalls/ToolResults blocks.
         """
         files: list[Path | str] = []
         text_fragments: list[str] = []
@@ -109,34 +109,34 @@ async def process_message(
         if message.role == "tool":
             tool_name = message.name or "unknown"
             combined_content = "\n".join(text_fragments).strip()
-            res_block = f"[result:{tool_name}]\n[tool_result]\n{combined_content}\n[/tool_result]\n[/result]"
+            res_block = (
+                f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]"
+            )
             if wrap_tool:
-                text_fragments = [f"[tool_results]\n{res_block}\n[/tool_results]"]
+                text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"]
             else:
                 text_fragments = [res_block]
 
         if message.tool_calls:
             tool_blocks: list[str] = []
             for call in message.tool_calls:
-                params_text = call.function.arguments.strip()
-                formatted_params = ""
+                args_text = call.function.arguments.strip()
+                formatted_args = "@args\n"
                 try:
-                    parsed_params = orjson.loads(params_text)
-                    if isinstance(parsed_params, dict):
-                        for k, v in parsed_params.items():
+                    parsed_args = orjson.loads(args_text)
+                    if isinstance(parsed_args, dict):
+                        for k, v in parsed_args.items():
                             val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8")
-                            formatted_params += (
-                                f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n"
-                            )
+                            formatted_args += f"[CallParameter:{k}]{val_str}[/CallParameter]\n"
                     else:
-                        formatted_params += f"```\n{params_text}\n```\n"
+                        formatted_args += args_text
                 except orjson.JSONDecodeError:
-                    formatted_params += f"```\n{params_text}\n```\n"
+                    formatted_args += args_text
 
-                tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]")
+                tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_args}[/Call]")
 
             if tool_blocks:
-                tool_section = "[tool_calls]\n" + "\n".join(tool_blocks) + "\n[/tool_calls]"
+                tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]"
                 text_fragments.append(tool_section)
 
         model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
@@ -168,7 +168,7 @@ async def process_conversation(
                     i += 1
 
                 combined_tool_content = "\n".join(tool_blocks)
-                wrapped_content = f"[tool_results]\n{combined_tool_content}\n[/tool_results]"
+                wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]"
                 conversation.append(add_tag("tool", wrapped_content))
             else:
                 input_part, files_part = await GeminiClientWrapper.process_message(
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 2f59662..ad92bbf 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -99,17 +99,17 @@ def _hash_message(message: Message, fuzzy: bool = False) -> str:
     if message.tool_calls:
         calls_data = []
         for tc in message.tool_calls:
-            params = tc.function.arguments or "{}"
+            args = tc.function.arguments or "{}"
             try:
-                parsed = orjson.loads(params)
-                canon_params = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8")
+                parsed = orjson.loads(args)
+                canon_args = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8")
             except orjson.JSONDecodeError:
-                canon_params = params
+                canon_args = args
 
             calls_data.append(
                 {
                     "name": tc.function.name,
-                    "arguments": canon_params,
+                    "arguments": canon_args,
                 }
             )
         calls_data.sort(key=lambda x: (x["name"], x["arguments"]))
@@ -149,7 +149,7 @@ def __init__(
         """
         Initialize LMDB store.
 
-        Params:
+        Args:
             db_path: Path to LMDB database directory
             max_db_size: Maximum database size in bytes (default: 256 MB)
             retention_days: Number of days to retain conversations (default: 14, 0 disables cleanup)
@@ -194,7 +194,7 @@ def _get_transaction(self, write: bool = False):
         """
         Context manager for LMDB transactions.
 
-        Params:
+        Args:
             write: Whether the transaction should be writable.
         """
         if not self._env:
@@ -265,7 +265,7 @@ def store(
         """
         Store a conversation model in LMDB.
 
-        Params:
+        Args:
             conv: Conversation model to store
             custom_key: Optional custom key, if not provided, hash will be used
 
@@ -313,7 +313,7 @@ def get(self, key: str) -> Optional[ConversationInStore]:
         """
         Retrieve conversation data by key.
 
-        Params:
+        Args:
             key: Storage key (hash or custom key)
 
         Returns:
@@ -342,7 +342,7 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt
         Search conversation data by message list.
         Tries raw matching, then sanitized matching, and finally fuzzy matching.
 
-        Params:
+        Args:
             model: Model name
             messages: List of messages to match
 
@@ -382,7 +382,7 @@ def _find_by_message_list(
         """
         Internal find implementation based on a message list.
 
-        Params:
+        Args:
             model: Model name
             messages: Message list to hash
             fuzzy: Whether to use fuzzy hashing
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 3576667..ae96f05 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -19,45 +19,47 @@
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
     "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n"
-    "1. Respond ONLY with a single [tool_calls] block. NO conversational text, NO explanations, NO filler.\n"
+    "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n"
     "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n"
     "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n"
     "EXACT SYNTAX TEMPLATE:\n"
-    "[tool_calls]\n"
-    "[call:tool_name]\n"
-    "[call_parameter:parameter_name]\n"
+    "[ToolCalls]\n"
+    "[Call:tool_name]\n"
+    "[CallParameter:arg_name]\n"
     "```\n"
     "value\n"
     "```\n"
-    "[/call_parameter]\n"
-    "[/call]\n"
-    "[/tool_calls]\n\n"
+    "[/CallParameter]\n"
+    "[/Call]\n"
+    "[/ToolCalls]\n\n"
     "CRITICAL: Every tag MUST be opened and closed accurately.\n\n"
-    "Multiple tools: List them sequentially inside one [tool_calls] block. No tool: respond naturally, NEVER use protocol tags.\n"
+    "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n"
+)
+TOOL_BLOCK_RE = re.compile(
+    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
 )
-TOOL_BLOCK_RE = re.compile(r"\[tool_calls]\s*(.*?)\s*\[/tool_calls]", re.DOTALL | re.IGNORECASE)
 TOOL_CALL_RE = re.compile(
-    r"\[call:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/call]", re.DOTALL | re.IGNORECASE
+    r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"\[tool_results]\s*(.*?)\s*\[/tool_results]",
+    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"\[result:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/result]",
+    r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]",
     re.DOTALL | re.IGNORECASE,
 )
-TAGGED_PARAM_RE = re.compile(
-    r"\[call_parameter:((?:[^]\\]|\\.)+)]\s*(.*?)\s*\[/call_parameter]",
+TAGGED_ARG_RE = re.compile(
+    r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"\[tool_result]\s*(.*?)\s*\[/tool_result]",
+    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(r"<\|im_(?:start|end)\|>", re.IGNORECASE)
-CHATML_START_RE = re.compile(r"<\|im_start\|>\s*(\w+)\s*\n?", re.IGNORECASE)
-CHATML_END_RE = re.compile(r"<\|im_end\|>", re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE)
+CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
+CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
@@ -132,9 +134,9 @@ def _strip_param_fences(s: str) -> str:
     return s[n:-n].strip()
 
 
-def repair_param_value(s: str) -> str:
+def unescape_llm_text(s: str) -> str:
     """
-    Standardize and repair LLM-generated values (unescaping, link normalization)
+    Standardize and repair LLM-generated text fragments (unescaping, link normalization)
     to ensure compatibility with specialized clients like Roo Code.
     """
     if not s:
@@ -246,7 +248,7 @@ def strip_system_hints(text: str) -> str:
     cleaned = TOOL_CALL_RE.sub("", cleaned)
     cleaned = RESPONSE_BLOCK_RE.sub("", cleaned)
     cleaned = RESPONSE_ITEM_RE.sub("", cleaned)
-    cleaned = TAGGED_PARAM_RE.sub("", cleaned)
+    cleaned = TAGGED_ARG_RE.sub("", cleaned)
     cleaned = TAGGED_RESULT_RE.sub("", cleaned)
 
     return cleaned
@@ -255,39 +257,38 @@ def strip_system_hints(text: str) -> str:
 def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]:
     """
     Extract tool metadata and return text stripped of technical markers.
-    Parameters are parsed into JSON and assigned deterministic call IDs.
+    Arguments are parsed into JSON and assigned deterministic call IDs.
     """
     if not text:
         return text, []
 
     tool_calls: list[ToolCall] = []
 
-    def _create_tool_call(name: str, raw_params: str) -> None:
+    def _create_tool_call(name: str, raw_args: str) -> None:
         if not extract:
             return
-
-        name = repair_param_value(name.strip())
-        raw_params = repair_param_value(raw_params)
-
         if not name:
             logger.warning("Encountered tool_call without a function name.")
             return
 
-        param_matches = TAGGED_PARAM_RE.findall(raw_params)
-        if param_matches:
-            params_dict = {
-                param_name.strip(): _strip_param_fences(param_value)
-                for param_name, param_value in param_matches
+        name = unescape_llm_text(name.strip())
+        raw_args = unescape_llm_text(raw_args)
+
+        arg_matches = TAGGED_ARG_RE.findall(raw_args)
+        if arg_matches:
+            args_dict = {
+                arg_name.strip(): _strip_param_fences(arg_value)
+                for arg_name, arg_value in arg_matches
             }
-            arguments = orjson.dumps(params_dict).decode("utf-8")
-            logger.debug(f"Successfully parsed {len(params_dict)} parameters for tool: {name}")
+            arguments = orjson.dumps(args_dict).decode("utf-8")
+            logger.debug(f"Successfully parsed {len(args_dict)} arguments for tool: {name}")
         else:
-            cleaned_raw = raw_params.strip()
+            cleaned_raw = raw_args.strip()
             if not cleaned_raw:
-                logger.debug(f"Successfully parsed 0 parameters for tool: {name}")
+                logger.debug(f"Successfully parsed 0 arguments for tool: {name}")
             else:
                 logger.warning(
-                    f"Malformed parameters for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}"
+                    f"Malformed arguments for tool '{name}'. Text found but no valid tags: {reprlib.repr(cleaned_raw)}"
                 )
             arguments = "{}"
 
@@ -322,7 +323,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
 
 
 def text_from_message(message: Message) -> str:
-    """Concatenate text and tool parameters from a message for token estimation."""
+    """Concatenate text and tool arguments from a message for token estimation."""
     base_text = ""
     if isinstance(message.content, str):
         base_text = message.content
@@ -334,8 +335,8 @@ def text_from_message(message: Message) -> str:
         base_text = ""
 
     if message.tool_calls:
-        tool_param_text = "".join(call.function.arguments or "" for call in message.tool_calls)
-        base_text = f"{base_text}\n{tool_param_text}" if base_text else tool_param_text
+        tool_arg_text = "".join(call.function.arguments or "" for call in message.tool_calls)
+        base_text = f"{base_text}\n{tool_arg_text}" if base_text else tool_arg_text
 
     return base_text
 
diff --git a/app/utils/logging.py b/app/utils/logging.py
index da417f1..87fcc7f 100644
--- a/app/utils/logging.py
+++ b/app/utils/logging.py
@@ -15,7 +15,7 @@ def setup_logging(
     """
     Setup loguru logging configuration to unify all project logging output
 
-    Params:
+    Args:
         level: Log level
         diagnose: Whether to enable diagnostic information
         backtrace: Whether to enable backtrace information

From 8ef108d4827c29c617fbb968f2c09d7fe8d83fcd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 23:11:36 +0700
Subject: [PATCH 125/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure

---
 app/services/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/services/client.py b/app/services/client.py
index 6ab80cd..0b2aea5 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -121,7 +121,7 @@ async def process_message(
             tool_blocks: list[str] = []
             for call in message.tool_calls:
                 args_text = call.function.arguments.strip()
-                formatted_args = "@args\n"
+                formatted_args = ""
                 try:
                     parsed_args = orjson.loads(args_text)
                     if isinstance(parsed_args, dict):

From 30043e585650c11876312b6dadc2a52eaaacdf60 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 23:14:45 +0700
Subject: [PATCH 126/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure

---
 app/utils/helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index ae96f05..0b91993 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -25,7 +25,7 @@
     "EXACT SYNTAX TEMPLATE:\n"
     "[ToolCalls]\n"
     "[Call:tool_name]\n"
-    "[CallParameter:arg_name]\n"
+    "[CallParameter:parameter_name]\n"
     "```\n"
     "value\n"
     "```\n"

From bc888d1fbb8a076074486293531441553c956028 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 23:22:26 +0700
Subject: [PATCH 127/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure

---
 app/services/client.py | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 0b2aea5..70dfce9 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -120,20 +120,25 @@ async def process_message(
         if message.tool_calls:
             tool_blocks: list[str] = []
             for call in message.tool_calls:
-                args_text = call.function.arguments.strip()
-                formatted_args = ""
-                try:
-                    parsed_args = orjson.loads(args_text)
-                    if isinstance(parsed_args, dict):
-                        for k, v in parsed_args.items():
-                            val_str = v if isinstance(v, str) else orjson.dumps(v).decode("utf-8")
-                            formatted_args += f"[CallParameter:{k}]{val_str}[/CallParameter]\n"
-                    else:
-                        formatted_args += args_text
-                except orjson.JSONDecodeError:
-                    formatted_args += args_text
-
-                tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_args}[/Call]")
+                params_text = call.function.arguments.strip()
+                formatted_params = ""
+                if params_text:
+                    try:
+                        parsed_params = orjson.loads(params_text)
+                        if isinstance(parsed_params, dict):
+                            for k, v in parsed_params.items():
+                                val_str = (
+                                    v if isinstance(v, str) else orjson.dumps(v).decode("utf-8")
+                                )
+                                formatted_params += (
+                                    f"[CallParameter:{k}]\n```\n{val_str}\n```\n[/CallParameter]\n"
+                                )
+                        else:
+                            formatted_params += f"```\n{params_text}\n```\n"
+                    except orjson.JSONDecodeError:
+                        formatted_params += f"```\n{params_text}\n```\n"
+
+                tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_params}[/Call]")
 
             if tool_blocks:
                 tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]"

From d5349a096f9564d99b7939c6a32fae1193db0b8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Feb 2026 23:29:23 +0700
Subject: [PATCH 128/236] Refactor: Rewrite the function call format to match
 the client's complex argument structure

---
 app/utils/helper.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 0b91993..82bb562 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -134,7 +134,7 @@ def _strip_param_fences(s: str) -> str:
     return s[n:-n].strip()
 
 
-def unescape_llm_text(s: str) -> str:
+def _repair_param_value(s: str) -> str:
     """
     Standardize and repair LLM-generated text fragments (unescaping, link normalization)
     to ensure compatibility with specialized clients like Roo Code.
@@ -271,8 +271,8 @@ def _create_tool_call(name: str, raw_args: str) -> None:
             logger.warning("Encountered tool_call without a function name.")
             return
 
-        name = unescape_llm_text(name.strip())
-        raw_args = unescape_llm_text(raw_args)
+        name = _repair_param_value(name.strip())
+        raw_args = _repair_param_value(raw_args)
 
         arg_matches = TAGGED_ARG_RE.findall(raw_args)
         if arg_matches:

From 7e217e917778211ad002511d1323739ed2b0e293 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 14 Feb 2026 00:28:49 +0700
Subject: [PATCH 129/236] Reattempt changing tool call tags to `snake_case`.

---
 app/server/chat.py     | 50 ++++++++++++++++++++++++------------------
 app/services/client.py | 20 +++++++----------
 app/utils/helper.py    | 40 +++++++++++++++++++--------------
 3 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 881f7c9..1211c99 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -760,28 +760,32 @@ def __init__(self):
 
         self.STATE_MARKERS = {
             "TOOL": {
-                "starts": ["[ToolCalls]", "\\[ToolCalls\\]"],
-                "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"],
+                "starts": ["[tool_calls]", "\\[tool\\_calls\\]"],
+                "ends": ["[/tool_calls]", "\\[\\/tool\\_calls\\]"],
             },
             "ORPHAN": {
-                "starts": ["[Call:", "\\[Call:", "\\[Call\\:"],
-                "ends": ["[/Call]", "\\[/Call\\]"],
+                "starts": ["[call:", "\\[call\\:"],
+                "ends": ["[/call]", "\\[\\/call\\]"],
             },
             "RESP": {
-                "starts": ["[ToolResults]", "\\[ToolResults\\]"],
-                "ends": ["[/ToolResults]", "\\[/ToolResults\\]"],
+                "starts": ["[tool_results]", "\\[tool\\_results\\]"],
+                "ends": ["[/tool_results]", "\\[\\/tool\\_results\\]"],
             },
             "ARG": {
-                "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"],
-                "ends": ["[/CallParameter]", "\\[/CallParameter\\]"],
+                "starts": ["[call_parameter:", "\\[call\\_parameter\\:"],
+                "ends": ["[/call_parameter]", "\\[\\/call\\_parameter\\]"],
             },
             "RESULT": {
-                "starts": ["[ToolResult]", "\\[ToolResult\\]"],
-                "ends": ["[/ToolResult]", "\\[/ToolResult\\]"],
+                "starts": ["[tool_result]", "\\[tool\\_result\\]"],
+                "ends": ["[/tool_result]", "\\[\\/tool\\_result\\]"],
+            },
+            "ITEM": {
+                "starts": ["[result:", "\\[result\\:"],
+                "ends": ["[/result]", "\\[\\/result\\]"],
             },
             "TAG": {
-                "starts": ["<|im_start|>", "\\<|im\\_start|\\>"],
-                "ends": ["<|im_end|>", "\\<|im\\_end|\\>"],
+                "starts": ["<|im_start|>", "\\<\\|im\\_start\\|\\>"],
+                "ends": ["<|im_end|>", "\\<\\|im\\_end\\|\\>"],
             },
         }
 
@@ -794,15 +798,19 @@ def __init__(self):
 
         self.ORPHAN_ENDS = [
             "<|im_end|>",
-            "\\<|im\\_end|\\>",
-            "[/Call]",
-            "\\[/Call\\]",
-            "[/ToolCalls]",
-            "\\[/ToolCalls\\]",
-            "[/CallParameter]",
-            "\\[/CallParameter\\]",
-            "[/ToolResult]",
-            "\\[/ToolResult\\]",
+            "\\<\\|im\\_end\\|\\>",
+            "[/call]",
+            "\\[\\/call\\]",
+            "[/tool_calls]",
+            "\\[\\/tool\\_calls\\]",
+            "[/call_parameter]",
+            "\\[\\/call\\_parameter\\]",
+            "[/tool_result]",
+            "\\[\\/tool\\_result\\]",
+            "[/tool_results]",
+            "\\[\\/tool\\_results\\]",
+            "[/result]",
+            "\\[\\/result\\]",
         ]
 
         self.WATCH_MARKERS = []
diff --git a/app/services/client.py b/app/services/client.py
index 70dfce9..05e7415 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -70,8 +70,8 @@ async def process_message(
         message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True
     ) -> tuple[str, list[Path | str]]:
         """
-        Process a Message into Gemini API format using the PascalCase technical protocol.
-        Extracts text, handles files, and appends ToolCalls/ToolResults blocks.
+        Process a Message into Gemini API format using the snake_case technical protocol.
+        Extracts text, handles files, and appends tool_calls/tool_results blocks.
         """
         files: list[Path | str] = []
         text_fragments: list[str] = []
@@ -109,11 +109,9 @@ async def process_message(
         if message.role == "tool":
             tool_name = message.name or "unknown"
             combined_content = "\n".join(text_fragments).strip()
-            res_block = (
-                f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]"
-            )
+            res_block = f"[result:{tool_name}]\n[tool_result]\n{combined_content}\n[/tool_result]\n[/result]"
             if wrap_tool:
-                text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"]
+                text_fragments = [f"[tool_results]\n{res_block}\n[/tool_results]"]
             else:
                 text_fragments = [res_block]
 
@@ -130,18 +128,16 @@ async def process_message(
                                 val_str = (
                                     v if isinstance(v, str) else orjson.dumps(v).decode("utf-8")
                                 )
-                                formatted_params += (
-                                    f"[CallParameter:{k}]\n```\n{val_str}\n```\n[/CallParameter]\n"
-                                )
+                                formatted_params += f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n"
                         else:
                             formatted_params += f"```\n{params_text}\n```\n"
                     except orjson.JSONDecodeError:
                         formatted_params += f"```\n{params_text}\n```\n"
 
-                tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_params}[/Call]")
+                tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]")
 
             if tool_blocks:
-                tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]"
+                tool_section = "[tool_calls]\n" + "\n".join(tool_blocks) + "\n[/tool_calls]"
                 text_fragments.append(tool_section)
 
         model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
@@ -173,7 +169,7 @@ async def process_conversation(
                     i += 1
 
                 combined_tool_content = "\n".join(tool_blocks)
-                wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]"
+                wrapped_content = f"[tool_results]\n{combined_tool_content}\n[/tool_results]"
                 conversation.append(add_tag("tool", wrapped_content))
             else:
                 input_part, files_part = await GeminiClientWrapper.process_message(
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 82bb562..a1292d3 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -19,47 +19,53 @@
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
     "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n"
-    "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n"
+    "1. Respond ONLY with a single [tool_calls] block. NO conversational text, NO explanations, NO filler.\n"
     "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n"
     "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n"
     "EXACT SYNTAX TEMPLATE:\n"
-    "[ToolCalls]\n"
-    "[Call:tool_name]\n"
-    "[CallParameter:parameter_name]\n"
+    "[tool_calls]\n"
+    "[call:tool_name]\n"
+    "[call_parameter:parameter_name]\n"
     "```\n"
     "value\n"
     "```\n"
-    "[/CallParameter]\n"
-    "[/Call]\n"
-    "[/ToolCalls]\n\n"
+    "[/call_parameter]\n"
+    "[/call]\n"
+    "[/tool_calls]\n\n"
     "CRITICAL: Every tag MUST be opened and closed accurately.\n\n"
-    "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n"
+    "Multiple tools: List them sequentially inside one [tool_calls] block. No tool: respond naturally, NEVER use protocol tags.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
+    r"(?:\[tool_calls]|\\\[tool\\_calls\\])\s*(.*?)\s*(?:\[/tool_calls]|\\\[\\/tool\\_calls\\])",
+    re.DOTALL | re.IGNORECASE,
 )
 TOOL_CALL_RE = re.compile(
-    r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE
+    r"(?:\[call:|\\\[call\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/call]|\\\[\\/call\\])",
+    re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]",
+    r"(?:\[tool_results]|\\\[tool\\_results\\])\s*(.*?)\s*(?:\[/tool_results]|\\\[\\/tool\\_results\\])",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]",
+    r"(?:\[result:|\\\[result\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/result]|\\\[\\/result\\])",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]",
+    r"(?:\[call_parameter:|\\\[call\\_parameter\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/call_parameter]|\\\[\\/call\\_parameter\\])",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]",
+    r"(?:\[tool_result]|\\\[tool\\_result\\])\s*(.*?)\s*(?:\[/tool_result]|\\\[\\/tool\\_result\\])",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE)
-CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
-CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(
+    r"<\|im_(?:start|end)\|>|\\<\\\|im\\_(?:start|end)\\\|\\>", re.IGNORECASE
+)
+CHATML_START_RE = re.compile(
+    r"(?:<\|im_start\|>|\\<\\\|im\\_start\\\|\\>)\s*(\w+)\s*\n?", re.IGNORECASE
+)
+CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",

From fe30a5d47001c4956f9e64a3eda4cf4c7c4fc9db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 14 Feb 2026 00:51:42 +0700
Subject: [PATCH 130/236] Revert "Reattempt changing tool call tags to
 `snake_case`."

This reverts commit 7e217e917778211ad002511d1323739ed2b0e293.
---
 app/server/chat.py     | 50 ++++++++++++++++++------------------------
 app/services/client.py | 20 ++++++++++-------
 app/utils/helper.py    | 40 ++++++++++++++-------------------
 3 files changed, 50 insertions(+), 60 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 1211c99..881f7c9 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -760,32 +760,28 @@ def __init__(self):
 
         self.STATE_MARKERS = {
             "TOOL": {
-                "starts": ["[tool_calls]", "\\[tool\\_calls\\]"],
-                "ends": ["[/tool_calls]", "\\[\\/tool\\_calls\\]"],
+                "starts": ["[ToolCalls]", "\\[ToolCalls\\]"],
+                "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"],
             },
             "ORPHAN": {
-                "starts": ["[call:", "\\[call\\:"],
-                "ends": ["[/call]", "\\[\\/call\\]"],
+                "starts": ["[Call:", "\\[Call:", "\\[Call\\:"],
+                "ends": ["[/Call]", "\\[/Call\\]"],
             },
             "RESP": {
-                "starts": ["[tool_results]", "\\[tool\\_results\\]"],
-                "ends": ["[/tool_results]", "\\[\\/tool\\_results\\]"],
+                "starts": ["[ToolResults]", "\\[ToolResults\\]"],
+                "ends": ["[/ToolResults]", "\\[/ToolResults\\]"],
             },
             "ARG": {
-                "starts": ["[call_parameter:", "\\[call\\_parameter\\:"],
-                "ends": ["[/call_parameter]", "\\[\\/call\\_parameter\\]"],
+                "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"],
+                "ends": ["[/CallParameter]", "\\[/CallParameter\\]"],
             },
             "RESULT": {
-                "starts": ["[tool_result]", "\\[tool\\_result\\]"],
-                "ends": ["[/tool_result]", "\\[\\/tool\\_result\\]"],
-            },
-            "ITEM": {
-                "starts": ["[result:", "\\[result\\:"],
-                "ends": ["[/result]", "\\[\\/result\\]"],
+                "starts": ["[ToolResult]", "\\[ToolResult\\]"],
+                "ends": ["[/ToolResult]", "\\[/ToolResult\\]"],
             },
             "TAG": {
-                "starts": ["<|im_start|>", "\\<\\|im\\_start\\|\\>"],
-                "ends": ["<|im_end|>", "\\<\\|im\\_end\\|\\>"],
+                "starts": ["<|im_start|>", "\\<|im\\_start|\\>"],
+                "ends": ["<|im_end|>", "\\<|im\\_end|\\>"],
             },
         }
 
@@ -798,19 +794,15 @@ def __init__(self):
 
         self.ORPHAN_ENDS = [
             "<|im_end|>",
-            "\\<\\|im\\_end\\|\\>",
-            "[/call]",
-            "\\[\\/call\\]",
-            "[/tool_calls]",
-            "\\[\\/tool\\_calls\\]",
-            "[/call_parameter]",
-            "\\[\\/call\\_parameter\\]",
-            "[/tool_result]",
-            "\\[\\/tool\\_result\\]",
-            "[/tool_results]",
-            "\\[\\/tool\\_results\\]",
-            "[/result]",
-            "\\[\\/result\\]",
+            "\\<|im\\_end|\\>",
+            "[/Call]",
+            "\\[/Call\\]",
+            "[/ToolCalls]",
+            "\\[/ToolCalls\\]",
+            "[/CallParameter]",
+            "\\[/CallParameter\\]",
+            "[/ToolResult]",
+            "\\[/ToolResult\\]",
         ]
 
         self.WATCH_MARKERS = []
diff --git a/app/services/client.py b/app/services/client.py
index 05e7415..70dfce9 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -70,8 +70,8 @@ async def process_message(
         message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True
     ) -> tuple[str, list[Path | str]]:
         """
-        Process a Message into Gemini API format using the snake_case technical protocol.
-        Extracts text, handles files, and appends tool_calls/tool_results blocks.
+        Process a Message into Gemini API format using the PascalCase technical protocol.
+        Extracts text, handles files, and appends ToolCalls/ToolResults blocks.
         """
         files: list[Path | str] = []
         text_fragments: list[str] = []
@@ -109,9 +109,11 @@ async def process_message(
         if message.role == "tool":
             tool_name = message.name or "unknown"
             combined_content = "\n".join(text_fragments).strip()
-            res_block = f"[result:{tool_name}]\n[tool_result]\n{combined_content}\n[/tool_result]\n[/result]"
+            res_block = (
+                f"[Result:{tool_name}]\n[ToolResult]\n{combined_content}\n[/ToolResult]\n[/Result]"
+            )
             if wrap_tool:
-                text_fragments = [f"[tool_results]\n{res_block}\n[/tool_results]"]
+                text_fragments = [f"[ToolResults]\n{res_block}\n[/ToolResults]"]
             else:
                 text_fragments = [res_block]
 
@@ -128,16 +130,18 @@ async def process_message(
                                 val_str = (
                                     v if isinstance(v, str) else orjson.dumps(v).decode("utf-8")
                                 )
-                                formatted_params += f"[call_parameter:{k}]\n```\n{val_str}\n```\n[/call_parameter]\n"
+                                formatted_params += (
+                                    f"[CallParameter:{k}]\n```\n{val_str}\n```\n[/CallParameter]\n"
+                                )
                         else:
                             formatted_params += f"```\n{params_text}\n```\n"
                     except orjson.JSONDecodeError:
                         formatted_params += f"```\n{params_text}\n```\n"
 
-                tool_blocks.append(f"[call:{call.function.name}]\n{formatted_params}[/call]")
+                tool_blocks.append(f"[Call:{call.function.name}]\n{formatted_params}[/Call]")
 
             if tool_blocks:
-                tool_section = "[tool_calls]\n" + "\n".join(tool_blocks) + "\n[/tool_calls]"
+                tool_section = "[ToolCalls]\n" + "\n".join(tool_blocks) + "\n[/ToolCalls]"
                 text_fragments.append(tool_section)
 
         model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
@@ -169,7 +173,7 @@ async def process_conversation(
                     i += 1
 
                 combined_tool_content = "\n".join(tool_blocks)
-                wrapped_content = f"[tool_results]\n{combined_tool_content}\n[/tool_results]"
+                wrapped_content = f"[ToolResults]\n{combined_tool_content}\n[/ToolResults]"
                 conversation.append(add_tag("tool", wrapped_content))
             else:
                 input_part, files_part = await GeminiClientWrapper.process_message(
diff --git a/app/utils/helper.py b/app/utils/helper.py
index a1292d3..82bb562 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -19,53 +19,47 @@
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
     "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n"
-    "1. Respond ONLY with a single [tool_calls] block. NO conversational text, NO explanations, NO filler.\n"
+    "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n"
     "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n"
     "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n"
     "EXACT SYNTAX TEMPLATE:\n"
-    "[tool_calls]\n"
-    "[call:tool_name]\n"
-    "[call_parameter:parameter_name]\n"
+    "[ToolCalls]\n"
+    "[Call:tool_name]\n"
+    "[CallParameter:parameter_name]\n"
     "```\n"
     "value\n"
     "```\n"
-    "[/call_parameter]\n"
-    "[/call]\n"
-    "[/tool_calls]\n\n"
+    "[/CallParameter]\n"
+    "[/Call]\n"
+    "[/ToolCalls]\n\n"
     "CRITICAL: Every tag MUST be opened and closed accurately.\n\n"
-    "Multiple tools: List them sequentially inside one [tool_calls] block. No tool: respond naturally, NEVER use protocol tags.\n"
+    "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"(?:\[tool_calls]|\\\[tool\\_calls\\])\s*(.*?)\s*(?:\[/tool_calls]|\\\[\\/tool\\_calls\\])",
-    re.DOTALL | re.IGNORECASE,
+    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
 )
 TOOL_CALL_RE = re.compile(
-    r"(?:\[call:|\\\[call\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/call]|\\\[\\/call\\])",
-    re.DOTALL | re.IGNORECASE,
+    r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"(?:\[tool_results]|\\\[tool\\_results\\])\s*(.*?)\s*(?:\[/tool_results]|\\\[\\/tool\\_results\\])",
+    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"(?:\[result:|\\\[result\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/result]|\\\[\\/result\\])",
+    r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"(?:\[call_parameter:|\\\[call\\_parameter\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/call_parameter]|\\\[\\/call\\_parameter\\])",
+    r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"(?:\[tool_result]|\\\[tool\\_result\\])\s*(.*?)\s*(?:\[/tool_result]|\\\[\\/tool\\_result\\])",
+    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(
-    r"<\|im_(?:start|end)\|>|\\<\\\|im\\_(?:start|end)\\\|\\>", re.IGNORECASE
-)
-CHATML_START_RE = re.compile(
-    r"(?:<\|im_start\|>|\\<\\\|im\\_start\\\|\\>)\s*(\w+)\s*\n?", re.IGNORECASE
-)
-CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE)
+CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
+CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",

From 93e9ccdad4a55e1ed99cb2037f45aeba10e32226 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 14 Feb 2026 01:18:56 +0700
Subject: [PATCH 131/236] Refactor: Handle all escape tags

---
 app/server/chat.py   |  38 +++++++++-------
 app/services/lmdb.py | 105 ++++++++++++++++++++++---------------------
 app/utils/helper.py  |  35 ++++++++++-----
 3 files changed, 101 insertions(+), 77 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 881f7c9..934091b 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -270,7 +270,7 @@ def _persist_conversation(
             tool_calls=tool_calls or None,
         )
         full_history = [*messages, current_assistant_message]
-        cleaned_history = db.sanitize_assistant_messages(full_history)
+        cleaned_history = db.sanitize_messages(full_history)
 
         conv = ConversationInStore(
             model=model_name,
@@ -761,27 +761,31 @@ def __init__(self):
         self.STATE_MARKERS = {
             "TOOL": {
                 "starts": ["[ToolCalls]", "\\[ToolCalls\\]"],
-                "ends": ["[/ToolCalls]", "\\[/ToolCalls\\]"],
+                "ends": ["[/ToolCalls]", "\\[\\/ToolCalls\\]"],
             },
             "ORPHAN": {
-                "starts": ["[Call:", "\\[Call:", "\\[Call\\:"],
-                "ends": ["[/Call]", "\\[/Call\\]"],
+                "starts": ["[Call:", "\\[Call\\:"],
+                "ends": ["[/Call]", "\\[\\/Call\\]"],
             },
             "RESP": {
                 "starts": ["[ToolResults]", "\\[ToolResults\\]"],
-                "ends": ["[/ToolResults]", "\\[/ToolResults\\]"],
+                "ends": ["[/ToolResults]", "\\[\\/ToolResults\\]"],
             },
             "ARG": {
-                "starts": ["[CallParameter:", "\\[CallParameter:", "\\[CallParameter\\:"],
-                "ends": ["[/CallParameter]", "\\[/CallParameter\\]"],
+                "starts": ["[CallParameter:", "\\[CallParameter\\:"],
+                "ends": ["[/CallParameter]", "\\[\\/CallParameter\\]"],
             },
             "RESULT": {
                 "starts": ["[ToolResult]", "\\[ToolResult\\]"],
-                "ends": ["[/ToolResult]", "\\[/ToolResult\\]"],
+                "ends": ["[/ToolResult]", "\\[\\/ToolResult\\]"],
+            },
+            "ITEM": {
+                "starts": ["[Result:", "\\[Result\\:"],
+                "ends": ["[/Result]", "\\[\\/Result\\]"],
             },
             "TAG": {
-                "starts": ["<|im_start|>", "\\<|im\\_start|\\>"],
-                "ends": ["<|im_end|>", "\\<|im\\_end|\\>"],
+                "starts": ["<|im_start|>", "\\<\\|im\\_start\\|\\>"],
+                "ends": ["<|im_end|>", "\\<\\|im\\_end\\|\\>"],
             },
         }
 
@@ -794,15 +798,19 @@ def __init__(self):
 
         self.ORPHAN_ENDS = [
             "<|im_end|>",
-            "\\<|im\\_end|\\>",
+            "\\<\\|im\\_end\\|\\>",
             "[/Call]",
-            "\\[/Call\\]",
+            "\\[\\/Call\\]",
             "[/ToolCalls]",
-            "\\[/ToolCalls\\]",
+            "\\[\\/ToolCalls\\]",
             "[/CallParameter]",
-            "\\[/CallParameter\\]",
+            "\\[\\/CallParameter\\]",
             "[/ToolResult]",
-            "\\[/ToolResult\\]",
+            "\\[\\/ToolResult\\]",
+            "[/ToolResults]",
+            "\\[\\/ToolResults\\]",
+            "[/Result]",
+            "\\[\\/Result\\]",
         ]
 
         self.WATCH_MARKERS = []
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index ad92bbf..dd4197a 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -16,6 +16,8 @@
     extract_tool_calls,
     normalize_llm_text,
     remove_tool_call_blocks,
+    strip_system_hints,
+    unescape_text,
 )
 from ..utils.singleton import Singleton
 
@@ -38,6 +40,7 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None:
         return None
 
     text = normalize_llm_text(text)
+    text = unescape_text(text)
 
     text = LMDBConversationStore.remove_think_tags(text)
     text = remove_tool_call_blocks(text)
@@ -589,63 +592,61 @@ def remove_think_tags(text: str) -> str:
         return cleaned_content.strip()
 
     @staticmethod
-    def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
-        """Clean assistant messages of internal markers and move tool calls to metadata."""
+    def sanitize_messages(messages: list[Message]) -> list[Message]:
+        """Clean all messages of internal markers, hints and normalize tool calls."""
         cleaned_messages = []
         for msg in messages:
-            if msg.role == "assistant":
-                if isinstance(msg.content, str):
-                    text = LMDBConversationStore.remove_think_tags(msg.content)
-                    tool_calls = msg.tool_calls
-                    if not tool_calls:
-                        text, tool_calls = extract_tool_calls(text)
-                    else:
-                        text = remove_tool_call_blocks(text).strip()
-
-                    normalized_content = text.strip() or None
-
-                    if normalized_content != msg.content or tool_calls != msg.tool_calls:
-                        cleaned_msg = msg.model_copy(
+            if isinstance(msg.content, str):
+                text = LMDBConversationStore.remove_think_tags(msg.content)
+                tool_calls = msg.tool_calls
+
+                if msg.role == "assistant" and not tool_calls:
+                    text, tool_calls = extract_tool_calls(text)
+                else:
+                    text = strip_system_hints(text)
+
+                normalized_content = text.strip() or None
+
+                if normalized_content != msg.content or tool_calls != msg.tool_calls:
+                    cleaned_msg = msg.model_copy(
+                        update={
+                            "content": normalized_content,
+                            "tool_calls": tool_calls or None,
+                        }
+                    )
+                    cleaned_messages.append(cleaned_msg)
+                else:
+                    cleaned_messages.append(msg)
+            elif isinstance(msg.content, list):
+                new_content = []
+                all_extracted_calls = list(msg.tool_calls or [])
+                changed = False
+
+                for item in msg.content:
+                    if isinstance(item, ContentItem) and item.type == "text" and item.text:
+                        text = LMDBConversationStore.remove_think_tags(item.text)
+                        if msg.role == "assistant" and not msg.tool_calls:
+                            text, extracted = extract_tool_calls(text)
+                            if extracted:
+                                all_extracted_calls.extend(extracted)
+                                changed = True
+                        else:
+                            text = strip_system_hints(text)
+
+                        if text != item.text:
+                            changed = True
+                            item = item.model_copy(update={"text": text.strip() or None})
+                    new_content.append(item)
+
+                if changed:
+                    cleaned_messages.append(
+                        msg.model_copy(
                             update={
-                                "content": normalized_content,
-                                "tool_calls": tool_calls or None,
+                                "content": new_content,
+                                "tool_calls": all_extracted_calls or None,
                             }
                         )
-                        cleaned_messages.append(cleaned_msg)
-                    else:
-                        cleaned_messages.append(msg)
-                elif isinstance(msg.content, list):
-                    new_content = []
-                    all_extracted_calls = list(msg.tool_calls or [])
-                    changed = False
-
-                    for item in msg.content:
-                        if isinstance(item, ContentItem) and item.type == "text" and item.text:
-                            text = LMDBConversationStore.remove_think_tags(item.text)
-                            if not msg.tool_calls:
-                                text, extracted = extract_tool_calls(text)
-                                if extracted:
-                                    all_extracted_calls.extend(extracted)
-                                    changed = True
-                            else:
-                                text = remove_tool_call_blocks(text).strip()
-
-                            if text != item.text:
-                                changed = True
-                                item = item.model_copy(update={"text": text.strip() or None})
-                        new_content.append(item)
-
-                    if changed:
-                        cleaned_messages.append(
-                            msg.model_copy(
-                                update={
-                                    "content": new_content,
-                                    "tool_calls": all_extracted_calls or None,
-                                }
-                            )
-                        )
-                    else:
-                        cleaned_messages.append(msg)
+                    )
                 else:
                     cleaned_messages.append(msg)
             else:
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 82bb562..9c75b45 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -36,30 +36,36 @@
     "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[/ToolCalls\\?]", re.DOTALL | re.IGNORECASE
+    r"(?:\[ToolCalls]|\\\[ToolCalls\\])\s*(.*?)\s*(?:\[/ToolCalls]|\\\[\\/ToolCalls\\])",
+    re.DOTALL | re.IGNORECASE,
 )
 TOOL_CALL_RE = re.compile(
-    r"\\?\[Call\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Call\\?]", re.DOTALL | re.IGNORECASE
+    r"(?:\[Call:|\\\[Call\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/Call]|\\\[\\/Call\\])",
+    re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[/ToolResults\\?]",
+    r"(?:\[ToolResults]|\\\[ToolResults\\])\s*(.*?)\s*(?:\[/ToolResults]|\\\[\\/ToolResults\\])",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"\\?\[Result\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/Result\\?]",
+    r"(?:\[Result:|\\\[Result\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/Result]|\\\[\\/Result\\])",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"\\?\[CallParameter\\?:((?:[^]\\]|\\.)+)\\?]\s*(.*?)\s*\\?\[/CallParameter\\?]",
+    r"(?:\[CallParameter:|\\\[CallParameter\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/CallParameter]|\\\[\\/CallParameter\\])",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[/ToolResult\\?]",
+    r"(?:\[ToolResult]|\\\[ToolResult\\])\s*(.*?)\s*(?:\[/ToolResult]|\\\[\\/ToolResult\\])",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(r"\\?<\|im\\?_(?:start|end)\|\\?>", re.IGNORECASE)
-CHATML_START_RE = re.compile(r"\\?<\|im\\?_start\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
-CHATML_END_RE = re.compile(r"\\?<\|im\\?_end\|\\?>", re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(
+    r"<\|im_(?:start|end)\|>|\\<\\\|im\\_(?:start|end)\\\|\\>", re.IGNORECASE
+)
+CHATML_START_RE = re.compile(
+    r"(?:<\|im_start\|>|\\<\\\|im\\_start\\\|\\>)\s*(\w+)\s*\n?", re.IGNORECASE
+)
+CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 FILE_PATH_PATTERN = re.compile(
     r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
@@ -103,6 +109,13 @@ def normalize_llm_text(s: str) -> str:
     return s
 
 
+def unescape_text(s: str) -> str:
+    """Remove CommonMark backslash escapes."""
+    if not s:
+        return ""
+    return COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
+
+
 def _strip_google_search(match: re.Match) -> str:
     """Extract raw text from Google Search links if it looks like a file path."""
     text_to_check = match.group("text") if match.group("text") else unquote(match.group("query"))
@@ -231,7 +244,9 @@ def strip_system_hints(text: str) -> str:
     if not text:
         return text
 
-    cleaned = text.replace(TOOL_WRAP_HINT, "").replace(TOOL_HINT_STRIPPED, "")
+    t_unescaped = unescape_text(text)
+
+    cleaned = t_unescaped.replace(TOOL_WRAP_HINT, "").replace(TOOL_HINT_STRIPPED, "")
 
     if TOOL_HINT_LINE_START and TOOL_HINT_LINE_END:
         pattern = rf"\n?{re.escape(TOOL_HINT_LINE_START)}.*?{re.escape(TOOL_HINT_LINE_END)}\.?\n?"

From 30f61257a951b19fa0492fce6efa3421a2b990cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 14 Feb 2026 01:39:11 +0700
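Subject: [PATCH 132/236] Refactor: Handle all escape tags

Follow-up to the previous patch: update the remaining `store` and `find`
call sites in `app/services/lmdb.py` to use the renamed `sanitize_messages`.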

---
 app/services/lmdb.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index dd4197a..abf8859 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -279,7 +279,7 @@ def store(
             raise ValueError("Messages list cannot be empty")
 
         # Ensure consistent sanitization before hashing and storage
-        sanitized_messages = self.sanitize_assistant_messages(conv.messages)
+        sanitized_messages = self.sanitize_messages(conv.messages)
         conv.messages = sanitized_messages
 
         message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages)
@@ -359,7 +359,7 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt
             logger.debug(f"Session found for '{model}' with {len(messages)} raw messages.")
             return conv
 
-        cleaned_messages = self.sanitize_assistant_messages(messages)
+        cleaned_messages = self.sanitize_messages(messages)
         if cleaned_messages != messages:
             if conv := self._find_by_message_list(model, cleaned_messages):
                 logger.debug(

From a35525234ef206422570b168934d5e7c53a7a848 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 14 Feb 2026 09:12:24 +0700
Subject: [PATCH 133/236] Refactor: Remove `_strip_google_search` as it's no
 longer needed

---
 app/utils/helper.py | 44 ++++----------------------------------------
 1 file changed, 4 insertions(+), 40 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 9c75b45..f6a3e77 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -8,7 +8,7 @@
 import tempfile
 import unicodedata
 from pathlib import Path
-from urllib.parse import unquote, urlparse
+from urllib.parse import urlparse
 
 import httpx
 import orjson
@@ -67,18 +67,6 @@
 )
 CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
-FILE_PATH_PATTERN = re.compile(
-    r"^(?=.*[./\\]|.*:\d+|^(?:Dockerfile|Makefile|Jenkinsfile|Procfile|Rakefile|Gemfile|Vagrantfile|Caddyfile|Justfile|LICENSE|README|CONTRIBUTING|CODEOWNERS|AUTHORS|NOTICE|CHANGELOG)$)([a-zA-Z0-9_./\\-]+(?::\d+)?)$",
-    re.IGNORECASE,
-)
-GOOGLE_SEARCH_PATTERN = re.compile(
-    r"(?P<md_start>`?\[`?)?"
-    r"(?P<text>[^]]+)?"
-    r"(?(md_start)`?]\()?"
-    r"https://www\.google\.com/search\?q=(?P<query>[^&\s\"'<>)]+)"
-    r"(?(md_start)\)?`?)",
-    re.IGNORECASE,
-)
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
 TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
@@ -110,22 +98,12 @@ def normalize_llm_text(s: str) -> str:
 
 
 def unescape_text(s: str) -> str:
-    """Remove CommonMark backslash escapes."""
+    """Remove CommonMark backslash escapes from LLM-generated text."""
     if not s:
         return ""
     return COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
 
 
-def _strip_google_search(match: re.Match) -> str:
-    """Extract raw text from Google Search links if it looks like a file path."""
-    text_to_check = match.group("text") if match.group("text") else unquote(match.group("query"))
-    text_to_check = unquote(text_to_check.strip())
-
-    if FILE_PATH_PATTERN.match(text_to_check):
-        return text_to_check
-    return match.group(0)
-
-
 def _strip_param_fences(s: str) -> str:
     """
     Remove one layer of outermost Markdown code fences,
@@ -147,20 +125,6 @@ def _strip_param_fences(s: str) -> str:
     return s[n:-n].strip()
 
 
-def _repair_param_value(s: str) -> str:
-    """
-    Standardize and repair LLM-generated text fragments (unescaping, link normalization)
-    to ensure compatibility with specialized clients like Roo Code.
-    """
-    if not s:
-        return ""
-
-    s = COMMONMARK_UNESCAPE_RE.sub(r"\1", s)
-    s = GOOGLE_SEARCH_PATTERN.sub(_strip_google_search, s)
-
-    return s
-
-
 def estimate_tokens(text: str | None) -> int:
     """Estimate the number of tokens heuristically based on character count."""
     if not text:
@@ -286,8 +250,8 @@ def _create_tool_call(name: str, raw_args: str) -> None:
             logger.warning("Encountered tool_call without a function name.")
             return
 
-        name = _repair_param_value(name.strip())
-        raw_args = _repair_param_value(raw_args)
+        name = unescape_text(name.strip())
+        raw_args = unescape_text(raw_args)
 
         arg_matches = TAGGED_ARG_RE.findall(raw_args)
         if arg_matches:

From 45af127004adcec8a48c9f9432f0c0b54a581724 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 14 Feb 2026 12:30:29 +0700
Subject: [PATCH 134/236] Update `TOOL_WRAP_HINT` to ensure Gemini strictly
 follows the instructions.

---
 app/utils/helper.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index f6a3e77..64df4f7 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -18,11 +18,12 @@
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
-    "\n\nSYSTEM INTERFACE: Tool calling protocol. You MUST follow these MANDATORY rules:\n\n"
-    "1. Respond ONLY with a single [ToolCalls] block. NO conversational text, NO explanations, NO filler.\n"
-    "2. For ALL parameters, the value MUST be entirely enclosed in a single markdown code block (start/end with backticks) inside the tags. NO text allowed outside this block.\n"
-    "3. Use a markdown fence longer than any backtick sequence in the value (e.g., use ```` if value has ```).\n\n"
-    "EXACT SYNTAX TEMPLATE:\n"
+    "\n\n### SYSTEM: TOOL CALLING PROTOCOL (MANDATORY) ###\n"
+    "If tool execution is required, you MUST adhere to this EXACT protocol. No exceptions.\n\n"
+    "1. OUTPUT RESTRICTION: Your response MUST contain ONLY the [ToolCalls] block. Conversational filler, preambles, or concluding remarks are STRICTLY PROHIBITED.\n"
+    "2. WRAPPING LOGIC: Every parameter value MUST be enclosed in a markdown code block. Use 3 backticks (```) by default. If the value contains backticks, the outer fence MUST be longer than any sequence inside (e.g., ````).\n"
+    "3. TAG SYMMETRY: All tags MUST be balanced and closed in the exact reverse order of opening. Incomplete or unclosed blocks are strictly prohibited.\n\n"
+    "REQUIRED SYNTAX:\n"
     "[ToolCalls]\n"
     "[Call:tool_name]\n"
     "[CallParameter:parameter_name]\n"
@@ -32,8 +33,7 @@
     "[/CallParameter]\n"
     "[/Call]\n"
     "[/ToolCalls]\n\n"
-    "CRITICAL: Every tag MUST be opened and closed accurately.\n\n"
-    "Multiple tools: List them sequentially inside one [ToolCalls] block. No tool: respond naturally, NEVER use protocol tags.\n"
+    "CRITICAL: Do NOT mix natural language with protocol tags. Either respond naturally OR provide the protocol block alone. There is no middle ground.\n"
 )
 TOOL_BLOCK_RE = re.compile(
     r"(?:\[ToolCalls]|\\\[ToolCalls\\])\s*(.*?)\s*(?:\[/ToolCalls]|\\\[\\/ToolCalls\\])",

From f144e1440f6812ae330438b316ad1e08eb052240 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 14 Feb 2026 12:32:26 +0700
Subject: [PATCH 135/236] Update required dependencies

---
 pyproject.toml | 2 +-
 uv.lock        | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 93dabab..0cae786 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = "==3.12.*"
 dependencies = [
     "fastapi>=0.129.0",
-    "gemini-webapi>=1.19.1",
+    "gemini-webapi>=1.19.2",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
     "orjson>=3.11.7",
diff --git a/uv.lock b/uv.lock
index 249e84b..5b687e4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -107,7 +107,7 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "fastapi", specifier = ">=0.129.0" },
-    { name = "gemini-webapi", specifier = ">=1.19.1" },
+    { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "orjson", specifier = ">=3.11.7" },
@@ -123,7 +123,7 @@ dev = [{ name = "ruff", specifier = ">=0.15.1" }]
 
 [[package]]
 name = "gemini-webapi"
-version = "1.19.1"
+version = "1.19.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "httpx", extra = ["http2"] },
@@ -131,9 +131,9 @@ dependencies = [
     { name = "orjson" },
     { name = "pydantic" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/e5/d1/c61ee05ca6e20c70caa25a3f0f12e2a810bbc6b39e588ff937821de43690/gemini_webapi-1.19.1.tar.gz", hash = "sha256:a52afdfc2d9f6e87a6ae8cd926fb2ce5c562a0a99dc75ce97d8d50ffc2a3e133", size = 266761, upload-time = "2026-02-10T05:44:29.195Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d4/d3/b4ff659bfb0fff378b16f934429d53b7f78451ac184406ab2f9ddda9357e/gemini_webapi-1.19.2.tar.gz", hash = "sha256:f6e96e28f3f1e78be6176fbb8b2eca25ad509aec6cfacf99c415559f27691b71", size = 266805, upload-time = "2026-02-14T05:26:04.103Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/4a/0b/7a73919ee8621f6559ae679a20d754959b989a3f09cf20478d89971f40b4/gemini_webapi-1.19.1-py3-none-any.whl", hash = "sha256:0dc4c7daa58d281722d52d6acf520f2e850c6c3c6020080fdbc5f77736c8be9a", size = 63500, upload-time = "2026-02-10T05:44:27.692Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/e7/a676f721980e3daa43e05abe94884a84648efdc6203889e7a0f5c8ca2e98/gemini_webapi-1.19.2-py3-none-any.whl", hash = "sha256:fdc088ca35361301f40ea807a58c4bec18886b17a54164a1a8f3d639eadc6a66", size = 63524, upload-time = "2026-02-14T05:26:02.173Z" },
 ]
 
 [[package]]

From 9d014b093512b72d022b57133c3f31224c8f271e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 18 Feb 2026 14:29:41 +0700
Subject: [PATCH 136/236] Ignore github directory

---
 .github/workflows/docker.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 7231e08..995b60c 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -8,8 +8,7 @@ on:
       - "v*"
     paths-ignore:
       - "**/*.md"
-      - ".github/workflows/ruff.yaml"
-      - ".github/workflows/track.yml"
+      - ".github/*"
 
 env:
   REGISTRY: ghcr.io

From 3fab502ab495868f20c725e3657b11829895204c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 18 Feb 2026 16:41:55 +0700
Subject: [PATCH 137/236] Upgrade to fully support Python 3.13

---
 .github/workflows/docker.yaml |   6 +
 .github/workflows/ruff.yaml   |   4 +-
 Dockerfile                    |  18 ++-
 README.md                     |   4 +-
 README.zh.md                  |   4 +-
 app/main.py                   |   2 +-
 app/models/__init__.py        |  58 +++++++++-
 app/models/models.py          | 150 ++++++++++++-------------
 app/server/chat.py            |  54 ++++-----
 app/server/health.py          |   4 +-
 app/server/images.py          |   2 +-
 app/server/middleware.py      |   2 +-
 app/services/client.py        |  11 +-
 app/services/lmdb.py          |  55 ++++-----
 app/services/pool.py          |  18 +--
 app/utils/config.py           |  12 +-
 app/utils/helper.py           |   7 +-
 app/utils/singleton.py        |   6 +-
 pyproject.toml                |  50 +++++++--
 scripts/dump_lmdb.py          |  16 ++-
 uv.lock                       | 203 ++++++++++++++++++++++------------
 21 files changed, 427 insertions(+), 259 deletions(-)

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 995b60c..775e9e4 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -9,6 +9,8 @@ on:
     paths-ignore:
       - "**/*.md"
       - ".github/*"
+      - "LICENSE"
+      - ".gitignore"
 
 env:
   REGISTRY: ghcr.io
@@ -25,6 +27,9 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v6
 
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
@@ -45,6 +50,7 @@ jobs:
             type=semver,pattern={{version}}
             type=semver,pattern={{major}}.{{minor}}
             type=semver,pattern={{major}}
+            type=sha,format=short
             type=raw,value=latest,enable={{is_default_branch}}
 
       - name: Build and push Docker image
diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml
index 6b9e536..d451cdc 100644
--- a/.github/workflows/ruff.yaml
+++ b/.github/workflows/ruff.yaml
@@ -19,12 +19,12 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v6
         with:
-          python-version: "3.12"
+          python-version: "3.13"
 
       - name: Install Ruff
         run: |
           python -m pip install --upgrade pip
-          pip install "ruff>=0.11.7"
+          pip install "ruff>=0.15.1"
 
       - name: Run Ruff
         run: ruff check .
diff --git a/Dockerfile b/Dockerfile
index 938bc2f..ef7f41e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,16 +1,26 @@
-FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
+FROM ghcr.io/astral-sh/uv:python3.13-trixie-slim
 
 LABEL org.opencontainers.image.description="Web-based Gemini models wrapped into an OpenAI-compatible API."
 
 WORKDIR /app
 
-# Install dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    tini \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV UV_COMPILE_BYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+
 COPY pyproject.toml uv.lock ./
-RUN uv sync --no-cache --no-dev
+RUN uv sync --no-cache --frozen --no-install-project --no-dev
 
 COPY app/ app/
 COPY config/ config/
 COPY run.py .
 
-# Command to run the application
+EXPOSE 8000
+
+ENTRYPOINT ["/usr/bin/tini", "--"]
+
 CMD ["uv", "run", "--no-dev", "run.py"]
diff --git a/README.md b/README.md
index 330e9c8..91f687c 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Gemini-FastAPI
 
-[![Python 3.12](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/)
+[![Python 3.13](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/downloads/)
 [![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/)
 [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
 
@@ -24,7 +24,7 @@ Web-based Gemini models wrapped into an OpenAI-compatible API. Powered by [Hanao
 
 ### Prerequisites
 
-- Python 3.12
+- Python 3.13
 - Google account with Gemini access on web
 - `secure_1psid` and `secure_1psidts` cookies from Gemini web interface
 
diff --git a/README.zh.md b/README.zh.md
index 2f9e1b5..d23bec1 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -1,6 +1,6 @@
 # Gemini-FastAPI
 
-[![Python 3.12](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/)
+[![Python 3.13](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/downloads/)
 [![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/)
 [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
 
@@ -24,7 +24,7 @@
 
 ### 前置条件
 
-- Python 3.12
+- Python 3.13
 - 拥有网页版 Gemini 访问权限的 Google 账号
 - 从 Gemini 网页获取的 `secure_1psid` 和 `secure_1psidts` Cookie
 
diff --git a/app/main.py b/app/main.py
index f4e6711..0634ce2 100644
--- a/app/main.py
+++ b/app/main.py
@@ -43,7 +43,7 @@ async def _run_retention_cleanup(stop_event: asyncio.Event) -> None:
                 stop_event.wait(),
                 timeout=RETENTION_CLEANUP_INTERVAL_SECONDS,
             )
-        except asyncio.TimeoutError:
+        except TimeoutError:
             continue
 
     logger.info("LMDB retention cleanup task stopped.")
diff --git a/app/models/__init__.py b/app/models/__init__.py
index c6a3640..a72efdc 100644
--- a/app/models/__init__.py
+++ b/app/models/__init__.py
@@ -1 +1,57 @@
-from .models import *  # noqa: F403
+from .models import (
+    ChatCompletionRequest,
+    ChatCompletionResponse,
+    Choice,
+    ContentItem,
+    ConversationInStore,
+    FunctionCall,
+    HealthCheckResponse,
+    Message,
+    ModelData,
+    ModelListResponse,
+    ResponseCreateRequest,
+    ResponseCreateResponse,
+    ResponseImageGenerationCall,
+    ResponseImageTool,
+    ResponseInputContent,
+    ResponseInputItem,
+    ResponseOutputContent,
+    ResponseOutputMessage,
+    ResponseToolCall,
+    ResponseToolChoice,
+    Tool,
+    ToolCall,
+    ToolChoiceFunction,
+    ToolChoiceFunctionDetail,
+    ToolFunctionDefinition,
+    Usage,
+)
+
+__all__ = [
+    "ChatCompletionRequest",
+    "ChatCompletionResponse",
+    "Choice",
+    "ContentItem",
+    "ConversationInStore",
+    "FunctionCall",
+    "HealthCheckResponse",
+    "Message",
+    "ModelData",
+    "ModelListResponse",
+    "ResponseCreateRequest",
+    "ResponseCreateResponse",
+    "ResponseImageGenerationCall",
+    "ResponseImageTool",
+    "ResponseInputContent",
+    "ResponseInputItem",
+    "ResponseOutputContent",
+    "ResponseOutputMessage",
+    "ResponseToolCall",
+    "ResponseToolChoice",
+    "Tool",
+    "ToolCall",
+    "ToolChoiceFunction",
+    "ToolChoiceFunctionDetail",
+    "ToolFunctionDefinition",
+    "Usage",
+]
diff --git a/app/models/models.py b/app/models/models.py
index 64ceaa9..ca206b7 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from datetime import datetime
-from typing import Any, Dict, List, Literal, Optional, Union
+from typing import Any, Literal
 
 from pydantic import BaseModel, Field, model_validator
 
@@ -10,28 +10,28 @@ class ContentItem(BaseModel):
     """Individual content item (text, image, or file) within a message."""
 
     type: Literal["text", "image_url", "file", "input_audio"]
-    text: Optional[str] = None
-    image_url: Optional[Dict[str, str]] = None
-    input_audio: Optional[Dict[str, Any]] = None
-    file: Optional[Dict[str, str]] = None
-    annotations: List[Dict[str, Any]] = Field(default_factory=list)
+    text: str | None = None
+    image_url: dict[str, str] | None = None
+    input_audio: dict[str, Any] | None = None
+    file: dict[str, str] | None = None
+    annotations: list[dict[str, Any]] = Field(default_factory=list)
 
 
 class Message(BaseModel):
     """Message model"""
 
     role: str
-    content: Union[str, List[ContentItem], None] = None
-    name: Optional[str] = None
-    tool_calls: Optional[List["ToolCall"]] = None
-    tool_call_id: Optional[str] = None
-    refusal: Optional[str] = None
-    reasoning_content: Optional[str] = None
-    audio: Optional[Dict[str, Any]] = None
-    annotations: List[Dict[str, Any]] = Field(default_factory=list)
+    content: str | list[ContentItem] | None = None
+    name: str | None = None
+    tool_calls: list[ToolCall] | None = None
+    tool_call_id: str | None = None
+    refusal: str | None = None
+    reasoning_content: str | None = None
+    audio: dict[str, Any] | None = None
+    annotations: list[dict[str, Any]] = Field(default_factory=list)
 
     @model_validator(mode="after")
-    def normalize_role(self) -> "Message":
+    def normalize_role(self) -> Message:
         """Normalize 'developer' role to 'system' for Gemini compatibility."""
         if self.role == "developer":
             self.role = "system"
@@ -44,7 +44,7 @@ class Choice(BaseModel):
     index: int
     message: Message
     finish_reason: str
-    logprobs: Optional[Dict[str, Any]] = None
+    logprobs: dict[str, Any] | None = None
 
 
 class FunctionCall(BaseModel):
@@ -66,8 +66,8 @@ class ToolFunctionDefinition(BaseModel):
     """Function definition for tool."""
 
     name: str
-    description: Optional[str] = None
-    parameters: Optional[Dict[str, Any]] = None
+    description: str | None = None
+    parameters: dict[str, Any] | None = None
 
 
 class Tool(BaseModel):
@@ -96,8 +96,8 @@ class Usage(BaseModel):
     prompt_tokens: int
     completion_tokens: int
     total_tokens: int
-    prompt_tokens_details: Optional[Dict[str, int]] = None
-    completion_tokens_details: Optional[Dict[str, int]] = None
+    prompt_tokens_details: dict[str, int] | None = None
+    completion_tokens_details: dict[str, int] | None = None
 
 
 class ModelData(BaseModel):
@@ -113,17 +113,17 @@ class ChatCompletionRequest(BaseModel):
     """Chat completion request model"""
 
     model: str
-    messages: List[Message]
-    stream: Optional[bool] = False
-    user: Optional[str] = None
-    temperature: Optional[float] = 0.7
-    top_p: Optional[float] = 1.0
-    max_tokens: Optional[int] = None
-    tools: Optional[List["Tool"]] = None
-    tool_choice: Optional[
-        Union[Literal["none"], Literal["auto"], Literal["required"], "ToolChoiceFunction"]
-    ] = None
-    response_format: Optional[Dict[str, Any]] = None
+    messages: list[Message]
+    stream: bool | None = False
+    user: str | None = None
+    temperature: float | None = 0.7
+    top_p: float | None = 1.0
+    max_tokens: int | None = None
+    tools: list[Tool] | None = None
+    tool_choice: (
+        Literal["none"] | Literal["auto"] | Literal["required"] | ToolChoiceFunction | None
+    ) = None
+    response_format: dict[str, Any] | None = None
 
 
 class ChatCompletionResponse(BaseModel):
@@ -133,7 +133,7 @@ class ChatCompletionResponse(BaseModel):
     object: str = "chat.completion"
     created: int
     model: str
-    choices: List[Choice]
+    choices: list[Choice]
     usage: Usage
 
 
@@ -141,23 +141,23 @@ class ModelListResponse(BaseModel):
     """Model list model"""
 
     object: str = "list"
-    data: List[ModelData]
+    data: list[ModelData]
 
 
 class HealthCheckResponse(BaseModel):
     """Health check response model"""
 
     ok: bool
-    storage: Optional[Dict[str, str | int]] = None
-    clients: Optional[Dict[str, bool]] = None
-    error: Optional[str] = None
+    storage: dict[str, str | int] | None = None
+    clients: dict[str, bool] | None = None
+    error: str | None = None
 
 
 class ConversationInStore(BaseModel):
     """Conversation model for storing in the database."""
 
-    created_at: Optional[datetime] = Field(default=None)
-    updated_at: Optional[datetime] = Field(default=None)
+    created_at: datetime | None = Field(default=None)
+    updated_at: datetime | None = Field(default=None)
 
     # Gemini Web API does not support changing models once a conversation is created.
     model: str = Field(..., description="Model used for the conversation")
@@ -172,13 +172,13 @@ class ResponseInputContent(BaseModel):
     """Content item for Responses API input."""
 
     type: Literal["input_text", "input_image", "input_file"]
-    text: Optional[str] = None
-    image_url: Optional[str] = None
-    detail: Optional[Literal["auto", "low", "high"]] = None
-    file_url: Optional[str] = None
-    file_data: Optional[str] = None
-    filename: Optional[str] = None
-    annotations: List[Dict[str, Any]] = Field(default_factory=list)
+    text: str | None = None
+    image_url: str | None = None
+    detail: Literal["auto", "low", "high"] | None = None
+    file_url: str | None = None
+    file_data: str | None = None
+    filename: str | None = None
+    annotations: list[dict[str, Any]] = Field(default_factory=list)
 
     @model_validator(mode="before")
     @classmethod
@@ -192,42 +192,42 @@ def normalize_output_text(cls, data: Any) -> Any:
 class ResponseInputItem(BaseModel):
     """Single input item for Responses API."""
 
-    type: Optional[Literal["message"]] = "message"
+    type: Literal["message"] | None = "message"
     role: Literal["user", "assistant", "system", "developer"]
-    content: Union[str, List[ResponseInputContent]]
+    content: str | list[ResponseInputContent]
 
 
 class ResponseToolChoice(BaseModel):
     """Tool choice enforcing a specific tool in Responses API."""
 
     type: Literal["function", "image_generation"]
-    function: Optional[ToolChoiceFunctionDetail] = None
+    function: ToolChoiceFunctionDetail | None = None
 
 
 class ResponseImageTool(BaseModel):
     """Image generation tool specification for Responses API."""
 
     type: Literal["image_generation"]
-    model: Optional[str] = None
-    output_format: Optional[str] = None
+    model: str | None = None
+    output_format: str | None = None
 
 
 class ResponseCreateRequest(BaseModel):
     """Responses API request payload."""
 
     model: str
-    input: Union[str, List[ResponseInputItem]]
-    instructions: Optional[Union[str, List[ResponseInputItem]]] = None
-    temperature: Optional[float] = 0.7
-    top_p: Optional[float] = 1.0
-    max_output_tokens: Optional[int] = None
-    stream: Optional[bool] = False
-    tool_choice: Optional[Union[str, ResponseToolChoice]] = None
-    tools: Optional[List[Union[Tool, ResponseImageTool]]] = None
-    store: Optional[bool] = None
-    user: Optional[str] = None
-    response_format: Optional[Dict[str, Any]] = None
-    metadata: Optional[Dict[str, Any]] = None
+    input: str | list[ResponseInputItem]
+    instructions: str | list[ResponseInputItem] | None = None
+    temperature: float | None = 0.7
+    top_p: float | None = 1.0
+    max_output_tokens: int | None = None
+    stream: bool | None = False
+    tool_choice: str | ResponseToolChoice | None = None
+    tools: list[Tool | ResponseImageTool] | None = None
+    store: bool | None = None
+    user: str | None = None
+    response_format: dict[str, Any] | None = None
+    metadata: dict[str, Any] | None = None
 
 
 class ResponseUsage(BaseModel):
@@ -242,8 +242,8 @@ class ResponseOutputContent(BaseModel):
     """Content item for Responses API output."""
 
     type: Literal["output_text"]
-    text: Optional[str] = ""
-    annotations: List[Dict[str, Any]] = Field(default_factory=list)
+    text: str | None = ""
+    annotations: list[dict[str, Any]] = Field(default_factory=list)
 
 
 class ResponseOutputMessage(BaseModel):
@@ -252,7 +252,7 @@ class ResponseOutputMessage(BaseModel):
     id: str
     type: Literal["message"]
     role: Literal["assistant"]
-    content: List[ResponseOutputContent]
+    content: list[ResponseOutputContent]
 
 
 class ResponseImageGenerationCall(BaseModel):
@@ -261,10 +261,10 @@ class ResponseImageGenerationCall(BaseModel):
     id: str
     type: Literal["image_generation_call"] = "image_generation_call"
     status: Literal["completed", "in_progress", "generating", "failed"] = "completed"
-    result: Optional[str] = None
-    output_format: Optional[str] = None
-    size: Optional[str] = None
-    revised_prompt: Optional[str] = None
+    result: str | None = None
+    output_format: str | None = None
+    size: str | None = None
+    revised_prompt: str | None = None
 
 
 class ResponseToolCall(BaseModel):
@@ -283,7 +283,7 @@ class ResponseCreateResponse(BaseModel):
     object: Literal["response"] = "response"
     created_at: int
     model: str
-    output: List[Union[ResponseOutputMessage, ResponseImageGenerationCall, ResponseToolCall]]
+    output: list[ResponseOutputMessage | ResponseImageGenerationCall | ResponseToolCall]
     status: Literal[
         "in_progress",
         "completed",
@@ -292,12 +292,12 @@ class ResponseCreateResponse(BaseModel):
         "cancelled",
         "requires_action",
     ] = "completed"
-    tool_choice: Optional[Union[str, ResponseToolChoice]] = None
-    tools: Optional[List[Union[Tool, ResponseImageTool]]] = None
+    tool_choice: str | ResponseToolChoice | None = None
+    tools: list[Tool | ResponseImageTool] | None = None
     usage: ResponseUsage
-    error: Optional[Dict[str, Any]] = None
-    metadata: Optional[Dict[str, Any]] = None
-    input: Optional[Union[str, List[ResponseInputItem]]] = None
+    error: dict[str, Any] | None = None
+    metadata: dict[str, Any] | None = None
+    input: str | list[ResponseInputItem] | None = None
 
 
 # Rebuild models with forward references
diff --git a/app/server/chat.py b/app/server/chat.py
index 934091b..3849af5 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -3,10 +3,11 @@
 import io
 import reprlib
 import uuid
+from collections.abc import AsyncGenerator
 from dataclasses import dataclass
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from pathlib import Path
-from typing import Any, AsyncGenerator
+from typing import Any
 
 import orjson
 from fastapi import APIRouter, Depends, HTTPException, Request, status
@@ -17,7 +18,7 @@
 from gemini_webapi.types.image import GeneratedImage, Image
 from loguru import logger
 
-from ..models import (
+from app.models import (
     ChatCompletionRequest,
     ContentItem,
     ConversationInStore,
@@ -38,9 +39,15 @@
     Tool,
     ToolChoiceFunction,
 )
-from ..services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore
-from ..utils import g_config
-from ..utils.helper import (
+from app.server.middleware import (
+    get_image_store_dir,
+    get_image_token,
+    get_temp_dir,
+    verify_api_key,
+)
+from app.services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore
+from app.utils import g_config
+from app.utils.helper import (
     TOOL_HINT_LINE_END,
     TOOL_HINT_LINE_START,
     TOOL_HINT_STRIPPED,
@@ -53,7 +60,6 @@
     strip_system_hints,
     text_from_message,
 )
-from .middleware import get_image_store_dir, get_image_token, get_temp_dir, verify_api_key
 
 MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9)
 METADATA_TTL_MINUTES = 15
@@ -98,11 +104,7 @@ async def _image_to_base64(
 
     if not suffix:
         detected_ext = detect_image_extension(data)
-        if detected_ext:
-            suffix = detected_ext
-        else:
-            # Fallback if detection fails
-            suffix = ".png" if isinstance(image, GeneratedImage) else ".jpg"
+        suffix = detected_ext or (".png" if isinstance(image, GeneratedImage) else ".jpg")
 
     random_name = f"img_{uuid.uuid4().hex}{suffix}"
     new_path = temp_dir / random_name
@@ -628,7 +630,7 @@ def _get_model_by_name(name: str) -> Model:
 
 def _get_available_models() -> list[ModelData]:
     """Return a list of available models based on configuration strategy."""
-    now = int(datetime.now(tz=timezone.utc).timestamp())
+    now = int(datetime.now(tz=UTC).timestamp())
     strategy = g_config.gemini.model_strategy
     models_data = []
 
@@ -712,7 +714,7 @@ async def _send_with_split(
     text: str,
     files: list[Path | str | io.BytesIO] | None = None,
     stream: bool = False,
-) -> AsyncGenerator[ModelOutput, None] | ModelOutput:
+) -> AsyncGenerator[ModelOutput] | ModelOutput:
     """Send text to Gemini, splitting or converting to attachment if too long."""
     if len(text) <= MAX_CHARS_PER_REQUEST:
         try:
@@ -1013,9 +1015,7 @@ def flush(self) -> str:
         res = ""
         if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"):
             res = ""
-        elif self.state == "IN_BLOCK" and self.current_role != "tool":
-            res = self.buffer
-        elif self.state == "NORMAL":
+        elif (self.state == "IN_BLOCK" and self.current_role != "tool") or self.state == "NORMAL":
             res = self.buffer
 
         self.buffer = ""
@@ -1027,7 +1027,7 @@ def flush(self) -> str:
 
 
 def _create_real_streaming_response(
-    generator: AsyncGenerator[ModelOutput, None],
+    generator: AsyncGenerator[ModelOutput],
     completion_id: str,
     created_time: int,
     model_name: str,
@@ -1221,7 +1221,7 @@ async def generate_stream():
 
 
 def _create_responses_real_streaming_response(
-    generator: AsyncGenerator[ModelOutput, None],
+    generator: AsyncGenerator[ModelOutput],
     response_id: str,
     created_time: int,
     model_name: str,
@@ -1455,10 +1455,12 @@ async def create_chat_completion(
             m_input, files = await GeminiClientWrapper.process_conversation(msgs, tmp_dir)
         except Exception as e:
             logger.exception("Error in preparing conversation")
-            raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e))
+            raise HTTPException(
+                status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)
+            ) from e
 
     completion_id = f"chatcmpl-{uuid.uuid4()}"
-    created_time = int(datetime.now(tz=timezone.utc).timestamp())
+    created_time = int(datetime.now(tz=UTC).timestamp())
 
     try:
         assert session and client
@@ -1470,7 +1472,7 @@ async def create_chat_completion(
         )
     except Exception as e:
         logger.exception("Gemini API error")
-        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e))
+        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) from e
 
     if request.stream:
         return _create_real_streaming_response(
@@ -1620,10 +1622,12 @@ async def create_response(
             m_input, files = await GeminiClientWrapper.process_conversation(messages, tmp_dir)
         except Exception as e:
             logger.exception("Error in preparing conversation")
-            raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e))
+            raise HTTPException(
+                status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)
+            ) from e
 
     response_id = f"resp_{uuid.uuid4().hex}"
-    created_time = int(datetime.now(tz=timezone.utc).timestamp())
+    created_time = int(datetime.now(tz=UTC).timestamp())
 
     try:
         assert session and client
@@ -1635,7 +1639,7 @@ async def create_response(
         )
     except Exception as e:
         logger.exception("Gemini API error")
-        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e))
+        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) from e
 
     if request.stream:
         return _create_responses_real_streaming_response(
diff --git a/app/server/health.py b/app/server/health.py
index f521db1..444c938 100644
--- a/app/server/health.py
+++ b/app/server/health.py
@@ -1,8 +1,8 @@
 from fastapi import APIRouter
 from loguru import logger
 
-from ..models import HealthCheckResponse
-from ..services import GeminiClientPool, LMDBConversationStore
+from app.models import HealthCheckResponse
+from app.services import GeminiClientPool, LMDBConversationStore
 
 router = APIRouter()
 
diff --git a/app/server/images.py b/app/server/images.py
index fe078f7..e1c161c 100644
--- a/app/server/images.py
+++ b/app/server/images.py
@@ -1,7 +1,7 @@
 from fastapi import APIRouter, HTTPException, Query
 from fastapi.responses import FileResponse
 
-from ..server.middleware import get_image_store_dir, verify_image_token
+from app.server.middleware import get_image_store_dir, verify_image_token
 
 router = APIRouter()
 
diff --git a/app/server/middleware.py b/app/server/middleware.py
index 630e1f5..4bc358d 100644
--- a/app/server/middleware.py
+++ b/app/server/middleware.py
@@ -10,7 +10,7 @@
 from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
 from loguru import logger
 
-from ..utils import g_config
+from app.utils import g_config
 
 # Persistent directory for storing generated images
 IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "ai_generated_images"
diff --git a/app/services/client.py b/app/services/client.py
index 70dfce9..49d9e87 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -5,9 +5,9 @@
 from gemini_webapi import GeminiClient, ModelOutput
 from loguru import logger
 
-from ..models import Message
-from ..utils import g_config
-from ..utils.helper import (
+from app.models import Message
+from app.utils import g_config
+from app.utils.helper import (
     add_tag,
     normalize_llm_text,
     save_file_to_tempfile,
@@ -146,9 +146,8 @@ async def process_message(
 
         model_input = "\n".join(fragment for fragment in text_fragments if fragment is not None)
 
-        if model_input or message.role == "tool":
-            if tagged:
-                model_input = add_tag(message.role, model_input)
+        if (model_input or message.role == "tool") and tagged:
+            model_input = add_tag(message.role, model_input)
 
         return model_input, files
 
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index abf8859..87a1449 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -4,22 +4,22 @@
 from contextlib import contextmanager
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any
 
 import lmdb
 import orjson
 from loguru import logger
 
-from ..models import ContentItem, ConversationInStore, Message
-from ..utils import g_config
-from ..utils.helper import (
+from app.models import ContentItem, ConversationInStore, Message
+from app.utils import g_config
+from app.utils.helper import (
     extract_tool_calls,
     normalize_llm_text,
     remove_tool_call_blocks,
     strip_system_hints,
     unescape_text,
 )
-from ..utils.singleton import Singleton
+from app.utils.singleton import Singleton
 
 _VOLATILE_TRANS_TABLE = str.maketrans("", "", string.whitespace + string.punctuation)
 
@@ -125,7 +125,7 @@ def _hash_message(message: Message, fuzzy: bool = False) -> str:
 
 
 def _hash_conversation(
-    client_id: str, model: str, messages: List[Message], fuzzy: bool = False
+    client_id: str, model: str, messages: list[Message], fuzzy: bool = False
 ) -> str:
     """Generate a hash for a list of messages and model name, tied to a specific client_id."""
     combined_hash = hashlib.sha256()
@@ -145,9 +145,9 @@ class LMDBConversationStore(metaclass=Singleton):
 
     def __init__(
         self,
-        db_path: Optional[str] = None,
-        max_db_size: Optional[int] = None,
-        retention_days: Optional[int] = None,
+        db_path: str | None = None,
+        max_db_size: int | None = None,
+        retention_days: int | None = None,
     ):
         """
         Initialize LMDB store.
@@ -219,7 +219,7 @@ def _get_transaction(self, write: bool = False):
             raise
 
     @staticmethod
-    def _decode_index_value(data: bytes) -> List[str]:
+    def _decode_index_value(data: bytes) -> list[str]:
         """Decode index value, handling both legacy single-string and new list-of-strings formats."""
         if not data:
             return []
@@ -238,7 +238,7 @@ def _decode_index_value(data: bytes) -> List[str]:
     @staticmethod
     def _update_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str):
         """Add a storage key to the index for a given hash, avoiding duplicates."""
-        idx_key = f"{prefix}{hash_val}".encode("utf-8")
+        idx_key = f"{prefix}{hash_val}".encode()
         existing = txn.get(idx_key)
         keys = LMDBConversationStore._decode_index_value(existing) if existing else []
         if storage_key not in keys:
@@ -248,7 +248,7 @@ def _update_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key
     @staticmethod
     def _remove_from_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str):
         """Remove a specific storage key from the index for a given hash."""
-        idx_key = f"{prefix}{hash_val}".encode("utf-8")
+        idx_key = f"{prefix}{hash_val}".encode()
         existing = txn.get(idx_key)
         if not existing:
             return
@@ -263,7 +263,7 @@ def _remove_from_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storag
     def store(
         self,
         conv: ConversationInStore,
-        custom_key: Optional[str] = None,
+        custom_key: str | None = None,
     ) -> str:
         """
         Store a conversation model in LMDB.
@@ -312,7 +312,7 @@ def store(
             )
             raise
 
-    def get(self, key: str) -> Optional[ConversationInStore]:
+    def get(self, key: str) -> ConversationInStore | None:
         """
         Retrieve conversation data by key.
 
@@ -340,7 +340,7 @@ def get(self, key: str) -> Optional[ConversationInStore]:
             logger.error(f"Unexpected error retrieving messages with key {key[:12]}: {e}")
             return None
 
-    def find(self, model: str, messages: List[Message]) -> Optional[ConversationInStore]:
+    def find(self, model: str, messages: list[Message]) -> ConversationInStore | None:
         """
         Search conversation data by message list.
         Tries raw matching, then sanitized matching, and finally fuzzy matching.
@@ -360,12 +360,13 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt
             return conv
 
         cleaned_messages = self.sanitize_messages(messages)
-        if cleaned_messages != messages:
-            if conv := self._find_by_message_list(model, cleaned_messages):
-                logger.debug(
-                    f"Session found for '{model}' with {len(cleaned_messages)} cleaned messages."
-                )
-                return conv
+        if cleaned_messages != messages and (
+            conv := self._find_by_message_list(model, cleaned_messages)
+        ):
+            logger.debug(
+                f"Session found for '{model}' with {len(cleaned_messages)} cleaned messages."
+            )
+            return conv
 
         if conv := self._find_by_message_list(model, messages, fuzzy=True):
             logger.debug(
@@ -379,9 +380,9 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt
     def _find_by_message_list(
         self,
         model: str,
-        messages: List[Message],
+        messages: list[Message],
         fuzzy: bool = False,
-    ) -> Optional[ConversationInStore]:
+    ) -> ConversationInStore | None:
         """
         Internal find implementation based on a message list.
 
@@ -440,7 +441,7 @@ def exists(self, key: str) -> bool:
             logger.error(f"Failed to check existence of key {key}: {e}")
             return False
 
-    def delete(self, key: str) -> Optional[ConversationInStore]:
+    def delete(self, key: str) -> ConversationInStore | None:
         """Delete conversation model by key."""
         try:
             with self._get_transaction(write=True) as txn:
@@ -466,7 +467,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]:
             logger.error(f"Failed to delete messages with key {key[:12]}: {e}")
             return None
 
-    def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]:
+    def keys(self, prefix: str = "", limit: int | None = None) -> list[str]:
         """List all keys in the store, optionally filtered by prefix."""
         keys = []
         try:
@@ -492,7 +493,7 @@ def keys(self, prefix: str = "", limit: Optional[int] = None) -> List[str]:
             logger.error(f"Failed to list keys: {e}")
         return keys
 
-    def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
+    def cleanup_expired(self, retention_days: int | None = None) -> int:
         """Delete conversations older than the given retention period."""
         retention_value = (
             self.retention_days if retention_days is None else max(0, int(retention_days))
@@ -561,7 +562,7 @@ def cleanup_expired(self, retention_days: Optional[int] = None) -> int:
 
         return removed
 
-    def stats(self) -> Dict[str, Any]:
+    def stats(self) -> dict[str, Any]:
         """Get database statistics."""
         if not self._env:
             logger.error("LMDB environment not initialized")
diff --git a/app/services/pool.py b/app/services/pool.py
index decc21a..3b4197c 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -1,11 +1,11 @@
 import asyncio
 from collections import deque
-from typing import Dict, List, Optional
 
 from loguru import logger
 
-from ..utils import g_config
-from ..utils.singleton import Singleton
+from app.utils import g_config
+from app.utils.singleton import Singleton
+
 from .client import GeminiClientWrapper
 
 
@@ -13,10 +13,10 @@ class GeminiClientPool(metaclass=Singleton):
     """Pool of GeminiClient instances identified by unique ids."""
 
     def __init__(self) -> None:
-        self._clients: List[GeminiClientWrapper] = []
-        self._id_map: Dict[str, GeminiClientWrapper] = {}
+        self._clients: list[GeminiClientWrapper] = []
+        self._id_map: dict[str, GeminiClientWrapper] = {}
         self._round_robin: deque[GeminiClientWrapper] = deque()
-        self._restart_locks: Dict[str, asyncio.Lock] = {}
+        self._restart_locks: dict[str, asyncio.Lock] = {}
 
         if len(g_config.gemini.clients) == 0:
             raise ValueError("No Gemini clients configured")
@@ -55,7 +55,7 @@ async def init(self) -> None:
         if success_count == 0:
             raise RuntimeError("Failed to initialize any Gemini clients")
 
-    async def acquire(self, client_id: Optional[str] = None) -> GeminiClientWrapper:
+    async def acquire(self, client_id: str | None = None) -> GeminiClientWrapper:
         """Return a healthy client by id or using round-robin."""
         if not self._round_robin:
             raise RuntimeError("No Gemini clients configured")
@@ -106,10 +106,10 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool:
                 return False
 
     @property
-    def clients(self) -> List[GeminiClientWrapper]:
+    def clients(self) -> list[GeminiClientWrapper]:
         """Return managed clients."""
         return self._clients
 
-    def status(self) -> Dict[str, bool]:
+    def status(self) -> dict[str, bool]:
         """Return running status for each client."""
         return {client.id: client.running() for client in self._clients}
diff --git a/app/utils/config.py b/app/utils/config.py
index 4c1709f..21d2891 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -1,7 +1,7 @@
 import ast
 import os
 import sys
-from typing import Any, Literal, Optional
+from typing import Any, Literal
 
 import orjson
 from loguru import logger
@@ -28,7 +28,7 @@ class ServerConfig(BaseModel):
 
     host: str = Field(default="0.0.0.0", description="Server host address")
     port: int = Field(default=8000, ge=1, le=65535, description="Server port number")
-    api_key: Optional[str] = Field(
+    api_key: str | None = Field(
         default=None,
         description="API key for authentication, if set, will enable API key validation",
     )
@@ -41,11 +41,11 @@ class GeminiClientSettings(BaseModel):
     id: str = Field(..., description="Unique identifier for the client")
     secure_1psid: str = Field(..., description="Gemini Secure 1PSID")
     secure_1psidts: str = Field(..., description="Gemini Secure 1PSIDTS")
-    proxy: Optional[str] = Field(default=None, description="Proxy URL for this Gemini client")
+    proxy: str | None = Field(default=None, description="Proxy URL for this Gemini client")
 
     @field_validator("proxy", mode="before")
     @classmethod
-    def _blank_proxy_to_none(cls, value: Optional[str]) -> Optional[str]:
+    def _blank_proxy_to_none(cls, value: str | None) -> str | None:
         if value is None:
             return None
         stripped = value.strip()
@@ -55,8 +55,8 @@ def _blank_proxy_to_none(cls, value: Optional[str]) -> Optional[str]:
 class GeminiModelConfig(BaseModel):
     """Configuration for a custom Gemini model."""
 
-    model_name: Optional[str] = Field(default=None, description="Name of the model")
-    model_header: Optional[dict[str, Optional[str]]] = Field(
+    model_name: str | None = Field(default=None, description="Name of the model")
+    model_header: dict[str, str | None] | None = Field(
         default=None, description="Header for the model"
     )
 
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 64df4f7..002d401 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -14,7 +14,7 @@
 import orjson
 from loguru import logger
 
-from ..models import FunctionCall, Message, ToolCall
+from app.models import FunctionCall, Message, ToolCall
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
@@ -67,6 +67,7 @@
 )
 CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
+PARAM_FENCE_RE = re.compile(r"^(?P<fence>`{3,})")
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
 TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
@@ -113,7 +114,7 @@ def _strip_param_fences(s: str) -> str:
     if not s:
         return ""
 
-    match = re.match(r"^(?P<fence>`{3,})", s)
+    match = PARAM_FENCE_RE.match(s)
     if not match or not s.endswith(match.group("fence")):
         return s
 
@@ -272,7 +273,7 @@ def _create_tool_call(name: str, raw_args: str) -> None:
             arguments = "{}"
 
         index = len(tool_calls)
-        seed = f"{name}:{arguments}:{index}".encode("utf-8")
+        seed = f"{name}:{arguments}:{index}".encode()
         call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}"
 
         tool_calls.append(
diff --git a/app/utils/singleton.py b/app/utils/singleton.py
index 489e87e..2a258af 100644
--- a/app/utils/singleton.py
+++ b/app/utils/singleton.py
@@ -1,10 +1,10 @@
-from typing import ClassVar, Dict
+from typing import ClassVar
 
 
 class Singleton(type):
-    _instances: ClassVar[Dict[type, object]] = {}
+    _instances: ClassVar[dict[type, object]] = {}
 
     def __call__(cls, *args, **kwargs):
         if cls not in cls._instances:
-            cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
+            cls._instances[cls] = super().__call__(*args, **kwargs)
         return cls._instances[cls]
diff --git a/pyproject.toml b/pyproject.toml
index 0cae786..a1ae29d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,33 +3,61 @@ name = "gemini-fastapi"
 version = "1.0.0"
 description = "FastAPI Server built on Gemini Web API"
 readme = "README.md"
-requires-python = "==3.12.*"
+requires-python = "==3.13.*"
 dependencies = [
     "fastapi>=0.129.0",
     "gemini-webapi>=1.19.2",
+    "httptools>=0.7.1",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
     "orjson>=3.11.7",
-    "pydantic-settings[yaml]>=2.12.0",
-    "uvicorn>=0.40.0",
+    "pydantic-settings[yaml]>=2.13.0",
+    "uvicorn>=0.41.0",
     "uvloop>=0.22.1; sys_platform != 'win32'",
 ]
 
+[project.urls]
+Repository = "https://github.com/Nativu5/Gemini-FastAPI"
+
 [project.optional-dependencies]
 dev = [
-    "ruff>=0.15.0",
+    "pytest>=9.0.2",
+    "ruff>=0.15.1",
+]
+
+[dependency-groups]
+dev = [
+    "gemini-fastapi[dev]",
 ]
 
 [tool.ruff]
 line-length = 100
-lint.select = ["E", "F", "W", "I", "RUF"]
-lint.ignore = ["E501"]
+target-version = "py313"
+
+[tool.ruff.lint]
+select = [
+    "E",    # pycodestyle errors
+    "F",    # pyflakes
+    "W",    # pycodestyle warnings
+    "I",    # isort
+    "UP",   # pyupgrade
+    "B",    # flake8-bugbear
+    "C4",   # flake8-comprehensions
+    "SIM",  # flake8-simplify
+    "RUF",  # ruff-specific rules
+    "TID",  # flake8-tidy-imports
+]
+ignore = [
+    "E501", # line too long
+]
+
+[tool.ruff.lint.flake8-bugbear]
+extend-immutable-calls = [
+    "fastapi.Depends",
+    "fastapi.Query",
+    "fastapi.security.HTTPBearer",
+]
 
 [tool.ruff.format]
 quote-style = "double"
 indent-style = "space"
-
-[dependency-groups]
-dev = [
-    "ruff>=0.15.1",
-]
diff --git a/scripts/dump_lmdb.py b/scripts/dump_lmdb.py
index a331325..889af4f 100644
--- a/scripts/dump_lmdb.py
+++ b/scripts/dump_lmdb.py
@@ -1,6 +1,7 @@
 import argparse
+from collections.abc import Iterable
 from pathlib import Path
-from typing import Any, Iterable, List
+from typing import Any
 
 import lmdb
 import orjson
@@ -14,17 +15,17 @@ def _decode_value(value: bytes) -> Any:
         return value.decode("utf-8", errors="replace")
 
 
-def _dump_all(txn: lmdb.Transaction) -> List[dict[str, Any]]:
+def _dump_all(txn: lmdb.Transaction) -> list[dict[str, Any]]:
     """Return all records from the database."""
-    result: List[dict[str, Any]] = []
+    result: list[dict[str, Any]] = []
     for key, value in txn.cursor():
         result.append({"key": key.decode("utf-8"), "value": _decode_value(value)})
     return result
 
 
-def _dump_selected(txn: lmdb.Transaction, keys: Iterable[str]) -> List[dict[str, Any]]:
+def _dump_selected(txn: lmdb.Transaction, keys: Iterable[str]) -> list[dict[str, Any]]:
     """Return records for the provided keys."""
-    result: List[dict[str, Any]] = []
+    result: list[dict[str, Any]] = []
     for key in keys:
         raw = txn.get(key.encode("utf-8"))
         if raw is not None:
@@ -36,10 +37,7 @@ def dump_lmdb(path: Path, keys: Iterable[str] | None = None) -> None:
     """Print selected or all key-value pairs from the LMDB database."""
     env = lmdb.open(str(path), readonly=True, lock=False)
     with env.begin() as txn:
-        if keys:
-            records = _dump_selected(txn, keys)
-        else:
-            records = _dump_all(txn)
+        records = _dump_selected(txn, keys) if keys else _dump_all(txn)
     env.close()
 
     print(orjson.dumps(records, option=orjson.OPT_INDENT_2).decode("utf-8"))
diff --git a/uv.lock b/uv.lock
index 5b687e4..4c819e7 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,6 +1,6 @@
 version = 1
 revision = 3
-requires-python = "==3.12.*"
+requires-python = "==3.13.*"
 
 [[package]]
 name = "annotated-doc"
@@ -26,7 +26,6 @@ version = "4.12.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "idna" },
-    { name = "typing-extensions" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" }
 wheels = [
@@ -86,6 +85,7 @@ source = { virtual = "." }
 dependencies = [
     { name = "fastapi" },
     { name = "gemini-webapi" },
+    { name = "httptools" },
     { name = "lmdb" },
     { name = "loguru" },
     { name = "orjson" },
@@ -96,30 +96,33 @@ dependencies = [
 
 [package.optional-dependencies]
 dev = [
+    { name = "pytest" },
     { name = "ruff" },
 ]
 
 [package.dev-dependencies]
 dev = [
-    { name = "ruff" },
+    { name = "gemini-fastapi", extra = ["dev"] },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "fastapi", specifier = ">=0.129.0" },
     { name = "gemini-webapi", specifier = ">=1.19.2" },
+    { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "orjson", specifier = ">=3.11.7" },
-    { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.12.0" },
-    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.0" },
-    { name = "uvicorn", specifier = ">=0.40.0" },
+    { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.0" },
+    { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" },
+    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.1" },
+    { name = "uvicorn", specifier = ">=0.41.0" },
     { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" },
 ]
 provides-extras = ["dev"]
 
 [package.metadata.requires-dev]
-dev = [{ name = "ruff", specifier = ">=0.15.1" }]
+dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
@@ -180,6 +183,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
 ]
 
+[[package]]
+name = "httptools"
+version = "0.7.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/46/120a669232c7bdedb9d52d4aeae7e6c7dfe151e99dc70802e2fc7a5e1993/httptools-0.7.1.tar.gz", hash = "sha256:abd72556974f8e7c74a259655924a717a2365b236c882c3f6f8a45fe94703ac9", size = 258961, upload-time = "2025-10-10T03:55:08.559Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/09/8f/c77b1fcbfd262d422f12da02feb0d218fa228d52485b77b953832105bb90/httptools-0.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6babce6cfa2a99545c60bfef8bee0cc0545413cb0018f617c8059a30ad985de3", size = 202889, upload-time = "2025-10-10T03:54:47.089Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/1a/22887f53602feaa066354867bc49a68fc295c2293433177ee90870a7d517/httptools-0.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:601b7628de7504077dd3dcb3791c6b8694bbd967148a6d1f01806509254fb1ca", size = 108180, upload-time = "2025-10-10T03:54:48.052Z" },
+    { url = "https://files.pythonhosted.org/packages/32/6a/6aaa91937f0010d288d3d124ca2946d48d60c3a5ee7ca62afe870e3ea011/httptools-0.7.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:04c6c0e6c5fb0739c5b8a9eb046d298650a0ff38cf42537fc372b28dc7e4472c", size = 478596, upload-time = "2025-10-10T03:54:48.919Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/70/023d7ce117993107be88d2cbca566a7c1323ccbaf0af7eabf2064fe356f6/httptools-0.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69d4f9705c405ae3ee83d6a12283dc9feba8cc6aaec671b412917e644ab4fa66", size = 473268, upload-time = "2025-10-10T03:54:49.993Z" },
+    { url = "https://files.pythonhosted.org/packages/32/4d/9dd616c38da088e3f436e9a616e1d0cc66544b8cdac405cc4e81c8679fc7/httptools-0.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44c8f4347d4b31269c8a9205d8a5ee2df5322b09bbbd30f8f862185bb6b05346", size = 455517, upload-time = "2025-10-10T03:54:51.066Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/3a/a6c595c310b7df958e739aae88724e24f9246a514d909547778d776799be/httptools-0.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:465275d76db4d554918aba40bf1cbebe324670f3dfc979eaffaa5d108e2ed650", size = 458337, upload-time = "2025-10-10T03:54:52.196Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/82/88e8d6d2c51edc1cc391b6e044c6c435b6aebe97b1abc33db1b0b24cd582/httptools-0.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:322d00c2068d125bd570f7bf78b2d367dad02b919d8581d7476d8b75b294e3e6", size = 85743, upload-time = "2025-10-10T03:54:53.448Z" },
+]
+
 [[package]]
 name = "httpx"
 version = "0.28.1"
@@ -218,18 +236,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
 ]
 
+[[package]]
+name = "iniconfig"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
+]
+
 [[package]]
 name = "lmdb"
 version = "1.7.5"
 source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/c7/a3/3756f2c6adba4a1413dba55e6c81a20b38a868656517308533e33cb59e1c/lmdb-1.7.5.tar.gz", hash = "sha256:f0604751762cb097059d5412444c4057b95f386c7ed958363cf63f453e5108da", size = 883490, upload-time = "2025-10-15T03:39:44.038Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/34/b4/8b862c4d7fd6f68cb33e2a919169fda8924121dc5ff61e3cc105304a6dd4/lmdb-1.7.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b48c2359eea876d7b634b49f84019ecc8c1626da97c795fc7b39a793676815df", size = 100910, upload-time = "2025-10-15T03:39:00.727Z" },
-    { url = "https://files.pythonhosted.org/packages/27/64/8ab5da48180d5f13a293ea00a9f8758b1bee080e76ea0ab0d6be0d51b55f/lmdb-1.7.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2f84793baeb430ba984eb6c1b4e08c0a508b1c03e79ce79fcda0f29ecc06a95a", size = 99376, upload-time = "2025-10-15T03:39:01.791Z" },
-    { url = "https://files.pythonhosted.org/packages/43/e0/51bc942fe5ed3fce69c631b54f52d97785de3d94487376139be6de1e199a/lmdb-1.7.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:68cc21314a33faac1b749645a976b7655e7fa7cc104a72365d2429d2db7f6342", size = 298556, upload-time = "2025-10-15T03:39:02.787Z" },
-    { url = "https://files.pythonhosted.org/packages/66/c5/19ea75c88b91d12da5c6f4bbe2aca633047b6b270fd613d557583d32cc5c/lmdb-1.7.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f2d9b7e102fcfe5e0cfb3acdebd403eb55ccbe5f7202d8f49d60bdafb1546d1e", size = 299449, upload-time = "2025-10-15T03:39:03.903Z" },
-    { url = "https://files.pythonhosted.org/packages/1b/74/365194203dbff47d3a1621366d6a1133cdcce261f4ac0e1d0496f01e6ace/lmdb-1.7.5-cp312-cp312-win_amd64.whl", hash = "sha256:69de89cc79e03e191fc6f95797f1bef91b45c415d1ea9d38872b00b2d989a50f", size = 99328, upload-time = "2025-10-15T03:39:04.949Z" },
-    { url = "https://files.pythonhosted.org/packages/3f/3a/a441afebff5bd761f7f58d194fed7ac265279964957479a5c8a51c42f9ad/lmdb-1.7.5-cp312-cp312-win_arm64.whl", hash = "sha256:0c880ee4b309e900f2d58a710701f5e6316a351878588c6a95a9c0bcb640680b", size = 94191, upload-time = "2025-10-15T03:39:05.975Z" },
+    { url = "https://files.pythonhosted.org/packages/38/f8/03275084218eacdbdf7e185d693e1db4cb79c35d18fac47fa0d388522a0d/lmdb-1.7.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:66ae02fa6179e46bb69fe446b7e956afe8706ae17ec1d4cd9f7056e161019156", size = 101508, upload-time = "2025-10-15T03:39:07.228Z" },
+    { url = "https://files.pythonhosted.org/packages/20/b9/bc33ae2e4940359ba2fc412e6a755a2f126bc5062b4aaf35edd3a791f9a5/lmdb-1.7.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bf65c573311ac8330c7908257f76b28ae3576020123400a81a6b650990dc028c", size = 100105, upload-time = "2025-10-15T03:39:08.491Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/f6/22f84b776a64d3992f052ecb637c35f1764a39df4f2190ecc5a3a1295bd7/lmdb-1.7.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97bcb3fc12841a8828db918e494fe0fd016a73d2680ad830d75719bb3bf4e76a", size = 301500, upload-time = "2025-10-15T03:39:09.463Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/4d/8e6be8d7d5a30d47fa0ce4b55e3a8050ad689556e6e979d206b4ac67b733/lmdb-1.7.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:865f374f6206ab4aacb92ffb1dc612ee1a31a421db7c89733abe06b81ac87cb0", size = 302285, upload-time = "2025-10-15T03:39:10.856Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/dc/7e04fb31a8f88951db81ac677e3ccb3e09248eda40e6ad52f74fd9370c32/lmdb-1.7.5-cp313-cp313-win_amd64.whl", hash = "sha256:82a04d5ca2a6a799c8db7f209354c48aebb49ff338530f5813721fc4c68e4450", size = 99447, upload-time = "2025-10-15T03:39:12.151Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/50/e3f97efab17b3fad4afde99b3c957ecac4ffbefada6874a57ad0c695660a/lmdb-1.7.5-cp313-cp313-win_arm64.whl", hash = "sha256:0ad85a15acbfe8a42fdef92ee5e869610286d38507e976755f211be0fc905ca7", size = 94145, upload-time = "2025-10-15T03:39:13.461Z" },
     { url = "https://files.pythonhosted.org/packages/bd/2c/982cb5afed533d0cb8038232b40c19b5b85a2d887dec74dfd39e8351ef4b/lmdb-1.7.5-py3-none-any.whl", hash = "sha256:fc344bb8bc0786c87c4ccb19b31f09a38c08bd159ada6f037d669426fea06f03", size = 148539, upload-time = "2025-10-15T03:39:42.982Z" },
 ]
 
@@ -252,21 +279,39 @@ version = "3.11.7"
 source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/53/45/b268004f745ede84e5798b48ee12b05129d19235d0e15267aa57dcdb400b/orjson-3.11.7.tar.gz", hash = "sha256:9b1a67243945819ce55d24a30b59d6a168e86220452d2c96f4d1f093e71c0c49", size = 6144992, upload-time = "2026-02-02T15:38:49.29Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/80/bf/76f4f1665f6983385938f0e2a5d7efa12a58171b8456c252f3bae8a4cf75/orjson-3.11.7-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bd03ea7606833655048dab1a00734a2875e3e86c276e1d772b2a02556f0d895f", size = 228545, upload-time = "2026-02-02T15:37:46.376Z" },
-    { url = "https://files.pythonhosted.org/packages/79/53/6c72c002cb13b5a978a068add59b25a8bdf2800ac1c9c8ecdb26d6d97064/orjson-3.11.7-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:89e440ebc74ce8ab5c7bc4ce6757b4a6b1041becb127df818f6997b5c71aa60b", size = 125224, upload-time = "2026-02-02T15:37:47.697Z" },
-    { url = "https://files.pythonhosted.org/packages/2c/83/10e48852865e5dd151bdfe652c06f7da484578ed02c5fca938e3632cb0b8/orjson-3.11.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ede977b5fe5ac91b1dffc0a517ca4542d2ec8a6a4ff7b2652d94f640796342a", size = 128154, upload-time = "2026-02-02T15:37:48.954Z" },
-    { url = "https://files.pythonhosted.org/packages/6e/52/a66e22a2b9abaa374b4a081d410edab6d1e30024707b87eab7c734afe28d/orjson-3.11.7-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b7b1dae39230a393df353827c855a5f176271c23434cfd2db74e0e424e693e10", size = 123548, upload-time = "2026-02-02T15:37:50.187Z" },
-    { url = "https://files.pythonhosted.org/packages/de/38/605d371417021359f4910c496f764c48ceb8997605f8c25bf1dfe58c0ebe/orjson-3.11.7-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed46f17096e28fb28d2975834836a639af7278aa87c84f68ab08fbe5b8bd75fa", size = 129000, upload-time = "2026-02-02T15:37:51.426Z" },
-    { url = "https://files.pythonhosted.org/packages/44/98/af32e842b0ffd2335c89714d48ca4e3917b42f5d6ee5537832e069a4b3ac/orjson-3.11.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3726be79e36e526e3d9c1aceaadbfb4a04ee80a72ab47b3f3c17fefb9812e7b8", size = 141686, upload-time = "2026-02-02T15:37:52.607Z" },
-    { url = "https://files.pythonhosted.org/packages/96/0b/fc793858dfa54be6feee940c1463370ece34b3c39c1ca0aa3845f5ba9892/orjson-3.11.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0724e265bc548af1dedebd9cb3d24b4e1c1e685a343be43e87ba922a5c5fff2f", size = 130812, upload-time = "2026-02-02T15:37:53.944Z" },
-    { url = "https://files.pythonhosted.org/packages/dc/91/98a52415059db3f374757d0b7f0f16e3b5cd5976c90d1c2b56acaea039e6/orjson-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7745312efa9e11c17fbd3cb3097262d079da26930ae9ae7ba28fb738367cbad", size = 133440, upload-time = "2026-02-02T15:37:55.615Z" },
-    { url = "https://files.pythonhosted.org/packages/dc/b6/cb540117bda61791f46381f8c26c8f93e802892830a6055748d3bb1925ab/orjson-3.11.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f904c24bdeabd4298f7a977ef14ca2a022ca921ed670b92ecd16ab6f3d01f867", size = 138386, upload-time = "2026-02-02T15:37:56.814Z" },
-    { url = "https://files.pythonhosted.org/packages/63/1a/50a3201c334a7f17c231eee5f841342190723794e3b06293f26e7cf87d31/orjson-3.11.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b9fc4d0f81f394689e0814617aadc4f2ea0e8025f38c226cbf22d3b5ddbf025d", size = 408853, upload-time = "2026-02-02T15:37:58.291Z" },
-    { url = "https://files.pythonhosted.org/packages/87/cd/8de1c67d0be44fdc22701e5989c0d015a2adf391498ad42c4dc589cd3013/orjson-3.11.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:849e38203e5be40b776ed2718e587faf204d184fc9a008ae441f9442320c0cab", size = 144130, upload-time = "2026-02-02T15:38:00.163Z" },
-    { url = "https://files.pythonhosted.org/packages/0f/fe/d605d700c35dd55f51710d159fc54516a280923cd1b7e47508982fbb387d/orjson-3.11.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4682d1db3bcebd2b64757e0ddf9e87ae5f00d29d16c5cdf3a62f561d08cc3dd2", size = 134818, upload-time = "2026-02-02T15:38:01.507Z" },
-    { url = "https://files.pythonhosted.org/packages/e4/e4/15ecc67edb3ddb3e2f46ae04475f2d294e8b60c1825fbe28a428b93b3fbd/orjson-3.11.7-cp312-cp312-win32.whl", hash = "sha256:f4f7c956b5215d949a1f65334cf9d7612dde38f20a95f2315deef167def91a6f", size = 127923, upload-time = "2026-02-02T15:38:02.75Z" },
-    { url = "https://files.pythonhosted.org/packages/34/70/2e0855361f76198a3965273048c8e50a9695d88cd75811a5b46444895845/orjson-3.11.7-cp312-cp312-win_amd64.whl", hash = "sha256:bf742e149121dc5648ba0a08ea0871e87b660467ef168a3a5e53bc1fbd64bb74", size = 125007, upload-time = "2026-02-02T15:38:04.032Z" },
-    { url = "https://files.pythonhosted.org/packages/68/40/c2051bd19fc467610fed469dc29e43ac65891571138f476834ca192bc290/orjson-3.11.7-cp312-cp312-win_arm64.whl", hash = "sha256:26c3b9132f783b7d7903bf1efb095fed8d4a3a85ec0d334ee8beff3d7a4749d5", size = 126089, upload-time = "2026-02-02T15:38:05.297Z" },
+    { url = "https://files.pythonhosted.org/packages/89/25/6e0e52cac5aab51d7b6dcd257e855e1dec1c2060f6b28566c509b4665f62/orjson-3.11.7-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:1d98b30cc1313d52d4af17d9c3d307b08389752ec5f2e5febdfada70b0f8c733", size = 228390, upload-time = "2026-02-02T15:38:06.8Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/29/a77f48d2fc8a05bbc529e5ff481fb43d914f9e383ea2469d4f3d51df3d00/orjson-3.11.7-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:d897e81f8d0cbd2abb82226d1860ad2e1ab3ff16d7b08c96ca00df9d45409ef4", size = 125189, upload-time = "2026-02-02T15:38:08.181Z" },
+    { url = "https://files.pythonhosted.org/packages/89/25/0a16e0729a0e6a1504f9d1a13cdd365f030068aab64cec6958396b9969d7/orjson-3.11.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:814be4b49b228cfc0b3c565acf642dd7d13538f966e3ccde61f4f55be3e20785", size = 128106, upload-time = "2026-02-02T15:38:09.41Z" },
+    { url = "https://files.pythonhosted.org/packages/66/da/a2e505469d60666a05ab373f1a6322eb671cb2ba3a0ccfc7d4bc97196787/orjson-3.11.7-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d06e5c5fed5caedd2e540d62e5b1c25e8c82431b9e577c33537e5fa4aa909539", size = 123363, upload-time = "2026-02-02T15:38:10.73Z" },
+    { url = "https://files.pythonhosted.org/packages/23/bf/ed73f88396ea35c71b38961734ea4a4746f7ca0768bf28fd551d37e48dd0/orjson-3.11.7-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:31c80ce534ac4ea3739c5ee751270646cbc46e45aea7576a38ffec040b4029a1", size = 129007, upload-time = "2026-02-02T15:38:12.138Z" },
+    { url = "https://files.pythonhosted.org/packages/73/3c/b05d80716f0225fc9008fbf8ab22841dcc268a626aa550561743714ce3bf/orjson-3.11.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f50979824bde13d32b4320eedd513431c921102796d86be3eee0b58e58a3ecd1", size = 141667, upload-time = "2026-02-02T15:38:13.398Z" },
+    { url = "https://files.pythonhosted.org/packages/61/e8/0be9b0addd9bf86abfc938e97441dcd0375d494594b1c8ad10fe57479617/orjson-3.11.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e54f3808e2b6b945078c41aa8d9b5834b28c50843846e97807e5adb75fa9705", size = 130832, upload-time = "2026-02-02T15:38:14.698Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/ec/c68e3b9021a31d9ec15a94931db1410136af862955854ed5dd7e7e4f5bff/orjson-3.11.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12b80df61aab7b98b490fe9e4879925ba666fccdfcd175252ce4d9035865ace", size = 133373, upload-time = "2026-02-02T15:38:16.109Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/45/f3466739aaafa570cc8e77c6dbb853c48bf56e3b43738020e2661e08b0ac/orjson-3.11.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:996b65230271f1a97026fd0e6a753f51fbc0c335d2ad0c6201f711b0da32693b", size = 138307, upload-time = "2026-02-02T15:38:17.453Z" },
+    { url = "https://files.pythonhosted.org/packages/e1/84/9f7f02288da1ffb31405c1be07657afd1eecbcb4b64ee2817b6fe0f785fa/orjson-3.11.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ab49d4b2a6a1d415ddb9f37a21e02e0d5dbfe10b7870b21bf779fc21e9156157", size = 408695, upload-time = "2026-02-02T15:38:18.831Z" },
+    { url = "https://files.pythonhosted.org/packages/18/07/9dd2f0c0104f1a0295ffbe912bc8d63307a539b900dd9e2c48ef7810d971/orjson-3.11.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:390a1dce0c055ddf8adb6aa94a73b45a4a7d7177b5c584b8d1c1947f2ba60fb3", size = 144099, upload-time = "2026-02-02T15:38:20.28Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/66/857a8e4a3292e1f7b1b202883bcdeb43a91566cf59a93f97c53b44bd6801/orjson-3.11.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1eb80451a9c351a71dfaf5b7ccc13ad065405217726b59fdbeadbcc544f9d223", size = 134806, upload-time = "2026-02-02T15:38:22.186Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/5b/6ebcf3defc1aab3a338ca777214966851e92efb1f30dc7fc8285216e6d1b/orjson-3.11.7-cp313-cp313-win32.whl", hash = "sha256:7477aa6a6ec6139c5cb1cc7b214643592169a5494d200397c7fc95d740d5fcf3", size = 127914, upload-time = "2026-02-02T15:38:23.511Z" },
+    { url = "https://files.pythonhosted.org/packages/00/04/c6f72daca5092e3117840a1b1e88dfc809cc1470cf0734890d0366b684a1/orjson-3.11.7-cp313-cp313-win_amd64.whl", hash = "sha256:b9f95dcdea9d4f805daa9ddf02617a89e484c6985fa03055459f90e87d7a0757", size = 124986, upload-time = "2026-02-02T15:38:24.836Z" },
+    { url = "https://files.pythonhosted.org/packages/03/ba/077a0f6f1085d6b806937246860fafbd5b17f3919c70ee3f3d8d9c713f38/orjson-3.11.7-cp313-cp313-win_arm64.whl", hash = "sha256:800988273a014a0541483dc81021247d7eacb0c845a9d1a34a422bc718f41539", size = 126045, upload-time = "2026-02-02T15:38:26.216Z" },
+]
+
+[[package]]
+name = "packaging"
+version = "26.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" },
+]
+
+[[package]]
+name = "pluggy"
+version = "1.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
 ]
 
 [[package]]
@@ -293,38 +338,34 @@ dependencies = [
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" },
-    { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" },
-    { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" },
-    { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" },
-    { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" },
-    { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" },
-    { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" },
-    { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" },
-    { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" },
-    { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" },
-    { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" },
-    { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" },
-    { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" },
-    { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" },
-    { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" },
-    { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" },
-    { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" },
+    { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" },
+    { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" },
+    { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" },
+    { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" },
+    { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" },
 ]
 
 [[package]]
 name = "pydantic-settings"
-version = "2.12.0"
+version = "2.13.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "pydantic" },
     { name = "python-dotenv" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/43/4b/ac7e0aae12027748076d72a8764ff1c9d82ca75a7a52622e67ed3f765c54/pydantic_settings-2.12.0.tar.gz", hash = "sha256:005538ef951e3c2a68e1c08b292b5f2e71490def8589d4221b95dab00dafcfd0", size = 194184, upload-time = "2025-11-10T14:25:47.013Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/96/a1/ae859ffac5a3338a66b74c5e29e244fd3a3cc483c89feaf9f56c39898d75/pydantic_settings-2.13.0.tar.gz", hash = "sha256:95d875514610e8595672800a5c40b073e99e4aae467fa7c8f9c263061ea2e1fe", size = 222450, upload-time = "2026-02-15T12:11:23.476Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/1a/dd1b9d7e627486cf8e7523d09b70010e05a4bc41414f4ae6ce184cf0afb6/pydantic_settings-2.13.0-py3-none-any.whl", hash = "sha256:d67b576fff39cd086b595441bf9c75d4193ca9c0ed643b90360694d0f1240246", size = 58429, upload-time = "2026-02-15T12:11:22.133Z" },
 ]
 
 [package.optional-dependencies]
@@ -332,6 +373,31 @@ yaml = [
     { name = "pyyaml" },
 ]
 
+[[package]]
+name = "pygments"
+version = "2.19.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
+]
+
+[[package]]
+name = "pytest"
+version = "9.0.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "iniconfig" },
+    { name = "packaging" },
+    { name = "pluggy" },
+    { name = "pygments" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
+]
+
 [[package]]
 name = "python-dotenv"
 version = "1.2.1"
@@ -347,16 +413,16 @@ version = "6.0.3"
 source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" },
-    { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" },
-    { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" },
-    { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" },
-    { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" },
-    { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" },
-    { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" },
-    { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" },
-    { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" },
-    { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" },
+    { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" },
+    { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" },
+    { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" },
+    { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" },
+    { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" },
+    { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" },
 ]
 
 [[package]]
@@ -390,7 +456,6 @@ version = "0.52.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
-    { name = "typing-extensions" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" }
 wheels = [
@@ -420,15 +485,15 @@ wheels = [
 
 [[package]]
 name = "uvicorn"
-version = "0.40.0"
+version = "0.41.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "click" },
     { name = "h11" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/32/ce/eeb58ae4ac36fe09e3842eb02e0eb676bf2c53ae062b98f1b2531673efdd/uvicorn-0.41.0.tar.gz", hash = "sha256:09d11cf7008da33113824ee5a1c6422d89fbc2ff476540d69a34c87fab8b571a", size = 82633, upload-time = "2026-02-16T23:07:24.1Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" },
+    { url = "https://files.pythonhosted.org/packages/83/e4/d04a086285c20886c0daad0e026f250869201013d18f81d9ff5eada73a88/uvicorn-0.41.0-py3-none-any.whl", hash = "sha256:29e35b1d2c36a04b9e180d4007ede3bcb32a85fbdfd6c6aeb3f26839de088187", size = 68783, upload-time = "2026-02-16T23:07:22.357Z" },
 ]
 
 [[package]]
@@ -437,12 +502,12 @@ version = "0.22.1"
 source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/3d/ff/7f72e8170be527b4977b033239a83a68d5c881cc4775fca255c677f7ac5d/uvloop-0.22.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fe94b4564e865d968414598eea1a6de60adba0c040ba4ed05ac1300de402cd42", size = 1359936, upload-time = "2025-10-16T22:16:29.436Z" },
-    { url = "https://files.pythonhosted.org/packages/c3/c6/e5d433f88fd54d81ef4be58b2b7b0cea13c442454a1db703a1eea0db1a59/uvloop-0.22.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51eb9bd88391483410daad430813d982010f9c9c89512321f5b60e2cddbdddd6", size = 752769, upload-time = "2025-10-16T22:16:30.493Z" },
-    { url = "https://files.pythonhosted.org/packages/24/68/a6ac446820273e71aa762fa21cdcc09861edd3536ff47c5cd3b7afb10eeb/uvloop-0.22.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:700e674a166ca5778255e0e1dc4e9d79ab2acc57b9171b79e65feba7184b3370", size = 4317413, upload-time = "2025-10-16T22:16:31.644Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/6f/e62b4dfc7ad6518e7eff2516f680d02a0f6eb62c0c212e152ca708a0085e/uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b5b1ac819a3f946d3b2ee07f09149578ae76066d70b44df3fa990add49a82e4", size = 4426307, upload-time = "2025-10-16T22:16:32.917Z" },
-    { url = "https://files.pythonhosted.org/packages/90/60/97362554ac21e20e81bcef1150cb2a7e4ffdaf8ea1e5b2e8bf7a053caa18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e047cc068570bac9866237739607d1313b9253c3051ad84738cbb095be0537b2", size = 4131970, upload-time = "2025-10-16T22:16:34.015Z" },
-    { url = "https://files.pythonhosted.org/packages/99/39/6b3f7d234ba3964c428a6e40006340f53ba37993f46ed6e111c6e9141d18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:512fec6815e2dd45161054592441ef76c830eddaad55c8aa30952e6fe1ed07c0", size = 4296343, upload-time = "2025-10-16T22:16:35.149Z" },
+    { url = "https://files.pythonhosted.org/packages/89/8c/182a2a593195bfd39842ea68ebc084e20c850806117213f5a299dfc513d9/uvloop-0.22.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705", size = 1358611, upload-time = "2025-10-16T22:16:36.833Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/14/e301ee96a6dc95224b6f1162cd3312f6d1217be3907b79173b06785f2fe7/uvloop-0.22.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8", size = 751811, upload-time = "2025-10-16T22:16:38.275Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/02/654426ce265ac19e2980bfd9ea6590ca96a56f10c76e63801a2df01c0486/uvloop-0.22.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d", size = 4288562, upload-time = "2025-10-16T22:16:39.375Z" },
+    { url = "https://files.pythonhosted.org/packages/15/c0/0be24758891ef825f2065cd5db8741aaddabe3e248ee6acc5e8a80f04005/uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e", size = 4366890, upload-time = "2025-10-16T22:16:40.547Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/53/8369e5219a5855869bcee5f4d317f6da0e2c669aecf0ef7d371e3d084449/uvloop-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e", size = 4119472, upload-time = "2025-10-16T22:16:41.694Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/ba/d69adbe699b768f6b29a5eec7b47dd610bd17a69de51b251126a801369ea/uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad", size = 4239051, upload-time = "2025-10-16T22:16:43.224Z" },
 ]
 
 [[package]]

From 35864f65d722b40ce8bbef15b5a5e17944adf96d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 18 Feb 2026 16:54:37 +0700
Subject: [PATCH 138/236] Upgrade to fully support Python 3.13

---
 .github/workflows/docker.yaml | 2 +-
 app/models/__init__.py        | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 775e9e4..11caa57 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -50,7 +50,7 @@ jobs:
             type=semver,pattern={{version}}
             type=semver,pattern={{major}}.{{minor}}
             type=semver,pattern={{major}}
-            type=sha,format=short
+            type=raw,value={{date 'YYYYMMDD'}}
             type=raw,value=latest,enable={{is_default_branch}}
 
       - name: Build and push Docker image
diff --git a/app/models/__init__.py b/app/models/__init__.py
index a72efdc..3896de1 100644
--- a/app/models/__init__.py
+++ b/app/models/__init__.py
@@ -19,6 +19,7 @@
     ResponseOutputMessage,
     ResponseToolCall,
     ResponseToolChoice,
+    ResponseUsage,
     Tool,
     ToolCall,
     ToolChoiceFunction,
@@ -48,6 +49,7 @@
     "ResponseOutputMessage",
     "ResponseToolCall",
     "ResponseToolChoice",
+    "ResponseUsage",
     "Tool",
     "ToolCall",
     "ToolChoiceFunction",

From a48c38d16fab0a0c27a716ee25bf12842c8bd9a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 18 Feb 2026 17:19:51 +0700
Subject: [PATCH 139/236] Upgrade to fully support Python 3.13

---
 .github/workflows/docker.yaml |   2 +-
 app/models/models.py          | 130 +++++++++++++++++-----------------
 2 files changed, 67 insertions(+), 65 deletions(-)

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 11caa57..1c5a2ee 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -50,7 +50,7 @@ jobs:
             type=semver,pattern={{version}}
             type=semver,pattern={{major}}.{{minor}}
             type=semver,pattern={{major}}
-            type=raw,value={{date 'YYYYMMDD'}}
+            type=raw,value={{date 'YYYYMMDD'}}-{{sha}}
             type=raw,value=latest,enable={{is_default_branch}}
 
       - name: Build and push Docker image
diff --git a/app/models/models.py b/app/models/models.py
index ca206b7..3b3e627 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -10,10 +10,10 @@ class ContentItem(BaseModel):
     """Individual content item (text, image, or file) within a message."""
 
     type: Literal["text", "image_url", "file", "input_audio"]
-    text: str | None = None
-    image_url: dict[str, str] | None = None
-    input_audio: dict[str, Any] | None = None
-    file: dict[str, str] | None = None
+    text: str | None = Field(default=None)
+    image_url: dict[str, Any] | None = Field(default=None)
+    input_audio: dict[str, Any] | None = Field(default=None)
+    file: dict[str, Any] | None = Field(default=None)
     annotations: list[dict[str, Any]] = Field(default_factory=list)
 
 
@@ -21,13 +21,13 @@ class Message(BaseModel):
     """Message model"""
 
     role: str
-    content: str | list[ContentItem] | None = None
-    name: str | None = None
-    tool_calls: list[ToolCall] | None = None
-    tool_call_id: str | None = None
-    refusal: str | None = None
-    reasoning_content: str | None = None
-    audio: dict[str, Any] | None = None
+    content: str | list[ContentItem] | None = Field(default=None)
+    name: str | None = Field(default=None)
+    tool_calls: list[ToolCall] | None = Field(default=None)
+    tool_call_id: str | None = Field(default=None)
+    refusal: str | None = Field(default=None)
+    reasoning_content: str | None = Field(default=None)
+    audio: dict[str, Any] | None = Field(default=None)
     annotations: list[dict[str, Any]] = Field(default_factory=list)
 
     @model_validator(mode="after")
@@ -44,7 +44,7 @@ class Choice(BaseModel):
     index: int
     message: Message
     finish_reason: str
-    logprobs: dict[str, Any] | None = None
+    logprobs: dict[str, Any] | None = Field(default=None)
 
 
 class FunctionCall(BaseModel):
@@ -66,8 +66,8 @@ class ToolFunctionDefinition(BaseModel):
     """Function definition for tool."""
 
     name: str
-    description: str | None = None
-    parameters: dict[str, Any] | None = None
+    description: str | None = Field(default=None)
+    parameters: dict[str, Any] | None = Field(default=None)
 
 
 class Tool(BaseModel):
@@ -96,8 +96,8 @@ class Usage(BaseModel):
     prompt_tokens: int
     completion_tokens: int
     total_tokens: int
-    prompt_tokens_details: dict[str, int] | None = None
-    completion_tokens_details: dict[str, int] | None = None
+    prompt_tokens_details: dict[str, int] | None = Field(default=None)
+    completion_tokens_details: dict[str, int] | None = Field(default=None)
 
 
 class ModelData(BaseModel):
@@ -114,16 +114,16 @@ class ChatCompletionRequest(BaseModel):
 
     model: str
     messages: list[Message]
-    stream: bool | None = False
-    user: str | None = None
-    temperature: float | None = 0.7
-    top_p: float | None = 1.0
-    max_tokens: int | None = None
-    tools: list[Tool] | None = None
+    stream: bool | None = Field(default=False)
+    user: str | None = Field(default=None)
+    temperature: float | None = Field(default=0.7)
+    top_p: float | None = Field(default=1.0)
+    max_tokens: int | None = Field(default=None)
+    tools: list[Tool] | None = Field(default=None)
     tool_choice: (
         Literal["none"] | Literal["auto"] | Literal["required"] | ToolChoiceFunction | None
-    ) = None
-    response_format: dict[str, Any] | None = None
+    ) = Field(default=None)
+    response_format: dict[str, Any] | None = Field(default=None)
 
 
 class ChatCompletionResponse(BaseModel):
@@ -148,9 +148,9 @@ class HealthCheckResponse(BaseModel):
     """Health check response model"""
 
     ok: bool
-    storage: dict[str, str | int] | None = None
-    clients: dict[str, bool] | None = None
-    error: str | None = None
+    storage: dict[str, Any] | None = Field(default=None)
+    clients: dict[str, bool] | None = Field(default=None)
+    error: str | None = Field(default=None)
 
 
 class ConversationInStore(BaseModel):
@@ -172,12 +172,12 @@ class ResponseInputContent(BaseModel):
     """Content item for Responses API input."""
 
     type: Literal["input_text", "input_image", "input_file"]
-    text: str | None = None
-    image_url: str | None = None
-    detail: Literal["auto", "low", "high"] | None = None
-    file_url: str | None = None
-    file_data: str | None = None
-    filename: str | None = None
+    text: str | None = Field(default=None)
+    image_url: str | None = Field(default=None)
+    detail: Literal["auto", "low", "high"] | None = Field(default=None)
+    file_url: str | None = Field(default=None)
+    file_data: str | None = Field(default=None)
+    filename: str | None = Field(default=None)
     annotations: list[dict[str, Any]] = Field(default_factory=list)
 
     @model_validator(mode="before")
@@ -192,7 +192,7 @@ def normalize_output_text(cls, data: Any) -> Any:
 class ResponseInputItem(BaseModel):
     """Single input item for Responses API."""
 
-    type: Literal["message"] | None = "message"
+    type: Literal["message"] | None = Field(default="message")
     role: Literal["user", "assistant", "system", "developer"]
     content: str | list[ResponseInputContent]
 
@@ -201,15 +201,15 @@ class ResponseToolChoice(BaseModel):
     """Tool choice enforcing a specific tool in Responses API."""
 
     type: Literal["function", "image_generation"]
-    function: ToolChoiceFunctionDetail | None = None
+    function: ToolChoiceFunctionDetail | None = Field(default=None)
 
 
 class ResponseImageTool(BaseModel):
     """Image generation tool specification for Responses API."""
 
     type: Literal["image_generation"]
-    model: str | None = None
-    output_format: str | None = None
+    model: str | None = Field(default=None)
+    output_format: str | None = Field(default=None)
 
 
 class ResponseCreateRequest(BaseModel):
@@ -217,17 +217,17 @@ class ResponseCreateRequest(BaseModel):
 
     model: str
     input: str | list[ResponseInputItem]
-    instructions: str | list[ResponseInputItem] | None = None
-    temperature: float | None = 0.7
-    top_p: float | None = 1.0
-    max_output_tokens: int | None = None
-    stream: bool | None = False
-    tool_choice: str | ResponseToolChoice | None = None
-    tools: list[Tool | ResponseImageTool] | None = None
-    store: bool | None = None
-    user: str | None = None
-    response_format: dict[str, Any] | None = None
-    metadata: dict[str, Any] | None = None
+    instructions: str | list[ResponseInputItem] | None = Field(default=None)
+    temperature: float | None = Field(default=0.7)
+    top_p: float | None = Field(default=1.0)
+    max_output_tokens: int | None = Field(default=None)
+    stream: bool | None = Field(default=False)
+    tool_choice: str | ResponseToolChoice | None = Field(default=None)
+    tools: list[Tool | ResponseImageTool] | None = Field(default=None)
+    store: bool | None = Field(default=None)
+    user: str | None = Field(default=None)
+    response_format: dict[str, Any] | None = Field(default=None)
+    metadata: dict[str, Any] | None = Field(default=None)
 
 
 class ResponseUsage(BaseModel):
@@ -242,7 +242,7 @@ class ResponseOutputContent(BaseModel):
     """Content item for Responses API output."""
 
     type: Literal["output_text"]
-    text: str | None = ""
+    text: str | None = Field(default="")
     annotations: list[dict[str, Any]] = Field(default_factory=list)
 
 
@@ -259,20 +259,22 @@ class ResponseImageGenerationCall(BaseModel):
     """Image generation call record emitted in Responses API."""
 
     id: str
-    type: Literal["image_generation_call"] = "image_generation_call"
-    status: Literal["completed", "in_progress", "generating", "failed"] = "completed"
-    result: str | None = None
-    output_format: str | None = None
-    size: str | None = None
-    revised_prompt: str | None = None
+    type: Literal["image_generation_call"] = Field(default="image_generation_call")
+    status: Literal["completed", "in_progress", "generating", "failed"] = Field(default="completed")
+    result: str | None = Field(default=None)
+    output_format: str | None = Field(default=None)
+    size: str | None = Field(default=None)
+    revised_prompt: str | None = Field(default=None)
 
 
 class ResponseToolCall(BaseModel):
     """Tool call record emitted in Responses API."""
 
     id: str
-    type: Literal["tool_call"] = "tool_call"
-    status: Literal["in_progress", "completed", "failed", "requires_action"] = "completed"
+    type: Literal["tool_call"] = Field(default="tool_call")
+    status: Literal["in_progress", "completed", "failed", "requires_action"] = Field(
+        default="completed"
+    )
     function: FunctionCall
 
 
@@ -280,7 +282,7 @@ class ResponseCreateResponse(BaseModel):
     """Responses API response payload."""
 
     id: str
-    object: Literal["response"] = "response"
+    object: Literal["response"] = Field(default="response")
     created_at: int
     model: str
     output: list[ResponseOutputMessage | ResponseImageGenerationCall | ResponseToolCall]
@@ -291,13 +293,13 @@ class ResponseCreateResponse(BaseModel):
         "incomplete",
         "cancelled",
         "requires_action",
-    ] = "completed"
-    tool_choice: str | ResponseToolChoice | None = None
-    tools: list[Tool | ResponseImageTool] | None = None
+    ] = Field(default="completed")
+    tool_choice: str | ResponseToolChoice | None = Field(default=None)
+    tools: list[Tool | ResponseImageTool] | None = Field(default=None)
     usage: ResponseUsage
-    error: dict[str, Any] | None = None
-    metadata: dict[str, Any] | None = None
-    input: str | list[ResponseInputItem] | None = None
+    error: dict[str, Any] | None = Field(default=None)
+    metadata: dict[str, Any] | None = Field(default=None)
+    input: str | list[ResponseInputItem] | None = Field(default=None)
 
 
 # Rebuild models with forward references

From 0540c4f16df0a2aef8558c6bd30f6a34dd733fb2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 22 Feb 2026 10:25:08 +0700
Subject: [PATCH 140/236] Refactor regex patterns for tool and chat message
 processing in helper module

---
 Dockerfile          |   3 +-
 app/server/chat.py  | 285 +++++++-------------------------------------
 app/utils/helper.py |  74 ++++++++----
 3 files changed, 100 insertions(+), 262 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index ef7f41e..62ce9d1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,7 @@
 FROM ghcr.io/astral-sh/uv:python3.13-trixie-slim
 
-LABEL org.opencontainers.image.description="Web-based Gemini models wrapped into an OpenAI-compatible API."
+LABEL org.opencontainers.image.title="Gemini-FastAPI" \
+      org.opencontainers.image.description="Web-based Gemini models wrapped into an OpenAI-compatible API."
 
 WORKDIR /app
 
diff --git a/app/server/chat.py b/app/server/chat.py
index 3849af5..b7c0564 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -48,8 +48,8 @@
 from app.services import GeminiClientPool, GeminiClientWrapper, LMDBConversationStore
 from app.utils import g_config
 from app.utils.helper import (
-    TOOL_HINT_LINE_END,
-    TOOL_HINT_LINE_START,
+    STREAM_MASTER_RE,
+    STREAM_TAIL_RE,
     TOOL_HINT_STRIPPED,
     TOOL_WRAP_HINT,
     detect_image_extension,
@@ -751,275 +751,80 @@ async def _send_with_split(
 class StreamingOutputFilter:
     """
     Filter to suppress technical protocol markers, tool calls, and system hints from the stream.
-    Uses a state machine to handle fragmentation where markers are split across multiple chunks.
+    Uses a high-performance regex state machine to handle fragmented markers.
     """
 
     def __init__(self):
         self.buffer = ""
         self.state = "NORMAL"
         self.current_role = ""
-        self.block_buffer = ""
-
-        self.STATE_MARKERS = {
-            "TOOL": {
-                "starts": ["[ToolCalls]", "\\[ToolCalls\\]"],
-                "ends": ["[/ToolCalls]", "\\[\\/ToolCalls\\]"],
-            },
-            "ORPHAN": {
-                "starts": ["[Call:", "\\[Call\\:"],
-                "ends": ["[/Call]", "\\[\\/Call\\]"],
-            },
-            "RESP": {
-                "starts": ["[ToolResults]", "\\[ToolResults\\]"],
-                "ends": ["[/ToolResults]", "\\[\\/ToolResults\\]"],
-            },
-            "ARG": {
-                "starts": ["[CallParameter:", "\\[CallParameter\\:"],
-                "ends": ["[/CallParameter]", "\\[\\/CallParameter\\]"],
-            },
-            "RESULT": {
-                "starts": ["[ToolResult]", "\\[ToolResult\\]"],
-                "ends": ["[/ToolResult]", "\\[\\/ToolResult\\]"],
-            },
-            "ITEM": {
-                "starts": ["[Result:", "\\[Result\\:"],
-                "ends": ["[/Result]", "\\[\\/Result\\]"],
-            },
-            "TAG": {
-                "starts": ["<|im_start|>", "\\<\\|im\\_start\\|\\>"],
-                "ends": ["<|im_end|>", "\\<\\|im\\_end\\|\\>"],
-            },
-        }
-
-        hint_start = f"\n{TOOL_HINT_LINE_START}" if TOOL_HINT_LINE_START else ""
-        if hint_start:
-            self.STATE_MARKERS["HINT"] = {
-                "starts": [hint_start],
-                "ends": [TOOL_HINT_LINE_END],
-            }
-
-        self.ORPHAN_ENDS = [
-            "<|im_end|>",
-            "\\<\\|im\\_end\\|\\>",
-            "[/Call]",
-            "\\[\\/Call\\]",
-            "[/ToolCalls]",
-            "\\[\\/ToolCalls\\]",
-            "[/CallParameter]",
-            "\\[\\/CallParameter\\]",
-            "[/ToolResult]",
-            "\\[\\/ToolResult\\]",
-            "[/ToolResults]",
-            "\\[\\/ToolResults\\]",
-            "[/Result]",
-            "\\[\\/Result\\]",
-        ]
-
-        self.WATCH_MARKERS = []
-        for cfg in self.STATE_MARKERS.values():
-            self.WATCH_MARKERS.extend(cfg["starts"])
-            self.WATCH_MARKERS.extend(cfg.get("ends", []))
-        self.WATCH_MARKERS.extend(self.ORPHAN_ENDS)
+        self.in_chatml = False
 
     def process(self, chunk: str) -> str:
         self.buffer += chunk
         output = []
 
         while self.buffer:
-            buf_low = self.buffer.lower()
-            if self.state == "NORMAL":
-                indices = []
-                for m_type, cfg in self.STATE_MARKERS.items():
-                    for p in cfg["starts"]:
-                        idx = buf_low.find(p.lower())
-                        if idx != -1:
-                            indices.append((idx, m_type, len(p)))
-
-                for p in self.ORPHAN_ENDS:
-                    idx = buf_low.find(p.lower())
-                    if idx != -1:
-                        indices.append((idx, "SKIP", len(p)))
-
-                if not indices:
-                    keep_len = 0
-                    for marker in self.WATCH_MARKERS:
-                        m_low = marker.lower()
-                        for i in range(len(m_low) - 1, 0, -1):
-                            if buf_low.endswith(m_low[:i]):
-                                keep_len = max(keep_len, i)
-                                break
-                    yield_len = len(self.buffer) - keep_len
-                    if yield_len > 0:
-                        output.append(self.buffer[:yield_len])
-                        self.buffer = self.buffer[yield_len:]
-                    break
-
-                indices.sort()
-                idx, m_type, m_len = indices[0]
-                output.append(self.buffer[:idx])
-                self.buffer = self.buffer[idx:]
-
-                if m_type == "SKIP":
-                    self.buffer = self.buffer[m_len:]
-                    continue
-
-                self.state = f"IN_{m_type}"
-                if m_type in ("TOOL", "ORPHAN"):
-                    self.block_buffer = ""
-
-                self.buffer = self.buffer[m_len:]
-
-            elif self.state == "IN_HINT":
-                cfg = self.STATE_MARKERS["HINT"]
-                found_idx, found_len = -1, 0
-                for p in cfg["ends"]:
-                    idx = buf_low.find(p.lower())
-                    if idx != -1 and (found_idx == -1 or idx < found_idx):
-                        found_idx, found_len = idx, len(p)
-
-                if found_idx != -1:
-                    self.buffer = self.buffer[found_idx + found_len :]
-                    self.state = "NORMAL"
-                else:
-                    max_end_len = max(len(p) for p in cfg["ends"])
-                    if len(self.buffer) > max_end_len:
-                        self.buffer = self.buffer[-max_end_len:]
-                    break
-
-            elif self.state == "IN_ARG":
-                cfg = self.STATE_MARKERS["ARG"]
-                found_idx, found_len = -1, 0
-                for p in cfg["ends"]:
-                    idx = buf_low.find(p.lower())
-                    if idx != -1 and (found_idx == -1 or idx < found_idx):
-                        found_idx, found_len = idx, len(p)
-
-                if found_idx != -1:
-                    self.buffer = self.buffer[found_idx + found_len :]
-                    self.state = "NORMAL"
-                else:
-                    max_end_len = max(len(p) for p in cfg["ends"])
-                    if len(self.buffer) > max_end_len:
-                        self.buffer = self.buffer[-max_end_len:]
-                    break
-
-            elif self.state == "IN_RESULT":
-                cfg = self.STATE_MARKERS["RESULT"]
-                found_idx, found_len = -1, 0
-                for p in cfg["ends"]:
-                    idx = buf_low.find(p.lower())
-                    if idx != -1 and (found_idx == -1 or idx < found_idx):
-                        found_idx, found_len = idx, len(p)
-
-                if found_idx != -1:
-                    self.buffer = self.buffer[found_idx + found_len :]
-                    self.state = "NORMAL"
-                else:
-                    max_end_len = max(len(p) for p in cfg["ends"])
-                    if len(self.buffer) > max_end_len:
-                        self.buffer = self.buffer[-max_end_len:]
-                    break
-
-            elif self.state == "IN_RESP":
-                cfg = self.STATE_MARKERS["RESP"]
-                found_idx, found_len = -1, 0
-                for p in cfg["ends"]:
-                    idx = buf_low.find(p.lower())
-                    if idx != -1 and (found_idx == -1 or idx < found_idx):
-                        found_idx, found_len = idx, len(p)
-
-                if found_idx != -1:
-                    self.buffer = self.buffer[found_idx + found_len :]
-                    self.state = "NORMAL"
-                else:
-                    break
-
-            elif self.state == "IN_TOOL":
-                cfg = self.STATE_MARKERS["TOOL"]
-                found_idx, found_len = -1, 0
-                for p in cfg["ends"]:
-                    idx = buf_low.find(p.lower())
-                    if idx != -1 and (found_idx == -1 or idx < found_idx):
-                        found_idx, found_len = idx, len(p)
-
-                if found_idx != -1:
-                    self.block_buffer += self.buffer[:found_idx]
-                    self.buffer = self.buffer[found_idx + found_len :]
-                    self.state = "NORMAL"
-                else:
-                    max_end_len = max(len(p) for p in cfg["ends"])
-                    if len(self.buffer) > max_end_len:
-                        self.block_buffer += self.buffer[:-max_end_len]
-                        self.buffer = self.buffer[-max_end_len:]
-                    break
-
-            elif self.state == "IN_ORPHAN":
-                cfg = self.STATE_MARKERS["ORPHAN"]
-                found_idx, found_len = -1, 0
-                for p in cfg["ends"]:
-                    idx = buf_low.find(p.lower())
-                    if idx != -1 and (found_idx == -1 or idx < found_idx):
-                        found_idx, found_len = idx, len(p)
-
-                if found_idx != -1:
-                    self.block_buffer += self.buffer[:found_idx]
-                    self.buffer = self.buffer[found_idx + found_len :]
-                    self.state = "NORMAL"
-                else:
-                    max_end_len = max(len(p) for p in cfg["ends"])
-                    if len(self.buffer) > max_end_len:
-                        self.block_buffer += self.buffer[:-max_end_len]
-                        self.buffer = self.buffer[-max_end_len:]
-                    break
-
-            elif self.state == "IN_TAG":
+            if self.state == "IN_TAG_HEADER":
                 nl_idx = self.buffer.find("\n")
                 if nl_idx != -1:
                     self.current_role = self.buffer[:nl_idx].strip().lower()
                     self.buffer = self.buffer[nl_idx + 1 :]
                     self.state = "IN_BLOCK"
+                    self.in_chatml = True
+                    continue
                 else:
                     break
 
-            elif self.state == "IN_BLOCK":
-                cfg = self.STATE_MARKERS["TAG"]
-                found_idx, found_len = -1, 0
-                for p in cfg["ends"]:
-                    idx = buf_low.find(p.lower())
-                    if idx != -1 and (found_idx == -1 or idx < found_idx):
-                        found_idx, found_len = idx, len(p)
-
-                if found_idx != -1:
-                    content = self.buffer[:found_idx]
-                    if self.current_role != "tool":
-                        output.append(content)
-                    self.buffer = self.buffer[found_idx + found_len :]
-                    self.state = "NORMAL"
-                    self.current_role = ""
+            match = STREAM_MASTER_RE.search(self.buffer)
+            if not match:
+                tail_match = STREAM_TAIL_RE.search(self.buffer)
+                keep_len = len(tail_match.group(0)) if tail_match else 0
+                yield_len = len(self.buffer) - keep_len
+                if yield_len > 0:
+                    if self.state == "NORMAL" or (
+                        self.state == "IN_BLOCK" and self.current_role != "tool"
+                    ):
+                        output.append(self.buffer[:yield_len])
+                    self.buffer = self.buffer[yield_len:]
+                break
+
+            start, end = match.span()
+            matched_group = match.lastgroup
+            pre_text = self.buffer[:start]
+
+            if self.state == "NORMAL" or (self.state == "IN_BLOCK" and self.current_role != "tool"):
+                output.append(pre_text)
+
+            if matched_group.endswith("_START"):
+                m_type = matched_group.split("_")[0]
+                if m_type == "TAG":
+                    self.state = "IN_TAG_HEADER"
                 else:
-                    max_end_len = max(len(p) for p in cfg["ends"])
-                    if self.current_role != "tool":
-                        if len(self.buffer) > max_end_len:
-                            output.append(self.buffer[:-max_end_len])
-                            self.buffer = self.buffer[-max_end_len:]
-                        break
-                    else:
-                        if len(self.buffer) > max_end_len:
-                            self.buffer = self.buffer[-max_end_len:]
-                        break
+                    self.state = f"IN_{m_type}"
+            elif matched_group in ("PROTOCOL_EXIT", "HINT_EXIT"):
+                self.state = "IN_BLOCK" if self.in_chatml else "NORMAL"
+            elif matched_group == "TAG_EXIT":
+                self.state = "NORMAL"
+                self.in_chatml = False
+                self.current_role = ""
+
+            self.buffer = self.buffer[end:]
 
         return "".join(output)
 
     def flush(self) -> str:
         """Release remaining buffer content and perform final cleanup at stream end."""
         res = ""
-        if self.state in ("IN_TOOL", "IN_ORPHAN", "IN_RESP", "IN_HINT", "IN_ARG", "IN_RESULT"):
-            res = ""
-        elif (self.state == "IN_BLOCK" and self.current_role != "tool") or self.state == "NORMAL":
+        if self.state == "NORMAL" or (self.state == "IN_BLOCK" and self.current_role != "tool"):
             res = self.buffer
+            tail_match = STREAM_TAIL_RE.search(res)
+            if tail_match:
+                res = res[: -len(tail_match.group(0))]
 
         self.buffer = ""
         self.state = "NORMAL"
+        self.in_chatml = False
         return strip_system_hints(res)
 
 
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 002d401..e101e17 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -36,42 +36,76 @@
     "CRITICAL: Do NOT mix natural language with protocol tags. Either respond naturally OR provide the protocol block alone. There is no middle ground.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"(?:\[ToolCalls]|\\\[ToolCalls\\])\s*(.*?)\s*(?:\[/ToolCalls]|\\\[\\/ToolCalls\\])",
+    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[\\?/?ToolCalls\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TOOL_CALL_RE = re.compile(
-    r"(?:\[Call:|\\\[Call\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/Call]|\\\[\\/Call\\])",
+    r"\\?\[Call\\?:\s*(?P<name>(?:[^]\\]|\\.)+)\s*\\?]\s*(?P<body>.*?)\s*\\?\[\\?/?Call\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"(?:\[ToolResults]|\\\[ToolResults\\])\s*(.*?)\s*(?:\[/ToolResults]|\\\[\\/ToolResults\\])",
+    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[\\?/?ToolResults\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"(?:\[Result:|\\\[Result\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/Result]|\\\[\\/Result\\])",
+    r"\\?\[Result\\?:\s*(?P<name>(?:[^]\\]|\\.)+)\s*\\?]\s*(?P<body>.*?)\s*\\?\[\\?/?Result\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"(?:\[CallParameter:|\\\[CallParameter\\:)(?P<name>(?:[^]\\]|\\.)+)(?:]|\\])\s*(?P<body>.*?)\s*(?:\[/CallParameter]|\\\[\\/CallParameter\\])",
+    r"\\?\[CallParameter\\?:\s*(?P<name>(?:[^]\\]|\\.)+)\s*\\?]\s*(?P<body>.*?)\s*\\?\[\\?/?CallParameter\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"(?:\[ToolResult]|\\\[ToolResult\\])\s*(.*?)\s*(?:\[/ToolResult]|\\\[\\/ToolResult\\])",
+    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[\\?/?ToolResult\\?]",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(
-    r"<\|im_(?:start|end)\|>|\\<\\\|im\\_(?:start|end)\\\|\\>", re.IGNORECASE
-)
-CHATML_START_RE = re.compile(
-    r"(?:<\|im_start\|>|\\<\\\|im\\_start\\\|\\>)\s*(\w+)\s*\n?", re.IGNORECASE
-)
-CHATML_END_RE = re.compile(r"<\|im_end\|>|\\<\\\|im\\_end\\\|\\>", re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(r"\\?<\\?\|im\\?_(?:start|end)\\?\|\\?>", re.IGNORECASE)
+CHATML_START_RE = re.compile(r"\\?<\\?\|im\\?_start\\?\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
+CHATML_END_RE = re.compile(r"\\?<\\?\|im\\?_end\\?\|\\?>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 PARAM_FENCE_RE = re.compile(r"^(?P<fence>`{3,})")
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
 _hint_lines = [line.strip() for line in TOOL_WRAP_HINT.split("\n") if line.strip()]
 TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
 TOOL_HINT_LINE_END = _hint_lines[-1] if _hint_lines else ""
+TOOL_HINT_START_ESC = re.escape(TOOL_HINT_LINE_START) if TOOL_HINT_LINE_START else ""
+TOOL_HINT_END_ESC = re.escape(TOOL_HINT_LINE_END) if TOOL_HINT_LINE_END else ""
+
+HINT_FULL_RE = (
+    re.compile(rf"\n?{TOOL_HINT_START_ESC}:?.*?{TOOL_HINT_END_ESC}\\.?\n?", re.DOTALL)
+    if TOOL_HINT_START_ESC and TOOL_HINT_END_ESC
+    else None
+)
+HINT_START_RE = re.compile(rf"\n?{TOOL_HINT_START_ESC}:?\s*") if TOOL_HINT_START_ESC else None
+HINT_END_RE = re.compile(rf"\s*{TOOL_HINT_END_ESC}\.?\n?") if TOOL_HINT_END_ESC else None
+
+# --- Streaming Specific Patterns ---
+_START_PATTERNS = {
+    "TOOL": r"\\?\[ToolCalls\\?\]",
+    "ORPHAN": r"\\?\[Call\\?:\s*[^\]\\]+\s*\\?\]",
+    "RESP": r"\\?\[ToolResults\\?\]",
+    "ARG": r"\\?\[CallParameter\\?:\s*[^\]\\]+\s*\\?\]",
+    "RESULT": r"\\?\[ToolResult\\?\]",
+    "ITEM": r"\\?\[Result\\?:\s*[^\]\\]+\s*\\?\]",
+    "TAG": r"\\?<\\?\|im\\?_start\\?\|\\?>",
+}
+
+_PROTOCOL_ENDS = r"\\?\[\\?/(?:ToolCalls|Call|ToolResults|CallParameter|ToolResult|Result)\\?\]"
+_TAG_END = r"\\?<\\?\|im\\?_end\\?\|\\?>"
+
+_master_parts = [f"(?P<{name}_START>{pattern})" for name, pattern in _START_PATTERNS.items()]
+_master_parts.append(f"(?P<PROTOCOL_EXIT>{_PROTOCOL_ENDS})")
+_master_parts.append(f"(?P<TAG_EXIT>{_TAG_END})")
+
+if TOOL_HINT_START_ESC and TOOL_HINT_END_ESC:
+    _START_PATTERNS["HINT"] = rf"\n?{TOOL_HINT_START_ESC}:?\s*"
+    _master_parts.append(f"(?P<HINT_EXIT>{TOOL_HINT_END_ESC}\\.?\n?)")
+
+STREAM_MASTER_RE = re.compile("|".join(_master_parts), re.IGNORECASE)
+STREAM_TAIL_RE = re.compile(
+    r"(?:\\|\\?\[(?:T(?:o?o?l?)?|C(?:a?l?l?)?|R(?:e?s?u?l?t?)?|/)?[\w/:]*|\\?<\??\|?i?m?_?(?:s?t?a?r?t?|e?n?d?)\|?|)$",
+    re.IGNORECASE,
+)
 
 
 def add_tag(role: str, content: str, unclose: bool = False) -> str:
@@ -213,14 +247,12 @@ def strip_system_hints(text: str) -> str:
 
     cleaned = t_unescaped.replace(TOOL_WRAP_HINT, "").replace(TOOL_HINT_STRIPPED, "")
 
-    if TOOL_HINT_LINE_START and TOOL_HINT_LINE_END:
-        pattern = rf"\n?{re.escape(TOOL_HINT_LINE_START)}.*?{re.escape(TOOL_HINT_LINE_END)}\.?\n?"
-        cleaned = re.sub(pattern, "", cleaned, flags=re.DOTALL)
-
-    if TOOL_HINT_LINE_START:
-        cleaned = re.sub(rf"\n?{re.escape(TOOL_HINT_LINE_START)}:?\s*", "", cleaned)
-    if TOOL_HINT_LINE_END:
-        cleaned = re.sub(rf"\s*{re.escape(TOOL_HINT_LINE_END)}\.?\n?", "", cleaned)
+    if HINT_FULL_RE:
+        cleaned = HINT_FULL_RE.sub("", cleaned)
+    if HINT_START_RE:
+        cleaned = HINT_START_RE.sub("", cleaned)
+    if HINT_END_RE:
+        cleaned = HINT_END_RE.sub("", cleaned)
 
     cleaned = strip_tagged_blocks(cleaned)
     cleaned = CONTROL_TOKEN_RE.sub("", cleaned)

From 084eda7942a09030bc456f59184799790a7edb88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 22 Feb 2026 10:47:54 +0700
Subject: [PATCH 141/236] Update dependencies to latest versions

---
 pyproject.toml |  6 ++---
 uv.lock        | 60 +++++++++++++++++++++++++-------------------------
 2 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index a1ae29d..6599122 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,13 +5,13 @@ description = "FastAPI Server built on Gemini Web API"
 readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
-    "fastapi>=0.129.0",
+    "fastapi>=0.129.2",
     "gemini-webapi>=1.19.2",
     "httptools>=0.7.1",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
     "orjson>=3.11.7",
-    "pydantic-settings[yaml]>=2.13.0",
+    "pydantic-settings[yaml]>=2.13.1",
     "uvicorn>=0.41.0",
     "uvloop>=0.22.1; sys_platform != 'win32'",
 ]
@@ -22,7 +22,7 @@ Repository = "https://github.com/Nativu5/Gemini-FastAPI"
 [project.optional-dependencies]
 dev = [
     "pytest>=9.0.2",
-    "ruff>=0.15.1",
+    "ruff>=0.15.2",
 ]
 
 [dependency-groups]
diff --git a/uv.lock b/uv.lock
index 4c819e7..4cae2c0 100644
--- a/uv.lock
+++ b/uv.lock
@@ -64,7 +64,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.129.0"
+version = "0.129.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -73,9 +73,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/48/47/75f6bea02e797abff1bca968d5997793898032d9923c1935ae2efdece642/fastapi-0.129.0.tar.gz", hash = "sha256:61315cebd2e65df5f97ec298c888f9de30430dd0612d59d6480beafbc10655af", size = 375450, upload-time = "2026-02-12T13:54:52.541Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/fd/cc/1b0d90ed759ff8c9dbc4800de7475d4e9256a81b97b45bd05a1affcb350a/fastapi-0.129.2.tar.gz", hash = "sha256:e2b3637a2b47856e704dbd9a3a09393f6df48e8b9cb6c7a3e26ba44d2053f9ab", size = 368211, upload-time = "2026-02-21T17:25:49.198Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/9e/dd/d0ee25348ac58245ee9f90b6f3cbb666bf01f69be7e0911f9851bddbda16/fastapi-0.129.0-py3-none-any.whl", hash = "sha256:b4946880e48f462692b31c083be0432275cbfb6e2274566b1be91479cc1a84ec", size = 102950, upload-time = "2026-02-12T13:54:54.528Z" },
+    { url = "https://files.pythonhosted.org/packages/18/d0/a89a640308016c7fff8d2a47b86cc03ee7cca780b5079d0b69f466f9e1a9/fastapi-0.129.2-py3-none-any.whl", hash = "sha256:e21d9f6e8db376655187905ad0145edd6f6a4e5f2bff241c4efb8a0bffd6a540", size = 103227, upload-time = "2026-02-21T17:25:47.745Z" },
 ]
 
 [[package]]
@@ -107,15 +107,15 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "fastapi", specifier = ">=0.129.0" },
+    { name = "fastapi", specifier = ">=0.129.2" },
     { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "orjson", specifier = ">=3.11.7" },
-    { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.0" },
+    { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.1" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" },
-    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.1" },
+    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.2" },
     { name = "uvicorn", specifier = ">=0.41.0" },
     { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" },
 ]
@@ -356,16 +356,16 @@ wheels = [
 
 [[package]]
 name = "pydantic-settings"
-version = "2.13.0"
+version = "2.13.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "pydantic" },
     { name = "python-dotenv" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/96/a1/ae859ffac5a3338a66b74c5e29e244fd3a3cc483c89feaf9f56c39898d75/pydantic_settings-2.13.0.tar.gz", hash = "sha256:95d875514610e8595672800a5c40b073e99e4aae467fa7c8f9c263061ea2e1fe", size = 222450, upload-time = "2026-02-15T12:11:23.476Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/52/6d/fffca34caecc4a3f97bda81b2098da5e8ab7efc9a66e819074a11955d87e/pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025", size = 223826, upload-time = "2026-02-19T13:45:08.055Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b0/1a/dd1b9d7e627486cf8e7523d09b70010e05a4bc41414f4ae6ce184cf0afb6/pydantic_settings-2.13.0-py3-none-any.whl", hash = "sha256:d67b576fff39cd086b595441bf9c75d4193ca9c0ed643b90360694d0f1240246", size = 58429, upload-time = "2026-02-15T12:11:22.133Z" },
+    { url = "https://files.pythonhosted.org/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" },
 ]
 
 [package.optional-dependencies]
@@ -427,27 +427,27 @@ wheels = [
 
 [[package]]
 name = "ruff"
-version = "0.15.1"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/04/dc/4e6ac71b511b141cf626357a3946679abeba4cf67bc7cc5a17920f31e10d/ruff-0.15.1.tar.gz", hash = "sha256:c590fe13fb57c97141ae975c03a1aedb3d3156030cabd740d6ff0b0d601e203f", size = 4540855, upload-time = "2026-02-12T23:09:09.998Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/23/bf/e6e4324238c17f9d9120a9d60aa99a7daaa21204c07fcd84e2ef03bb5fd1/ruff-0.15.1-py3-none-linux_armv6l.whl", hash = "sha256:b101ed7cf4615bda6ffe65bdb59f964e9f4a0d3f85cbf0e54f0ab76d7b90228a", size = 10367819, upload-time = "2026-02-12T23:09:03.598Z" },
-    { url = "https://files.pythonhosted.org/packages/b3/ea/c8f89d32e7912269d38c58f3649e453ac32c528f93bb7f4219258be2e7ed/ruff-0.15.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:939c995e9277e63ea632cc8d3fae17aa758526f49a9a850d2e7e758bfef46602", size = 10798618, upload-time = "2026-02-12T23:09:22.928Z" },
-    { url = "https://files.pythonhosted.org/packages/5e/0f/1d0d88bc862624247d82c20c10d4c0f6bb2f346559d8af281674cf327f15/ruff-0.15.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1d83466455fdefe60b8d9c8df81d3c1bbb2115cede53549d3b522ce2bc703899", size = 10148518, upload-time = "2026-02-12T23:08:58.339Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/c8/291c49cefaa4a9248e986256df2ade7add79388fe179e0691be06fae6f37/ruff-0.15.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9457e3c3291024866222b96108ab2d8265b477e5b1534c7ddb1810904858d16", size = 10518811, upload-time = "2026-02-12T23:09:31.865Z" },
-    { url = "https://files.pythonhosted.org/packages/c3/1a/f5707440e5ae43ffa5365cac8bbb91e9665f4a883f560893829cf16a606b/ruff-0.15.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:92c92b003e9d4f7fbd33b1867bb15a1b785b1735069108dfc23821ba045b29bc", size = 10196169, upload-time = "2026-02-12T23:09:17.306Z" },
-    { url = "https://files.pythonhosted.org/packages/2a/ff/26ddc8c4da04c8fd3ee65a89c9fb99eaa5c30394269d424461467be2271f/ruff-0.15.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fe5c41ab43e3a06778844c586251eb5a510f67125427625f9eb2b9526535779", size = 10990491, upload-time = "2026-02-12T23:09:25.503Z" },
-    { url = "https://files.pythonhosted.org/packages/fc/00/50920cb385b89413f7cdb4bb9bc8fc59c1b0f30028d8bccc294189a54955/ruff-0.15.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66a6dd6df4d80dc382c6484f8ce1bcceb55c32e9f27a8b94c32f6c7331bf14fb", size = 11843280, upload-time = "2026-02-12T23:09:19.88Z" },
-    { url = "https://files.pythonhosted.org/packages/5d/6d/2f5cad8380caf5632a15460c323ae326f1e1a2b5b90a6ee7519017a017ca/ruff-0.15.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a4a42cbb8af0bda9bcd7606b064d7c0bc311a88d141d02f78920be6acb5aa83", size = 11274336, upload-time = "2026-02-12T23:09:14.907Z" },
-    { url = "https://files.pythonhosted.org/packages/a3/1d/5f56cae1d6c40b8a318513599b35ea4b075d7dc1cd1d04449578c29d1d75/ruff-0.15.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ab064052c31dddada35079901592dfba2e05f5b1e43af3954aafcbc1096a5b2", size = 11137288, upload-time = "2026-02-12T23:09:07.475Z" },
-    { url = "https://files.pythonhosted.org/packages/cd/20/6f8d7d8f768c93b0382b33b9306b3b999918816da46537d5a61635514635/ruff-0.15.1-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:5631c940fe9fe91f817a4c2ea4e81f47bee3ca4aa646134a24374f3c19ad9454", size = 11070681, upload-time = "2026-02-12T23:08:55.43Z" },
-    { url = "https://files.pythonhosted.org/packages/9a/67/d640ac76069f64cdea59dba02af2e00b1fa30e2103c7f8d049c0cff4cafd/ruff-0.15.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:68138a4ba184b4691ccdc39f7795c66b3c68160c586519e7e8444cf5a53e1b4c", size = 10486401, upload-time = "2026-02-12T23:09:27.927Z" },
-    { url = "https://files.pythonhosted.org/packages/65/3d/e1429f64a3ff89297497916b88c32a5cc88eeca7e9c787072d0e7f1d3e1e/ruff-0.15.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:518f9af03bfc33c03bdb4cb63fabc935341bb7f54af500f92ac309ecfbba6330", size = 10197452, upload-time = "2026-02-12T23:09:12.147Z" },
-    { url = "https://files.pythonhosted.org/packages/78/83/e2c3bade17dad63bf1e1c2ffaf11490603b760be149e1419b07049b36ef2/ruff-0.15.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:da79f4d6a826caaea95de0237a67e33b81e6ec2e25fc7e1993a4015dffca7c61", size = 10693900, upload-time = "2026-02-12T23:09:34.418Z" },
-    { url = "https://files.pythonhosted.org/packages/a1/27/fdc0e11a813e6338e0706e8b39bb7a1d61ea5b36873b351acee7e524a72a/ruff-0.15.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3dd86dccb83cd7d4dcfac303ffc277e6048600dfc22e38158afa208e8bf94a1f", size = 11227302, upload-time = "2026-02-12T23:09:36.536Z" },
-    { url = "https://files.pythonhosted.org/packages/f6/58/ac864a75067dcbd3b95be5ab4eb2b601d7fbc3d3d736a27e391a4f92a5c1/ruff-0.15.1-py3-none-win32.whl", hash = "sha256:660975d9cb49b5d5278b12b03bb9951d554543a90b74ed5d366b20e2c57c2098", size = 10462555, upload-time = "2026-02-12T23:09:29.899Z" },
-    { url = "https://files.pythonhosted.org/packages/e0/5e/d4ccc8a27ecdb78116feac4935dfc39d1304536f4296168f91ed3ec00cd2/ruff-0.15.1-py3-none-win_amd64.whl", hash = "sha256:c820fef9dd5d4172a6570e5721704a96c6679b80cf7be41659ed439653f62336", size = 11599956, upload-time = "2026-02-12T23:09:01.157Z" },
-    { url = "https://files.pythonhosted.org/packages/2a/07/5bda6a85b220c64c65686bc85bd0bbb23b29c62b3a9f9433fa55f17cda93/ruff-0.15.1-py3-none-win_arm64.whl", hash = "sha256:5ff7d5f0f88567850f45081fac8f4ec212be8d0b963e385c3f7d0d2eb4899416", size = 10874604, upload-time = "2026-02-12T23:09:05.515Z" },
+version = "0.15.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/06/04/eab13a954e763b0606f460443fcbf6bb5a0faf06890ea3754ff16523dce5/ruff-0.15.2.tar.gz", hash = "sha256:14b965afee0969e68bb871eba625343b8673375f457af4abe98553e8bbb98342", size = 4558148, upload-time = "2026-02-19T22:32:20.271Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2f/70/3a4dc6d09b13cb3e695f28307e5d889b2e1a66b7af9c5e257e796695b0e6/ruff-0.15.2-py3-none-linux_armv6l.whl", hash = "sha256:120691a6fdae2f16d65435648160f5b81a9625288f75544dc40637436b5d3c0d", size = 10430565, upload-time = "2026-02-19T22:32:41.824Z" },
+    { url = "https://files.pythonhosted.org/packages/71/0b/bb8457b56185ece1305c666dc895832946d24055be90692381c31d57466d/ruff-0.15.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a89056d831256099658b6bba4037ac6dd06f49d194199215befe2bb10457ea5e", size = 10820354, upload-time = "2026-02-19T22:32:07.366Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/c1/e0532d7f9c9e0b14c46f61b14afd563298b8b83f337b6789ddd987e46121/ruff-0.15.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e36dee3a64be0ebd23c86ffa3aa3fd3ac9a712ff295e192243f814a830b6bd87", size = 10170767, upload-time = "2026-02-19T22:32:13.188Z" },
+    { url = "https://files.pythonhosted.org/packages/47/e8/da1aa341d3af017a21c7a62fb5ec31d4e7ad0a93ab80e3a508316efbcb23/ruff-0.15.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9fb47b6d9764677f8c0a193c0943ce9a05d6763523f132325af8a858eadc2b9", size = 10529591, upload-time = "2026-02-19T22:32:02.547Z" },
+    { url = "https://files.pythonhosted.org/packages/93/74/184fbf38e9f3510231fbc5e437e808f0b48c42d1df9434b208821efcd8d6/ruff-0.15.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f376990f9d0d6442ea9014b19621d8f2aaf2b8e39fdbfc79220b7f0c596c9b80", size = 10260771, upload-time = "2026-02-19T22:32:36.938Z" },
+    { url = "https://files.pythonhosted.org/packages/05/ac/605c20b8e059a0bc4b42360414baa4892ff278cec1c91fff4be0dceedefd/ruff-0.15.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dcc987551952d73cbf5c88d9fdee815618d497e4df86cd4c4824cc59d5dd75f", size = 11045791, upload-time = "2026-02-19T22:32:31.642Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/52/db6e419908f45a894924d410ac77d64bdd98ff86901d833364251bd08e22/ruff-0.15.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:42a47fd785cbe8c01b9ff45031af875d101b040ad8f4de7bbb716487c74c9a77", size = 11879271, upload-time = "2026-02-19T22:32:29.305Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/d8/7992b18f2008bdc9231d0f10b16df7dda964dbf639e2b8b4c1b4e91b83af/ruff-0.15.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbe9f49354866e575b4c6943856989f966421870e85cd2ac94dccb0a9dcb2fea", size = 11303707, upload-time = "2026-02-19T22:32:22.492Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/02/849b46184bcfdd4b64cde61752cc9a146c54759ed036edd11857e9b8443b/ruff-0.15.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7a672c82b5f9887576087d97be5ce439f04bbaf548ee987b92d3a7dede41d3a", size = 11149151, upload-time = "2026-02-19T22:32:44.234Z" },
+    { url = "https://files.pythonhosted.org/packages/70/04/f5284e388bab60d1d3b99614a5a9aeb03e0f333847e2429bebd2aaa1feec/ruff-0.15.2-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:72ecc64f46f7019e2bcc3cdc05d4a7da958b629a5ab7033195e11a438403d956", size = 11091132, upload-time = "2026-02-19T22:32:24.691Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/ae/88d844a21110e14d92cf73d57363fab59b727ebeabe78009b9ccb23500af/ruff-0.15.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:8dcf243b15b561c655c1ef2f2b0050e5d50db37fe90115507f6ff37d865dc8b4", size = 10504717, upload-time = "2026-02-19T22:32:26.75Z" },
+    { url = "https://files.pythonhosted.org/packages/64/27/867076a6ada7f2b9c8292884ab44d08fd2ba71bd2b5364d4136f3cd537e1/ruff-0.15.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dab6941c862c05739774677c6273166d2510d254dac0695c0e3f5efa1b5585de", size = 10263122, upload-time = "2026-02-19T22:32:10.036Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/ef/faf9321d550f8ebf0c6373696e70d1758e20ccdc3951ad7af00c0956be7c/ruff-0.15.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b9164f57fc36058e9a6806eb92af185b0697c9fe4c7c52caa431c6554521e5c", size = 10735295, upload-time = "2026-02-19T22:32:39.227Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/55/e8089fec62e050ba84d71b70e7834b97709ca9b7aba10c1a0b196e493f97/ruff-0.15.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:80d24fcae24d42659db7e335b9e1531697a7102c19185b8dc4a028b952865fd8", size = 11241641, upload-time = "2026-02-19T22:32:34.617Z" },
+    { url = "https://files.pythonhosted.org/packages/23/01/1c30526460f4d23222d0fabd5888868262fd0e2b71a00570ca26483cd993/ruff-0.15.2-py3-none-win32.whl", hash = "sha256:fd5ff9e5f519a7e1bd99cbe8daa324010a74f5e2ebc97c6242c08f26f3714f6f", size = 10507885, upload-time = "2026-02-19T22:32:15.635Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/10/3d18e3bbdf8fc50bbb4ac3cc45970aa5a9753c5cb51bf9ed9a3cd8b79fa3/ruff-0.15.2-py3-none-win_amd64.whl", hash = "sha256:d20014e3dfa400f3ff84830dfb5755ece2de45ab62ecea4af6b7262d0fb4f7c5", size = 11623725, upload-time = "2026-02-19T22:32:04.947Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/78/097c0798b1dab9f8affe73da9642bb4500e098cb27fd8dc9724816ac747b/ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e", size = 10941649, upload-time = "2026-02-19T22:32:18.108Z" },
 ]
 
 [[package]]

From bdf80282cc9f51cfe8894bd2117f6ba1408f2fff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 22 Feb 2026 10:50:46 +0700
Subject: [PATCH 142/236] Refactor StreamingOutputFilter to use a stack-based
 state machine for improved handling of nested fragmented markers

---
 app/server/chat.py | 46 ++++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index b7c0564..a8d07cc 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -751,14 +751,21 @@ async def _send_with_split(
 class StreamingOutputFilter:
     """
     Filter to suppress technical protocol markers, tool calls, and system hints from the stream.
-    Uses a high-performance regex state machine to handle fragmented markers.
+    Uses a stack-based state machine to handle nested fragmented markers.
     """
 
     def __init__(self):
         self.buffer = ""
-        self.state = "NORMAL"
+        self.stack = ["NORMAL"]
         self.current_role = ""
-        self.in_chatml = False
+
+    @property
+    def state(self):
+        return self.stack[-1]
+
+    def _is_outputting(self) -> bool:
+        """Determines if the current state allows yielding text to the stream."""
+        return self.state == "NORMAL" or (self.state == "IN_BLOCK" and self.current_role != "tool")
 
     def process(self, chunk: str) -> str:
         self.buffer += chunk
@@ -770,8 +777,7 @@ def process(self, chunk: str) -> str:
                 if nl_idx != -1:
                     self.current_role = self.buffer[:nl_idx].strip().lower()
                     self.buffer = self.buffer[nl_idx + 1 :]
-                    self.state = "IN_BLOCK"
-                    self.in_chatml = True
+                    self.stack[-1] = "IN_BLOCK"
                     continue
                 else:
                     break
@@ -782,9 +788,7 @@ def process(self, chunk: str) -> str:
                 keep_len = len(tail_match.group(0)) if tail_match else 0
                 yield_len = len(self.buffer) - keep_len
                 if yield_len > 0:
-                    if self.state == "NORMAL" or (
-                        self.state == "IN_BLOCK" and self.current_role != "tool"
-                    ):
+                    if self._is_outputting():
                         output.append(self.buffer[:yield_len])
                     self.buffer = self.buffer[yield_len:]
                 break
@@ -793,21 +797,23 @@ def process(self, chunk: str) -> str:
             matched_group = match.lastgroup
             pre_text = self.buffer[:start]
 
-            if self.state == "NORMAL" or (self.state == "IN_BLOCK" and self.current_role != "tool"):
+            if self._is_outputting():
                 output.append(pre_text)
 
             if matched_group.endswith("_START"):
                 m_type = matched_group.split("_")[0]
                 if m_type == "TAG":
-                    self.state = "IN_TAG_HEADER"
+                    self.stack.append("IN_TAG_HEADER")
+                else:
+                    self.stack.append(f"IN_{m_type}")
+            elif matched_group in ("PROTOCOL_EXIT", "TAG_EXIT", "HINT_EXIT"):
+                if len(self.stack) > 1:
+                    self.stack.pop()
                 else:
-                    self.state = f"IN_{m_type}"
-            elif matched_group in ("PROTOCOL_EXIT", "HINT_EXIT"):
-                self.state = "IN_BLOCK" if self.in_chatml else "NORMAL"
-            elif matched_group == "TAG_EXIT":
-                self.state = "NORMAL"
-                self.in_chatml = False
-                self.current_role = ""
+                    self.stack = ["NORMAL"]
+
+                if self.state == "NORMAL":
+                    self.current_role = ""
 
             self.buffer = self.buffer[end:]
 
@@ -816,15 +822,15 @@ def process(self, chunk: str) -> str:
     def flush(self) -> str:
         """Release remaining buffer content and perform final cleanup at stream end."""
         res = ""
-        if self.state == "NORMAL" or (self.state == "IN_BLOCK" and self.current_role != "tool"):
+        if self._is_outputting():
             res = self.buffer
             tail_match = STREAM_TAIL_RE.search(res)
             if tail_match:
                 res = res[: -len(tail_match.group(0))]
 
         self.buffer = ""
-        self.state = "NORMAL"
-        self.in_chatml = False
+        self.stack = ["NORMAL"]
+        self.current_role = ""
         return strip_system_hints(res)
 
 

From 2e4a97f364849fe752316818ecdd6b795b44511c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 22 Feb 2026 11:12:39 +0700
Subject: [PATCH 143/236] Refactor `StreamingOutputFilter` for improved
 handling of nested fragmented markers

---
 app/utils/helper.py | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index e101e17..4c217c3 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -36,32 +36,36 @@
     "CRITICAL: Do NOT mix natural language with protocol tags. Either respond naturally OR provide the protocol block alone. There is no middle ground.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"\\?\[ToolCalls\\?]\s*(.*?)\s*\\?\[\\?/?ToolCalls\\?]",
+    r"\\?\[\s*ToolCalls\s*\\?]\s*(.*?)\s*\\?\[\s*\\?/\s*ToolCalls\s*\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TOOL_CALL_RE = re.compile(
-    r"\\?\[Call\\?:\s*(?P<name>(?:[^]\\]|\\.)+)\s*\\?]\s*(?P<body>.*?)\s*\\?\[\\?/?Call\\?]",
+    r"\\?\[\s*Call\s*\\?:\s*(?P<name>(?:[^]\\]|\\.)+)\s*\\?]\s*(?P<body>.*?)\s*\\?\[\s*\\?/\s*Call\s*\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"\\?\[ToolResults\\?]\s*(.*?)\s*\\?\[\\?/?ToolResults\\?]",
+    r"\\?\[\s*ToolResults\s*\\?]\s*(.*?)\s*\\?\[\s*\\?/\s*ToolResults\s*\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"\\?\[Result\\?:\s*(?P<name>(?:[^]\\]|\\.)+)\s*\\?]\s*(?P<body>.*?)\s*\\?\[\\?/?Result\\?]",
+    r"\\?\[\s*Result\s*\\?:\s*(?P<name>(?:[^]\\]|\\.)+)\s*\\?]\s*(?P<body>.*?)\s*\\?\[\s*\\?/\s*Result\s*\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"\\?\[CallParameter\\?:\s*(?P<name>(?:[^]\\]|\\.)+)\s*\\?]\s*(?P<body>.*?)\s*\\?\[\\?/?CallParameter\\?]",
+    r"\\?\[\s*CallParameter\s*\\?:\s*(?P<name>(?:[^]\\]|\\.)+)\s*\\?]\s*(?P<body>.*?)\s*\\?\[\s*\\?/\s*CallParameter\s*\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"\\?\[ToolResult\\?]\s*(.*?)\s*\\?\[\\?/?ToolResult\\?]",
+    r"\\?\[\s*ToolResult\s*\\?]\s*(.*?)\s*\\?\[\s*\\?/\s*ToolResult\s*\\?]",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(r"\\?<\\?\|im\\?_(?:start|end)\\?\|\\?>", re.IGNORECASE)
-CHATML_START_RE = re.compile(r"\\?<\\?\|im\\?_start\\?\|\\?>\s*(\w+)\s*\n?", re.IGNORECASE)
-CHATML_END_RE = re.compile(r"\\?<\\?\|im\\?_end\\?\|\\?>", re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(
+    r"\\?\s*<\s*\\?\|\s*im\s*\\?_(?:start|end)\s*\\?\|\s*>\s*", re.IGNORECASE
+)
+CHATML_START_RE = re.compile(
+    r"\\?\s*<\s*\\?\|\s*im\s*\\?_start\s*\\?\|\s*>\s*(\w+)\s*\n?", re.IGNORECASE
+)
+CHATML_END_RE = re.compile(r"\\?\s*<\s*\\?\|\s*im\s*\\?_end\s*\\?\|\s*>\s*", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 PARAM_FENCE_RE = re.compile(r"^(?P<fence>`{3,})")
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
@@ -81,17 +85,19 @@
 
 # --- Streaming Specific Patterns ---
 _START_PATTERNS = {
-    "TOOL": r"\\?\[ToolCalls\\?\]",
-    "ORPHAN": r"\\?\[Call\\?:\s*[^\]\\]+\s*\\?\]",
-    "RESP": r"\\?\[ToolResults\\?\]",
-    "ARG": r"\\?\[CallParameter\\?:\s*[^\]\\]+\s*\\?\]",
-    "RESULT": r"\\?\[ToolResult\\?\]",
-    "ITEM": r"\\?\[Result\\?:\s*[^\]\\]+\s*\\?\]",
-    "TAG": r"\\?<\\?\|im\\?_start\\?\|\\?>",
+    "TOOL": r"\\?\[\s*ToolCalls\s*\\?\]",
+    "ORPHAN": r"\\?\[\s*Call\s*\\?:\s*(?:[^\]\\]|\\.)+\s*\\?\]",
+    "RESP": r"\\?\[\s*ToolResults\s*\\?\]",
+    "ARG": r"\\?\[\s*CallParameter\s*\\?:\s*(?:[^\]\\]|\\.)+\s*\\?\]",
+    "RESULT": r"\\?\[\s*ToolResult\s*\\?\]",
+    "ITEM": r"\\?\[\s*Result\s*\\?:\s*(?:[^\]\\]|\\.)+\s*\\?\]",
+    "TAG": r"\\?\s*<\s*\\?\|\s*im\s*\\?_start\s*\\?\|\s*>",
 }
 
-_PROTOCOL_ENDS = r"\\?\[\\?/(?:ToolCalls|Call|ToolResults|CallParameter|ToolResult|Result)\\?\]"
-_TAG_END = r"\\?<\\?\|im\\?_end\\?\|\\?>"
+_PROTOCOL_ENDS = (
+    r"\\?\[\s*\\?/\s*(?:ToolCalls|Call|ToolResults|CallParameter|ToolResult|Result)\s*\\?\]"
+)
+_TAG_END = r"\\?\s*<\s*\\?\|\s*im\s*\\?_end\s*\\?\|\s*>"
 
 _master_parts = [f"(?P<{name}_START>{pattern})" for name, pattern in _START_PATTERNS.items()]
 _master_parts.append(f"(?P<PROTOCOL_EXIT>{_PROTOCOL_ENDS})")
@@ -103,7 +109,7 @@
 
 STREAM_MASTER_RE = re.compile("|".join(_master_parts), re.IGNORECASE)
 STREAM_TAIL_RE = re.compile(
-    r"(?:\\|\\?\[(?:T(?:o?o?l?)?|C(?:a?l?l?)?|R(?:e?s?u?l?t?)?|/)?[\w/:]*|\\?<\??\|?i?m?_?(?:s?t?a?r?t?|e?n?d?)\|?|)$",
+    r"(?:\\|\\?\[[TCRP/]?\s*[^]]*|\\?\s*<\s*\\?\|?\s*i?\s*m?\s*\\?_?(?:s?t?a?r?t?|e?n?d?)\s*\\?\|?\s*>?|)$",
     re.IGNORECASE,
 )
 

From c4d9016024ee2da4efdde9778b0bfd94275a3491 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 22 Feb 2026 12:32:14 +0700
Subject: [PATCH 144/236] Refactor `StreamingOutputFilter` for improved
 handling of nested fragmented markers

---
 app/utils/helper.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 4c217c3..9f07bf7 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -76,12 +76,16 @@
 TOOL_HINT_END_ESC = re.escape(TOOL_HINT_LINE_END) if TOOL_HINT_LINE_END else ""
 
 HINT_FULL_RE = (
-    re.compile(rf"\n?{TOOL_HINT_START_ESC}:?.*?{TOOL_HINT_END_ESC}\\.?\n?", re.DOTALL)
+    re.compile(rf"\n?{TOOL_HINT_START_ESC}:?.*?{TOOL_HINT_END_ESC}\n?", re.DOTALL | re.IGNORECASE)
     if TOOL_HINT_START_ESC and TOOL_HINT_END_ESC
     else None
 )
-HINT_START_RE = re.compile(rf"\n?{TOOL_HINT_START_ESC}:?\s*") if TOOL_HINT_START_ESC else None
-HINT_END_RE = re.compile(rf"\s*{TOOL_HINT_END_ESC}\.?\n?") if TOOL_HINT_END_ESC else None
+HINT_START_RE = (
+    re.compile(rf"\n?{TOOL_HINT_START_ESC}:?\s*", re.IGNORECASE) if TOOL_HINT_START_ESC else None
+)
+HINT_END_RE = (
+    re.compile(rf"\s*{TOOL_HINT_END_ESC}\n?", re.IGNORECASE) if TOOL_HINT_END_ESC else None
+)
 
 # --- Streaming Specific Patterns ---
 _START_PATTERNS = {
@@ -99,13 +103,15 @@
 )
 _TAG_END = r"\\?\s*<\s*\\?\|\s*im\s*\\?_end\s*\\?\|\s*>"
 
+if TOOL_HINT_START_ESC and TOOL_HINT_END_ESC:
+    _START_PATTERNS["HINT"] = rf"\n?{TOOL_HINT_START_ESC}:?\s*"
+
 _master_parts = [f"(?P<{name}_START>{pattern})" for name, pattern in _START_PATTERNS.items()]
 _master_parts.append(f"(?P<PROTOCOL_EXIT>{_PROTOCOL_ENDS})")
 _master_parts.append(f"(?P<TAG_EXIT>{_TAG_END})")
 
 if TOOL_HINT_START_ESC and TOOL_HINT_END_ESC:
-    _START_PATTERNS["HINT"] = rf"\n?{TOOL_HINT_START_ESC}:?\s*"
-    _master_parts.append(f"(?P<HINT_EXIT>{TOOL_HINT_END_ESC}\\.?\n?)")
+    _master_parts.append(f"(?P<HINT_EXIT>{TOOL_HINT_END_ESC}\n?)")
 
 STREAM_MASTER_RE = re.compile("|".join(_master_parts), re.IGNORECASE)
 STREAM_TAIL_RE = re.compile(

From 1da4daccc1bf5f9e96d30d40625461351e723097 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 22 Feb 2026 13:24:11 +0700
Subject: [PATCH 145/236] Fix fence stripping logic in _strip_param_fences
 function

---
 app/utils/helper.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 9f07bf7..e4fb498 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -164,12 +164,12 @@ def _strip_param_fences(s: str) -> str:
     if not match or not s.endswith(match.group("fence")):
         return s
 
+    fence = match.group("fence")
     lines = s.splitlines()
-    if len(lines) >= 2:
+    if len(lines) >= 3 and lines[-1].strip() == fence:
         return "\n".join(lines[1:-1])
 
-    n = len(match.group("fence"))
-    return s[n:-n].strip()
+    return s[len(fence) : -len(fence)].strip()
 
 
 def estimate_tokens(text: str | None) -> int:

From ef058d45636829255ecde88486a6c3e280fd5557 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 22 Feb 2026 16:18:58 +0700
Subject: [PATCH 146/236] Refactor think tag removal to use a precompiled regex
 pattern

---
 app/services/lmdb.py | 4 ++--
 app/utils/helper.py  | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 87a1449..8c4edb4 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -1,5 +1,4 @@
 import hashlib
-import re
 import string
 from contextlib import contextmanager
 from datetime import datetime, timedelta
@@ -13,6 +12,7 @@
 from app.models import ContentItem, ConversationInStore, Message
 from app.utils import g_config
 from app.utils.helper import (
+    THINK_TAGS_RE,
     extract_tool_calls,
     normalize_llm_text,
     remove_tool_call_blocks,
@@ -589,7 +589,7 @@ def remove_think_tags(text: str) -> str:
         """Remove all <think>...</think> tags and strip whitespace."""
         if not text:
             return text
-        cleaned_content = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
+        cleaned_content = THINK_TAGS_RE.sub("", text)
         return cleaned_content.strip()
 
     @staticmethod
diff --git a/app/utils/helper.py b/app/utils/helper.py
index e4fb498..2e9e801 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -86,6 +86,7 @@
 HINT_END_RE = (
     re.compile(rf"\s*{TOOL_HINT_END_ESC}\n?", re.IGNORECASE) if TOOL_HINT_END_ESC else None
 )
+THINK_TAGS_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
 
 # --- Streaming Specific Patterns ---
 _START_PATTERNS = {

From 906380f79defe9d4c09aab4dfa24296b2b98d2b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 22 Feb 2026 20:33:51 +0700
Subject: [PATCH 147/236] Refactor HTTP client usage to utilize AsyncSession
 from curl-cffi for improved performance

---
 app/utils/helper.py |  4 ++--
 pyproject.toml      |  1 +
 uv.lock             | 57 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 2e9e801..9a930b7 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -10,8 +10,8 @@
 from pathlib import Path
 from urllib.parse import urlparse
 
-import httpx
 import orjson
+from curl_cffi.requests import AsyncSession
 from loguru import logger
 
 from app.models import FunctionCall, Message, ToolCall
@@ -202,7 +202,7 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
         data = base64.b64decode(url.split(",")[1])
         suffix = mimetypes.guess_extension(mime_type) or f".{mime_type.split('/')[1]}"
     else:
-        async with httpx.AsyncClient(follow_redirects=True) as client:
+        async with AsyncSession(impersonate="chrome", allow_redirects=True) as client:
             resp = await client.get(url)
             resp.raise_for_status()
             data = resp.content
diff --git a/pyproject.toml b/pyproject.toml
index 6599122..6802f51 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,6 +5,7 @@ description = "FastAPI Server built on Gemini Web API"
 readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
+    "curl-cffi>=0.14.0",
     "fastapi>=0.129.2",
     "gemini-webapi>=1.19.2",
     "httptools>=0.7.1",
diff --git a/uv.lock b/uv.lock
index 4cae2c0..c7aa914 100644
--- a/uv.lock
+++ b/uv.lock
@@ -41,6 +41,29 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
 ]
 
+[[package]]
+name = "cffi"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pycparser", marker = "implementation_name != 'PyPy'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" },
+    { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" },
+    { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" },
+    { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" },
+    { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" },
+]
+
 [[package]]
 name = "click"
 version = "8.3.1"
@@ -62,6 +85,29 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
 ]
 
+[[package]]
+name = "curl-cffi"
+version = "0.14.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "cffi" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9b/c9/0067d9a25ed4592b022d4558157fcdb6e123516083700786d38091688767/curl_cffi-0.14.0.tar.gz", hash = "sha256:5ffbc82e59f05008ec08ea432f0e535418823cda44178ee518906a54f27a5f0f", size = 162633, upload-time = "2025-12-16T03:25:07.931Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/aa/f0/0f21e9688eaac85e705537b3a87a5588d0cefb2f09d83e83e0e8be93aa99/curl_cffi-0.14.0-cp39-abi3-macosx_14_0_arm64.whl", hash = "sha256:e35e89c6a69872f9749d6d5fda642ed4fc159619329e99d577d0104c9aad5893", size = 3087277, upload-time = "2025-12-16T03:24:49.607Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/a3/0419bd48fce5b145cb6a2344c6ac17efa588f5b0061f212c88e0723da026/curl_cffi-0.14.0-cp39-abi3-macosx_15_0_x86_64.whl", hash = "sha256:5945478cd28ad7dfb5c54473bcfb6743ee1d66554d57951fdf8fc0e7d8cf4e45", size = 5804650, upload-time = "2025-12-16T03:24:51.518Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/07/a238dd062b7841b8caa2fa8a359eb997147ff3161288f0dd46654d898b4d/curl_cffi-0.14.0-cp39-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c42e8fa3c667db9ccd2e696ee47adcd3cd5b0838d7282f3fc45f6c0ef3cfdfa7", size = 8231918, upload-time = "2025-12-16T03:24:52.862Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/d2/ce907c9b37b5caf76ac08db40cc4ce3d9f94c5500db68a195af3513eacbc/curl_cffi-0.14.0-cp39-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:060fe2c99c41d3cb7f894de318ddf4b0301b08dca70453d769bd4e74b36b8483", size = 8654624, upload-time = "2025-12-16T03:24:54.579Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/ae/6256995b18c75e6ef76b30753a5109e786813aa79088b27c8eabb1ef85c9/curl_cffi-0.14.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b158c41a25388690dd0d40b5bc38d1e0f512135f17fdb8029868cbc1993d2e5b", size = 8010654, upload-time = "2025-12-16T03:24:56.507Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/10/ff64249e516b103cb762e0a9dca3ee0f04cf25e2a1d5d9838e0f1273d071/curl_cffi-0.14.0-cp39-abi3-manylinux_2_28_i686.whl", hash = "sha256:1439fbef3500fb723333c826adf0efb0e2e5065a703fb5eccce637a2250db34a", size = 7781969, upload-time = "2025-12-16T03:24:57.885Z" },
+    { url = "https://files.pythonhosted.org/packages/51/76/d6f7bb76c2d12811aa7ff16f5e17b678abdd1b357b9a8ac56310ceccabd5/curl_cffi-0.14.0-cp39-abi3-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e7176f2c2d22b542e3cf261072a81deb018cfa7688930f95dddef215caddb469", size = 7969133, upload-time = "2025-12-16T03:24:59.261Z" },
+    { url = "https://files.pythonhosted.org/packages/23/7c/cca39c0ed4e1772613d3cba13091c0e9d3b89365e84b9bf9838259a3cd8f/curl_cffi-0.14.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:03f21ade2d72978c2bb8670e9b6de5260e2755092b02d94b70b906813662998d", size = 9080167, upload-time = "2025-12-16T03:25:00.946Z" },
+    { url = "https://files.pythonhosted.org/packages/75/03/a942d7119d3e8911094d157598ae0169b1c6ca1bd3f27d7991b279bcc45b/curl_cffi-0.14.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:58ebf02de64ee5c95613209ddacb014c2d2f86298d7080c0a1c12ed876ee0690", size = 9520464, upload-time = "2025-12-16T03:25:02.922Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/77/78900e9b0833066d2274bda75cba426fdb4cef7fbf6a4f6a6ca447607bec/curl_cffi-0.14.0-cp39-abi3-win_amd64.whl", hash = "sha256:6e503f9a103f6ae7acfb3890c843b53ec030785a22ae7682a22cc43afb94123e", size = 1677416, upload-time = "2025-12-16T03:25:04.902Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/7c/d2ba86b0b3e1e2830bd94163d047de122c69a8df03c5c7c36326c456ad82/curl_cffi-0.14.0-cp39-abi3-win_arm64.whl", hash = "sha256:2eed50a969201605c863c4c31269dfc3e0da52916086ac54553cfa353022425c", size = 1425067, upload-time = "2025-12-16T03:25:06.454Z" },
+]
+
 [[package]]
 name = "fastapi"
 version = "0.129.2"
@@ -83,6 +129,7 @@ name = "gemini-fastapi"
 version = "1.0.0"
 source = { virtual = "." }
 dependencies = [
+    { name = "curl-cffi" },
     { name = "fastapi" },
     { name = "gemini-webapi" },
     { name = "httptools" },
@@ -107,6 +154,7 @@ dev = [
 
 [package.metadata]
 requires-dist = [
+    { name = "curl-cffi", specifier = ">=0.14.0" },
     { name = "fastapi", specifier = ">=0.129.2" },
     { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "httptools", specifier = ">=0.7.1" },
@@ -314,6 +362,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
 ]
 
+[[package]]
+name = "pycparser"
+version = "3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" },
+]
+
 [[package]]
 name = "pydantic"
 version = "2.12.5"

From 9ecb982893c483397b9c74e345b1e977b1bee097 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 22 Feb 2026 21:28:32 +0700
Subject: [PATCH 148/236] Revert "Refactor HTTP client usage to utilize
 AsyncSession from curl-cffi for improved performance"

This reverts commit 906380f79defe9d4c09aab4dfa24296b2b98d2b8.
---
 app/utils/helper.py |  4 ++--
 pyproject.toml      |  1 -
 uv.lock             | 57 ---------------------------------------------
 3 files changed, 2 insertions(+), 60 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 9a930b7..2e9e801 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -10,8 +10,8 @@
 from pathlib import Path
 from urllib.parse import urlparse
 
+import httpx
 import orjson
-from curl_cffi.requests import AsyncSession
 from loguru import logger
 
 from app.models import FunctionCall, Message, ToolCall
@@ -202,7 +202,7 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
         data = base64.b64decode(url.split(",")[1])
         suffix = mimetypes.guess_extension(mime_type) or f".{mime_type.split('/')[1]}"
     else:
-        async with AsyncSession(impersonate="chrome", allow_redirects=True) as client:
+        async with httpx.AsyncClient(follow_redirects=True) as client:
             resp = await client.get(url)
             resp.raise_for_status()
             data = resp.content
diff --git a/pyproject.toml b/pyproject.toml
index 6802f51..6599122 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,6 @@ description = "FastAPI Server built on Gemini Web API"
 readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
-    "curl-cffi>=0.14.0",
     "fastapi>=0.129.2",
     "gemini-webapi>=1.19.2",
     "httptools>=0.7.1",
diff --git a/uv.lock b/uv.lock
index c7aa914..4cae2c0 100644
--- a/uv.lock
+++ b/uv.lock
@@ -41,29 +41,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
 ]
 
-[[package]]
-name = "cffi"
-version = "2.0.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "pycparser", marker = "implementation_name != 'PyPy'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" },
-    { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" },
-    { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" },
-    { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" },
-    { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" },
-    { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" },
-    { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" },
-    { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" },
-    { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" },
-    { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" },
-    { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" },
-    { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" },
-]
-
 [[package]]
 name = "click"
 version = "8.3.1"
@@ -85,29 +62,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
 ]
 
-[[package]]
-name = "curl-cffi"
-version = "0.14.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "certifi" },
-    { name = "cffi" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/9b/c9/0067d9a25ed4592b022d4558157fcdb6e123516083700786d38091688767/curl_cffi-0.14.0.tar.gz", hash = "sha256:5ffbc82e59f05008ec08ea432f0e535418823cda44178ee518906a54f27a5f0f", size = 162633, upload-time = "2025-12-16T03:25:07.931Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/aa/f0/0f21e9688eaac85e705537b3a87a5588d0cefb2f09d83e83e0e8be93aa99/curl_cffi-0.14.0-cp39-abi3-macosx_14_0_arm64.whl", hash = "sha256:e35e89c6a69872f9749d6d5fda642ed4fc159619329e99d577d0104c9aad5893", size = 3087277, upload-time = "2025-12-16T03:24:49.607Z" },
-    { url = "https://files.pythonhosted.org/packages/ba/a3/0419bd48fce5b145cb6a2344c6ac17efa588f5b0061f212c88e0723da026/curl_cffi-0.14.0-cp39-abi3-macosx_15_0_x86_64.whl", hash = "sha256:5945478cd28ad7dfb5c54473bcfb6743ee1d66554d57951fdf8fc0e7d8cf4e45", size = 5804650, upload-time = "2025-12-16T03:24:51.518Z" },
-    { url = "https://files.pythonhosted.org/packages/e2/07/a238dd062b7841b8caa2fa8a359eb997147ff3161288f0dd46654d898b4d/curl_cffi-0.14.0-cp39-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c42e8fa3c667db9ccd2e696ee47adcd3cd5b0838d7282f3fc45f6c0ef3cfdfa7", size = 8231918, upload-time = "2025-12-16T03:24:52.862Z" },
-    { url = "https://files.pythonhosted.org/packages/7c/d2/ce907c9b37b5caf76ac08db40cc4ce3d9f94c5500db68a195af3513eacbc/curl_cffi-0.14.0-cp39-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:060fe2c99c41d3cb7f894de318ddf4b0301b08dca70453d769bd4e74b36b8483", size = 8654624, upload-time = "2025-12-16T03:24:54.579Z" },
-    { url = "https://files.pythonhosted.org/packages/f2/ae/6256995b18c75e6ef76b30753a5109e786813aa79088b27c8eabb1ef85c9/curl_cffi-0.14.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b158c41a25388690dd0d40b5bc38d1e0f512135f17fdb8029868cbc1993d2e5b", size = 8010654, upload-time = "2025-12-16T03:24:56.507Z" },
-    { url = "https://files.pythonhosted.org/packages/fb/10/ff64249e516b103cb762e0a9dca3ee0f04cf25e2a1d5d9838e0f1273d071/curl_cffi-0.14.0-cp39-abi3-manylinux_2_28_i686.whl", hash = "sha256:1439fbef3500fb723333c826adf0efb0e2e5065a703fb5eccce637a2250db34a", size = 7781969, upload-time = "2025-12-16T03:24:57.885Z" },
-    { url = "https://files.pythonhosted.org/packages/51/76/d6f7bb76c2d12811aa7ff16f5e17b678abdd1b357b9a8ac56310ceccabd5/curl_cffi-0.14.0-cp39-abi3-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e7176f2c2d22b542e3cf261072a81deb018cfa7688930f95dddef215caddb469", size = 7969133, upload-time = "2025-12-16T03:24:59.261Z" },
-    { url = "https://files.pythonhosted.org/packages/23/7c/cca39c0ed4e1772613d3cba13091c0e9d3b89365e84b9bf9838259a3cd8f/curl_cffi-0.14.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:03f21ade2d72978c2bb8670e9b6de5260e2755092b02d94b70b906813662998d", size = 9080167, upload-time = "2025-12-16T03:25:00.946Z" },
-    { url = "https://files.pythonhosted.org/packages/75/03/a942d7119d3e8911094d157598ae0169b1c6ca1bd3f27d7991b279bcc45b/curl_cffi-0.14.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:58ebf02de64ee5c95613209ddacb014c2d2f86298d7080c0a1c12ed876ee0690", size = 9520464, upload-time = "2025-12-16T03:25:02.922Z" },
-    { url = "https://files.pythonhosted.org/packages/a2/77/78900e9b0833066d2274bda75cba426fdb4cef7fbf6a4f6a6ca447607bec/curl_cffi-0.14.0-cp39-abi3-win_amd64.whl", hash = "sha256:6e503f9a103f6ae7acfb3890c843b53ec030785a22ae7682a22cc43afb94123e", size = 1677416, upload-time = "2025-12-16T03:25:04.902Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/7c/d2ba86b0b3e1e2830bd94163d047de122c69a8df03c5c7c36326c456ad82/curl_cffi-0.14.0-cp39-abi3-win_arm64.whl", hash = "sha256:2eed50a969201605c863c4c31269dfc3e0da52916086ac54553cfa353022425c", size = 1425067, upload-time = "2025-12-16T03:25:06.454Z" },
-]
-
 [[package]]
 name = "fastapi"
 version = "0.129.2"
@@ -129,7 +83,6 @@ name = "gemini-fastapi"
 version = "1.0.0"
 source = { virtual = "." }
 dependencies = [
-    { name = "curl-cffi" },
     { name = "fastapi" },
     { name = "gemini-webapi" },
     { name = "httptools" },
@@ -154,7 +107,6 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "curl-cffi", specifier = ">=0.14.0" },
     { name = "fastapi", specifier = ">=0.129.2" },
     { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "httptools", specifier = ">=0.7.1" },
@@ -362,15 +314,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
 ]
 
-[[package]]
-name = "pycparser"
-version = "3.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" },
-]
-
 [[package]]
 name = "pydantic"
 version = "2.12.5"

From 6c5cabd306e7e66f1c7a7bc9bf12c3d18df68a1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 22 Feb 2026 21:34:15 +0700
Subject: [PATCH 149/236] Update: Explicitly added httpx as a dependency to
 safeguard against potential breakages from lower-level library changes and
 ensure consistent availability.

---
 app/utils/helper.py | 2 +-
 pyproject.toml      | 1 +
 uv.lock             | 2 ++
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index 2e9e801..da5b3c5 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -202,7 +202,7 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
         data = base64.b64decode(url.split(",")[1])
         suffix = mimetypes.guess_extension(mime_type) or f".{mime_type.split('/')[1]}"
     else:
-        async with httpx.AsyncClient(follow_redirects=True) as client:
+        async with httpx.AsyncClient(http2=True, follow_redirects=True) as client:
             resp = await client.get(url)
             resp.raise_for_status()
             data = resp.content
diff --git a/pyproject.toml b/pyproject.toml
index 6599122..ebbadd4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,6 +8,7 @@ dependencies = [
     "fastapi>=0.129.2",
     "gemini-webapi>=1.19.2",
     "httptools>=0.7.1",
+    "httpx[http2]>=0.28.1",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
     "orjson>=3.11.7",
diff --git a/uv.lock b/uv.lock
index 4cae2c0..cab521a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -86,6 +86,7 @@ dependencies = [
     { name = "fastapi" },
     { name = "gemini-webapi" },
     { name = "httptools" },
+    { name = "httpx", extra = ["http2"] },
     { name = "lmdb" },
     { name = "loguru" },
     { name = "orjson" },
@@ -110,6 +111,7 @@ requires-dist = [
     { name = "fastapi", specifier = ">=0.129.2" },
     { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "httptools", specifier = ">=0.7.1" },
+    { name = "httpx", extras = ["http2"], specifier = ">=0.28.1" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "orjson", specifier = ">=3.11.7" },

From 6b6e589f7ac015965eda661d5bdc489188b961d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 23 Feb 2026 10:17:55 +0700
Subject: [PATCH 150/236] ORJSONResponse is deprecated

FastAPIDeprecationWarning: ORJSONResponse is deprecated, FastAPI now serializes data directly to JSON bytes via Pydantic when a return type or response model is set, which is faster and doesn't need a custom response class. Read more in the FastAPI docs: https://fastapi.tiangolo.com/advanced/custom-response/#orjson-or-response-model and https://fastapi.tiangolo.com/tutorial/response-model/
---
 app/main.py              | 2 --
 app/server/middleware.py | 6 +++---
 pyproject.toml           | 2 +-
 uv.lock                  | 8 ++++----
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/app/main.py b/app/main.py
index 0634ce2..20d15b0 100644
--- a/app/main.py
+++ b/app/main.py
@@ -2,7 +2,6 @@
 from contextlib import asynccontextmanager
 
 from fastapi import FastAPI
-from fastapi.responses import ORJSONResponse
 from loguru import logger
 
 from .server.chat import router as chat_router
@@ -93,7 +92,6 @@ def create_app() -> FastAPI:
         description="OpenAI-compatible API for Gemini Web",
         version="1.0.0",
         lifespan=lifespan,
-        default_response_class=ORJSONResponse,
     )
 
     add_cors_middleware(app)
diff --git a/app/server/middleware.py b/app/server/middleware.py
index 4bc358d..b5bc55b 100644
--- a/app/server/middleware.py
+++ b/app/server/middleware.py
@@ -6,7 +6,7 @@
 
 from fastapi import Depends, FastAPI, HTTPException, Request, status
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import ORJSONResponse
+from fastapi.responses import JSONResponse
 from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
 from loguru import logger
 
@@ -70,12 +70,12 @@ def cleanup_expired_images(retention_days: int) -> int:
 
 def global_exception_handler(request: Request, exc: Exception):
     if isinstance(exc, HTTPException):
-        return ORJSONResponse(
+        return JSONResponse(
             status_code=exc.status_code,
             content={"error": {"message": exc.detail}},
         )
 
-    return ORJSONResponse(
+    return JSONResponse(
         status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
         content={"error": {"message": str(exc)}},
     )
diff --git a/pyproject.toml b/pyproject.toml
index ebbadd4..f699096 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ description = "FastAPI Server built on Gemini Web API"
 readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
-    "fastapi>=0.129.2",
+    "fastapi>=0.131.0",
     "gemini-webapi>=1.19.2",
     "httptools>=0.7.1",
     "httpx[http2]>=0.28.1",
diff --git a/uv.lock b/uv.lock
index cab521a..d1f77a0 100644
--- a/uv.lock
+++ b/uv.lock
@@ -64,7 +64,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.129.2"
+version = "0.131.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -73,9 +73,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/fd/cc/1b0d90ed759ff8c9dbc4800de7475d4e9256a81b97b45bd05a1affcb350a/fastapi-0.129.2.tar.gz", hash = "sha256:e2b3637a2b47856e704dbd9a3a09393f6df48e8b9cb6c7a3e26ba44d2053f9ab", size = 368211, upload-time = "2026-02-21T17:25:49.198Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/91/32/158cbf685b7d5a26f87131069da286bf10fc9fbf7fc968d169d48a45d689/fastapi-0.131.0.tar.gz", hash = "sha256:6531155e52bee2899a932c746c9a8250f210e3c3303a5f7b9f8a808bfe0548ff", size = 369612, upload-time = "2026-02-22T16:38:11.252Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/18/d0/a89a640308016c7fff8d2a47b86cc03ee7cca780b5079d0b69f466f9e1a9/fastapi-0.129.2-py3-none-any.whl", hash = "sha256:e21d9f6e8db376655187905ad0145edd6f6a4e5f2bff241c4efb8a0bffd6a540", size = 103227, upload-time = "2026-02-21T17:25:47.745Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/94/b58ec24c321acc2ad1327f69b033cadc005e0f26df9a73828c9e9c7db7ce/fastapi-0.131.0-py3-none-any.whl", hash = "sha256:ed0e53decccf4459de78837ce1b867cd04fa9ce4579497b842579755d20b405a", size = 103854, upload-time = "2026-02-22T16:38:09.814Z" },
 ]
 
 [[package]]
@@ -108,7 +108,7 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "fastapi", specifier = ">=0.129.2" },
+    { name = "fastapi", specifier = ">=0.131.0" },
     { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "httpx", extras = ["http2"], specifier = ">=0.28.1" },

From fbe19db6d682ca8e93da11cfef8af0a84ffdf06f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 23 Feb 2026 11:23:53 +0700
Subject: [PATCH 151/236] Refactor HTTP client usage to utilize AsyncSession
 from curl-cffi for improved performance

---
 app/utils/helper.py |  4 +--
 pyproject.toml      |  2 +-
 uv.lock             | 59 +++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/app/utils/helper.py b/app/utils/helper.py
index da5b3c5..9a930b7 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -10,8 +10,8 @@
 from pathlib import Path
 from urllib.parse import urlparse
 
-import httpx
 import orjson
+from curl_cffi.requests import AsyncSession
 from loguru import logger
 
 from app.models import FunctionCall, Message, ToolCall
@@ -202,7 +202,7 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
         data = base64.b64decode(url.split(",")[1])
         suffix = mimetypes.guess_extension(mime_type) or f".{mime_type.split('/')[1]}"
     else:
-        async with httpx.AsyncClient(http2=True, follow_redirects=True) as client:
+        async with AsyncSession(impersonate="chrome", allow_redirects=True) as client:
             resp = await client.get(url)
             resp.raise_for_status()
             data = resp.content
diff --git a/pyproject.toml b/pyproject.toml
index f699096..38c88e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,10 +5,10 @@ description = "FastAPI Server built on Gemini Web API"
 readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
+    "curl-cffi>=0.14.0",
     "fastapi>=0.131.0",
     "gemini-webapi>=1.19.2",
     "httptools>=0.7.1",
-    "httpx[http2]>=0.28.1",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
     "orjson>=3.11.7",
diff --git a/uv.lock b/uv.lock
index d1f77a0..379fa2d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -41,6 +41,29 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
 ]
 
+[[package]]
+name = "cffi"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pycparser", marker = "implementation_name != 'PyPy'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" },
+    { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" },
+    { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" },
+    { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" },
+    { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" },
+]
+
 [[package]]
 name = "click"
 version = "8.3.1"
@@ -62,6 +85,29 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
 ]
 
+[[package]]
+name = "curl-cffi"
+version = "0.14.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "cffi" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9b/c9/0067d9a25ed4592b022d4558157fcdb6e123516083700786d38091688767/curl_cffi-0.14.0.tar.gz", hash = "sha256:5ffbc82e59f05008ec08ea432f0e535418823cda44178ee518906a54f27a5f0f", size = 162633, upload-time = "2025-12-16T03:25:07.931Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/aa/f0/0f21e9688eaac85e705537b3a87a5588d0cefb2f09d83e83e0e8be93aa99/curl_cffi-0.14.0-cp39-abi3-macosx_14_0_arm64.whl", hash = "sha256:e35e89c6a69872f9749d6d5fda642ed4fc159619329e99d577d0104c9aad5893", size = 3087277, upload-time = "2025-12-16T03:24:49.607Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/a3/0419bd48fce5b145cb6a2344c6ac17efa588f5b0061f212c88e0723da026/curl_cffi-0.14.0-cp39-abi3-macosx_15_0_x86_64.whl", hash = "sha256:5945478cd28ad7dfb5c54473bcfb6743ee1d66554d57951fdf8fc0e7d8cf4e45", size = 5804650, upload-time = "2025-12-16T03:24:51.518Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/07/a238dd062b7841b8caa2fa8a359eb997147ff3161288f0dd46654d898b4d/curl_cffi-0.14.0-cp39-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c42e8fa3c667db9ccd2e696ee47adcd3cd5b0838d7282f3fc45f6c0ef3cfdfa7", size = 8231918, upload-time = "2025-12-16T03:24:52.862Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/d2/ce907c9b37b5caf76ac08db40cc4ce3d9f94c5500db68a195af3513eacbc/curl_cffi-0.14.0-cp39-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:060fe2c99c41d3cb7f894de318ddf4b0301b08dca70453d769bd4e74b36b8483", size = 8654624, upload-time = "2025-12-16T03:24:54.579Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/ae/6256995b18c75e6ef76b30753a5109e786813aa79088b27c8eabb1ef85c9/curl_cffi-0.14.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b158c41a25388690dd0d40b5bc38d1e0f512135f17fdb8029868cbc1993d2e5b", size = 8010654, upload-time = "2025-12-16T03:24:56.507Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/10/ff64249e516b103cb762e0a9dca3ee0f04cf25e2a1d5d9838e0f1273d071/curl_cffi-0.14.0-cp39-abi3-manylinux_2_28_i686.whl", hash = "sha256:1439fbef3500fb723333c826adf0efb0e2e5065a703fb5eccce637a2250db34a", size = 7781969, upload-time = "2025-12-16T03:24:57.885Z" },
+    { url = "https://files.pythonhosted.org/packages/51/76/d6f7bb76c2d12811aa7ff16f5e17b678abdd1b357b9a8ac56310ceccabd5/curl_cffi-0.14.0-cp39-abi3-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e7176f2c2d22b542e3cf261072a81deb018cfa7688930f95dddef215caddb469", size = 7969133, upload-time = "2025-12-16T03:24:59.261Z" },
+    { url = "https://files.pythonhosted.org/packages/23/7c/cca39c0ed4e1772613d3cba13091c0e9d3b89365e84b9bf9838259a3cd8f/curl_cffi-0.14.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:03f21ade2d72978c2bb8670e9b6de5260e2755092b02d94b70b906813662998d", size = 9080167, upload-time = "2025-12-16T03:25:00.946Z" },
+    { url = "https://files.pythonhosted.org/packages/75/03/a942d7119d3e8911094d157598ae0169b1c6ca1bd3f27d7991b279bcc45b/curl_cffi-0.14.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:58ebf02de64ee5c95613209ddacb014c2d2f86298d7080c0a1c12ed876ee0690", size = 9520464, upload-time = "2025-12-16T03:25:02.922Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/77/78900e9b0833066d2274bda75cba426fdb4cef7fbf6a4f6a6ca447607bec/curl_cffi-0.14.0-cp39-abi3-win_amd64.whl", hash = "sha256:6e503f9a103f6ae7acfb3890c843b53ec030785a22ae7682a22cc43afb94123e", size = 1677416, upload-time = "2025-12-16T03:25:04.902Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/7c/d2ba86b0b3e1e2830bd94163d047de122c69a8df03c5c7c36326c456ad82/curl_cffi-0.14.0-cp39-abi3-win_arm64.whl", hash = "sha256:2eed50a969201605c863c4c31269dfc3e0da52916086ac54553cfa353022425c", size = 1425067, upload-time = "2025-12-16T03:25:06.454Z" },
+]
+
 [[package]]
 name = "fastapi"
 version = "0.131.0"
@@ -83,10 +129,10 @@ name = "gemini-fastapi"
 version = "1.0.0"
 source = { virtual = "." }
 dependencies = [
+    { name = "curl-cffi" },
     { name = "fastapi" },
     { name = "gemini-webapi" },
     { name = "httptools" },
-    { name = "httpx", extra = ["http2"] },
     { name = "lmdb" },
     { name = "loguru" },
     { name = "orjson" },
@@ -108,10 +154,10 @@ dev = [
 
 [package.metadata]
 requires-dist = [
+    { name = "curl-cffi", specifier = ">=0.14.0" },
     { name = "fastapi", specifier = ">=0.131.0" },
     { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "httptools", specifier = ">=0.7.1" },
-    { name = "httpx", extras = ["http2"], specifier = ">=0.28.1" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "orjson", specifier = ">=3.11.7" },
@@ -316,6 +362,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
 ]
 
+[[package]]
+name = "pycparser"
+version = "3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" },
+]
+
 [[package]]
 name = "pydantic"
 version = "2.12.5"

From b7a9ca50001d7750de161aedf69f373acedff6cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 24 Feb 2026 11:34:06 +0700
Subject: [PATCH 152/236] Update dependencies to latest versions

---
 pyproject.toml | 2 +-
 uv.lock        | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 38c88e3..9ef1f91 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
     "curl-cffi>=0.14.0",
-    "fastapi>=0.131.0",
+    "fastapi>=0.132.0",
     "gemini-webapi>=1.19.2",
     "httptools>=0.7.1",
     "lmdb>=1.7.5",
diff --git a/uv.lock b/uv.lock
index 379fa2d..6481763 100644
--- a/uv.lock
+++ b/uv.lock
@@ -110,7 +110,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.131.0"
+version = "0.132.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -119,9 +119,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/91/32/158cbf685b7d5a26f87131069da286bf10fc9fbf7fc968d169d48a45d689/fastapi-0.131.0.tar.gz", hash = "sha256:6531155e52bee2899a932c746c9a8250f210e3c3303a5f7b9f8a808bfe0548ff", size = 369612, upload-time = "2026-02-22T16:38:11.252Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a0/55/f1b4d4e478a0a1b4b1113d0f610a1b08e539b69900f97fdc97155d62fdee/fastapi-0.132.0.tar.gz", hash = "sha256:ef687847936d8a57ea6ea04cf9a85fe5f2c6ba64e22bfa721467094b69d48d92", size = 372422, upload-time = "2026-02-23T17:56:22.218Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ff/94/b58ec24c321acc2ad1327f69b033cadc005e0f26df9a73828c9e9c7db7ce/fastapi-0.131.0-py3-none-any.whl", hash = "sha256:ed0e53decccf4459de78837ce1b867cd04fa9ce4579497b842579755d20b405a", size = 103854, upload-time = "2026-02-22T16:38:09.814Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/de/6171c3363bbc5e01686e200e0880647c9270daa476d91030435cf14d32f5/fastapi-0.132.0-py3-none-any.whl", hash = "sha256:3c487d5afce196fa8ea509ae1531e96ccd5cdd2fd6eae78b73e2c20fba706689", size = 104652, upload-time = "2026-02-23T17:56:20.836Z" },
 ]
 
 [[package]]
@@ -155,7 +155,7 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
-    { name = "fastapi", specifier = ">=0.131.0" },
+    { name = "fastapi", specifier = ">=0.132.0" },
     { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=1.7.5" },

From c4aefbf779f6952a1252e5188ac358ce83b7230d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 24 Feb 2026 11:57:56 +0700
Subject: [PATCH 153/236] Add API endpoint documentation for OpenAI-compatible
 and advanced endpoints in README files

---
 README.md    | 24 ++++++++++++++++++++++++
 README.zh.md | 24 ++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/README.md b/README.md
index 91f687c..6b6f485 100644
--- a/README.md
+++ b/README.md
@@ -74,6 +74,30 @@ python run.py
 
 The server will start on `http://localhost:8000` by default.
 
+## API Endpoints
+
+The server provides several endpoints, including OpenAI-compatible ones.
+
+### OpenAI-Compatible Endpoints
+
+These endpoints are designed to be compatible with OpenAI's API structure, allowing you to use Gemini as a drop-in replacement.
+
+- **`GET /v1/models`**: Lists all supported Gemini models.
+- **`POST /v1/chat/completions`**: Unified chat interface.
+  - **Streaming**: Set `stream: true` to receive real-time delta chunks.
+  - **Multi-modal**: Supports text, images, and file uploads.
+  - **Tool Calling**: Supports function calling via the `tools` parameter.
+  - **Structured Output**: Supports `response_format` for JSON schema enforcement.
+
+### Advanced Endpoints
+
+- **`POST /v1/responses`**: An alternative endpoint for complex interaction patterns, supporting rich output items including generated images and tool calls.
+
+### Utility Endpoints
+
+- **`GET /health`**: Health check endpoint. Returns the status of the server, configured Gemini clients, and conversation storage.
+- **`GET /images/{filename}`**: Internal endpoint to serve generated images. Requires a valid token (automatically included in image URLs returned by the API).
+
 ## Docker Deployment
 
 ### Run with Options
diff --git a/README.zh.md b/README.zh.md
index d23bec1..d012d32 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -74,6 +74,30 @@ python run.py
 
 服务默认启动在 `http://localhost:8000`。
 
+## API 接口
+
+本服务器提供了一系列接口，重点支持 OpenAI 兼容协议。
+
+### OpenAI 兼容接口
+
+这些接口遵循 OpenAI 的 API 规范，允许你将 Gemini 作为 **Drop-in 替代方案** 直接接入现有的 AI 应用。
+
+- **`GET /v1/models`**: 列出所有可用的 Gemini 模型。
+- **`POST /v1/chat/completions`**: 统一聊天对话接口。
+  - **流式传输**: 设置 `stream: true` 即可实时接收增量响应 (Stream Delta)。
+  - **多模态支持**: 支持在消息中包含文本、图片以及文件上传。
+  - **工具调用**: 支持通过 `tools` 参数进行函数调用 (Function Calling)。
+  - **结构化输出**: 支持 `response_format`，可严格遵循 JSON Schema。
+
+### 高级接口
+
+- **`POST /v1/responses`**: 用于复杂交互模式的专用接口，支持分步输出、生成图片及工具调用等更丰富的响应项。
+
+### 辅助与系统接口
+
+- **`GET /health`**: 健康检查接口。返回服务器运行状态、已配置的 Gemini 客户端健康度以及对话存储统计信息。
+- **`GET /images/{filename}`**: 用于访问生成的图片的内部接口。需携带有效 Token（API 返回的图片 URL 中已自动包含该 Token）。
+
 ## Docker 部署
 
 ### 直接运行

From e00a8fa0261063e4cc82f4e22951da3ff17dc012 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 25 Feb 2026 15:24:28 +0700
Subject: [PATCH 154/236] Add optional custom cookies parameter

---
 app/services/pool.py | 25 +++++++++++++++++++------
 app/utils/config.py  | 14 +++++++++++++-
 pyproject.toml       |  2 +-
 uv.lock              | 14 +++++++-------
 4 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/app/services/pool.py b/app/services/pool.py
index 3b4197c..3c26e3d 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -1,6 +1,8 @@
 import asyncio
+import inspect
 from collections import deque
 
+from gemini_webapi import GeminiClient
 from loguru import logger
 
 from app.utils import g_config
@@ -22,12 +24,23 @@ def __init__(self) -> None:
             raise ValueError("No Gemini clients configured")
 
         for c in g_config.gemini.clients:
-            client = GeminiClientWrapper(
-                client_id=c.id,
-                secure_1psid=c.secure_1psid,
-                secure_1psidts=c.secure_1psidts,
-                proxy=c.proxy,
-            )
+            kwargs = {
+                "client_id": c.id,
+                "secure_1psid": c.secure_1psid,
+                "secure_1psidts": c.secure_1psidts,
+                "proxy": c.proxy,
+            }
+            if c.cookies:
+                sig = inspect.signature(GeminiClient.__init__)
+                if "cookies" in sig.parameters:
+                    kwargs["cookies"] = c.cookies
+                else:
+                    logger.debug(
+                        f"Ignoring 'cookies' in config for client {c.id} because "
+                        "the current version of gemini_webapi doesn't support it."
+                    )
+
+            client = GeminiClientWrapper(**kwargs)
             self._clients.append(client)
             self._id_map[c.id] = client
             self._round_robin.append(client)
diff --git a/app/utils/config.py b/app/utils/config.py
index 21d2891..e00c7d9 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -42,6 +42,9 @@ class GeminiClientSettings(BaseModel):
     secure_1psid: str = Field(..., description="Gemini Secure 1PSID")
     secure_1psidts: str = Field(..., description="Gemini Secure 1PSIDTS")
     proxy: str | None = Field(default=None, description="Proxy URL for this Gemini client")
+    cookies: dict[str, str] | None = Field(
+        default=None, description="Optional custom cookies for this Gemini client"
+    )
 
     @field_validator("proxy", mode="before")
     @classmethod
@@ -51,6 +54,16 @@ def _blank_proxy_to_none(cls, value: str | None) -> str | None:
         stripped = value.strip()
         return stripped or None
 
+    @field_validator("cookies", mode="before")
+    @classmethod
+    def _parse_cookies(cls, v: Any) -> Any:
+        if isinstance(v, str) and v.strip().startswith("{"):
+            try:
+                return orjson.loads(v)
+            except orjson.JSONDecodeError:
+                pass
+        return v
+
 
 class GeminiModelConfig(BaseModel):
     """Configuration for a custom Gemini model."""
@@ -67,7 +80,6 @@ def _parse_json_string(cls, v: Any) -> Any:
             try:
                 return orjson.loads(v)
             except orjson.JSONDecodeError:
-                # Return the original value to let Pydantic handle the error or type mismatch
                 return v
         return v
 
diff --git a/pyproject.toml b/pyproject.toml
index 9ef1f91..9a6dee9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
     "curl-cffi>=0.14.0",
-    "fastapi>=0.132.0",
+    "fastapi>=0.133.0",
     "gemini-webapi>=1.19.2",
     "httptools>=0.7.1",
     "lmdb>=1.7.5",
diff --git a/uv.lock b/uv.lock
index 6481763..8ded1fa 100644
--- a/uv.lock
+++ b/uv.lock
@@ -34,11 +34,11 @@ wheels = [
 
 [[package]]
 name = "certifi"
-version = "2026.1.4"
+version = "2026.2.25"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" },
 ]
 
 [[package]]
@@ -110,7 +110,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.132.0"
+version = "0.133.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -119,9 +119,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/a0/55/f1b4d4e478a0a1b4b1113d0f610a1b08e539b69900f97fdc97155d62fdee/fastapi-0.132.0.tar.gz", hash = "sha256:ef687847936d8a57ea6ea04cf9a85fe5f2c6ba64e22bfa721467094b69d48d92", size = 372422, upload-time = "2026-02-23T17:56:22.218Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c2/04/ab382c7c03dd545f2c964d06e87ad0d5faa944a2434186ad9c285f5d87e0/fastapi-0.133.0.tar.gz", hash = "sha256:b900a2bf5685cdb0647a41d5900bdeafc3a9e8a28ac08c6246b76699e164d60d", size = 373265, upload-time = "2026-02-24T09:53:40.143Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a8/de/6171c3363bbc5e01686e200e0880647c9270daa476d91030435cf14d32f5/fastapi-0.132.0-py3-none-any.whl", hash = "sha256:3c487d5afce196fa8ea509ae1531e96ccd5cdd2fd6eae78b73e2c20fba706689", size = 104652, upload-time = "2026-02-23T17:56:20.836Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/b4/023e75a2ec3f5440e380df6caf4d28edc0806d007193e6fb0707237886a4/fastapi-0.133.0-py3-none-any.whl", hash = "sha256:0a78878483d60702a1dde864c24ab349a1a53ef4db6b6f74f8cd4a2b2bc67d2f", size = 104787, upload-time = "2026-02-24T09:53:41.404Z" },
 ]
 
 [[package]]
@@ -155,7 +155,7 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
-    { name = "fastapi", specifier = ">=0.132.0" },
+    { name = "fastapi", specifier = ">=0.133.0" },
     { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=1.7.5" },

From eb3c286f7951282ba94929de68eaf732ba858b8e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 26 Feb 2026 19:51:39 +0700
Subject: [PATCH 155/236] Revert "Add optional custom cookies parameter"

This reverts commit e00a8fa0261063e4cc82f4e22951da3ff17dc012.
---
 app/services/pool.py | 25 ++++++-------------------
 app/utils/config.py  | 14 +-------------
 pyproject.toml       |  2 +-
 uv.lock              | 14 +++++++-------
 4 files changed, 15 insertions(+), 40 deletions(-)

diff --git a/app/services/pool.py b/app/services/pool.py
index 3c26e3d..3b4197c 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -1,8 +1,6 @@
 import asyncio
-import inspect
 from collections import deque
 
-from gemini_webapi import GeminiClient
 from loguru import logger
 
 from app.utils import g_config
@@ -24,23 +22,12 @@ def __init__(self) -> None:
             raise ValueError("No Gemini clients configured")
 
         for c in g_config.gemini.clients:
-            kwargs = {
-                "client_id": c.id,
-                "secure_1psid": c.secure_1psid,
-                "secure_1psidts": c.secure_1psidts,
-                "proxy": c.proxy,
-            }
-            if c.cookies:
-                sig = inspect.signature(GeminiClient.__init__)
-                if "cookies" in sig.parameters:
-                    kwargs["cookies"] = c.cookies
-                else:
-                    logger.debug(
-                        f"Ignoring 'cookies' in config for client {c.id} because "
-                        "the current version of gemini_webapi doesn't support it."
-                    )
-
-            client = GeminiClientWrapper(**kwargs)
+            client = GeminiClientWrapper(
+                client_id=c.id,
+                secure_1psid=c.secure_1psid,
+                secure_1psidts=c.secure_1psidts,
+                proxy=c.proxy,
+            )
             self._clients.append(client)
             self._id_map[c.id] = client
             self._round_robin.append(client)
diff --git a/app/utils/config.py b/app/utils/config.py
index e00c7d9..21d2891 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -42,9 +42,6 @@ class GeminiClientSettings(BaseModel):
     secure_1psid: str = Field(..., description="Gemini Secure 1PSID")
     secure_1psidts: str = Field(..., description="Gemini Secure 1PSIDTS")
     proxy: str | None = Field(default=None, description="Proxy URL for this Gemini client")
-    cookies: dict[str, str] | None = Field(
-        default=None, description="Optional custom cookies for this Gemini client"
-    )
 
     @field_validator("proxy", mode="before")
     @classmethod
@@ -54,16 +51,6 @@ def _blank_proxy_to_none(cls, value: str | None) -> str | None:
         stripped = value.strip()
         return stripped or None
 
-    @field_validator("cookies", mode="before")
-    @classmethod
-    def _parse_cookies(cls, v: Any) -> Any:
-        if isinstance(v, str) and v.strip().startswith("{"):
-            try:
-                return orjson.loads(v)
-            except orjson.JSONDecodeError:
-                pass
-        return v
-
 
 class GeminiModelConfig(BaseModel):
     """Configuration for a custom Gemini model."""
@@ -80,6 +67,7 @@ def _parse_json_string(cls, v: Any) -> Any:
             try:
                 return orjson.loads(v)
             except orjson.JSONDecodeError:
+                # Return the original value to let Pydantic handle the error or type mismatch
                 return v
         return v
 
diff --git a/pyproject.toml b/pyproject.toml
index 9a6dee9..9ef1f91 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
     "curl-cffi>=0.14.0",
-    "fastapi>=0.133.0",
+    "fastapi>=0.132.0",
     "gemini-webapi>=1.19.2",
     "httptools>=0.7.1",
     "lmdb>=1.7.5",
diff --git a/uv.lock b/uv.lock
index 8ded1fa..6481763 100644
--- a/uv.lock
+++ b/uv.lock
@@ -34,11 +34,11 @@ wheels = [
 
 [[package]]
 name = "certifi"
-version = "2026.2.25"
+version = "2026.1.4"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
 ]
 
 [[package]]
@@ -110,7 +110,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.133.0"
+version = "0.132.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -119,9 +119,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c2/04/ab382c7c03dd545f2c964d06e87ad0d5faa944a2434186ad9c285f5d87e0/fastapi-0.133.0.tar.gz", hash = "sha256:b900a2bf5685cdb0647a41d5900bdeafc3a9e8a28ac08c6246b76699e164d60d", size = 373265, upload-time = "2026-02-24T09:53:40.143Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a0/55/f1b4d4e478a0a1b4b1113d0f610a1b08e539b69900f97fdc97155d62fdee/fastapi-0.132.0.tar.gz", hash = "sha256:ef687847936d8a57ea6ea04cf9a85fe5f2c6ba64e22bfa721467094b69d48d92", size = 372422, upload-time = "2026-02-23T17:56:22.218Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/bf/b4/023e75a2ec3f5440e380df6caf4d28edc0806d007193e6fb0707237886a4/fastapi-0.133.0-py3-none-any.whl", hash = "sha256:0a78878483d60702a1dde864c24ab349a1a53ef4db6b6f74f8cd4a2b2bc67d2f", size = 104787, upload-time = "2026-02-24T09:53:41.404Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/de/6171c3363bbc5e01686e200e0880647c9270daa476d91030435cf14d32f5/fastapi-0.132.0-py3-none-any.whl", hash = "sha256:3c487d5afce196fa8ea509ae1531e96ccd5cdd2fd6eae78b73e2c20fba706689", size = 104652, upload-time = "2026-02-23T17:56:20.836Z" },
 ]
 
 [[package]]
@@ -155,7 +155,7 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
-    { name = "fastapi", specifier = ">=0.133.0" },
+    { name = "fastapi", specifier = ">=0.132.0" },
     { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=1.7.5" },

From 29682fd61f8495ed652fe5f1e24fb4d848dd1d3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 26 Feb 2026 19:54:55 +0700
Subject: [PATCH 156/236] Update dependencies to latest versions

---
 pyproject.toml |  2 +-
 uv.lock        | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 9ef1f91..68f4563 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
     "curl-cffi>=0.14.0",
-    "fastapi>=0.132.0",
+    "fastapi>=0.133.1",
     "gemini-webapi>=1.19.2",
     "httptools>=0.7.1",
     "lmdb>=1.7.5",
diff --git a/uv.lock b/uv.lock
index 6481763..4c2f563 100644
--- a/uv.lock
+++ b/uv.lock
@@ -34,11 +34,11 @@ wheels = [
 
 [[package]]
 name = "certifi"
-version = "2026.1.4"
+version = "2026.2.25"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" },
 ]
 
 [[package]]
@@ -110,7 +110,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.132.0"
+version = "0.133.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -119,9 +119,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/a0/55/f1b4d4e478a0a1b4b1113d0f610a1b08e539b69900f97fdc97155d62fdee/fastapi-0.132.0.tar.gz", hash = "sha256:ef687847936d8a57ea6ea04cf9a85fe5f2c6ba64e22bfa721467094b69d48d92", size = 372422, upload-time = "2026-02-23T17:56:22.218Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/22/6f/0eafed8349eea1fa462238b54a624c8b408cd1ba2795c8e64aa6c34f8ab7/fastapi-0.133.1.tar.gz", hash = "sha256:ed152a45912f102592976fde6cbce7dae1a8a1053da94202e51dd35d184fadd6", size = 378741, upload-time = "2026-02-25T18:18:17.398Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a8/de/6171c3363bbc5e01686e200e0880647c9270daa476d91030435cf14d32f5/fastapi-0.132.0-py3-none-any.whl", hash = "sha256:3c487d5afce196fa8ea509ae1531e96ccd5cdd2fd6eae78b73e2c20fba706689", size = 104652, upload-time = "2026-02-23T17:56:20.836Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/c9/a175a7779f3599dfa4adfc97a6ce0e157237b3d7941538604aadaf97bfb6/fastapi-0.133.1-py3-none-any.whl", hash = "sha256:658f34ba334605b1617a65adf2ea6461901bdb9af3a3080d63ff791ecf7dc2e2", size = 109029, upload-time = "2026-02-25T18:18:18.578Z" },
 ]
 
 [[package]]
@@ -155,7 +155,7 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
-    { name = "fastapi", specifier = ">=0.132.0" },
+    { name = "fastapi", specifier = ">=0.133.1" },
     { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=1.7.5" },

From 5fcd60b933b6d99b2e9e5f09764988271e212e55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 26 Feb 2026 21:29:30 +0700
Subject: [PATCH 157/236] Raise watchdog_timeout default to 120s and drop 75s cap

---
 app/utils/config.py | 2 +-
 config/config.yaml  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/utils/config.py b/app/utils/config.py
index 21d2891..c4c81c7 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -85,7 +85,7 @@ class GeminiConfig(BaseModel):
     )
     timeout: int = Field(default=300, ge=30, description="Init timeout in seconds")
     watchdog_timeout: int = Field(
-        default=60, ge=10, le=75, description="Watchdog timeout in seconds (Not more than 75s)"
+        default=120, ge=30, description="Watchdog timeout in seconds"
     )
     auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies")
     refresh_interval: int = Field(
diff --git a/config/config.yaml b/config/config.yaml
index 3d5e6f4..56497ad 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -23,7 +23,7 @@ gemini:
       secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"
       proxy: null          # Optional proxy URL (null/empty means direct connection)
   timeout: 300             # Init timeout in seconds (Not less than 30s)
-  watchdog_timeout: 60     # Watchdog timeout in seconds (Not more than 75s)
+  watchdog_timeout: 120    # Watchdog timeout in seconds (Not less than 30s)
   auto_refresh: true       # Auto-refresh session cookies
   refresh_interval: 540    # Refresh interval in seconds (Not less than 60s)
   verbose: false           # Enable verbose logging for Gemini requests

From 2305e2d46f2a79627798c5491d3e2ac929a7888c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 26 Feb 2026 21:38:47 +0700
Subject: [PATCH 158/236] Reformat watchdog_timeout field onto one line

---
 app/utils/config.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/app/utils/config.py b/app/utils/config.py
index c4c81c7..89119a1 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -84,9 +84,7 @@ class GeminiConfig(BaseModel):
         description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom",
     )
     timeout: int = Field(default=300, ge=30, description="Init timeout in seconds")
-    watchdog_timeout: int = Field(
-        default=120, ge=30, description="Watchdog timeout in seconds"
-    )
+    watchdog_timeout: int = Field(default=120, ge=30, description="Watchdog timeout in seconds")
     auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies")
     refresh_interval: int = Field(
         default=540,

From c9693bd98a1eff8c5f3a18783e33e0337e2bf828 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 26 Feb 2026 22:13:21 +0700
Subject: [PATCH 159/236] Add optional custom cookies parameter

---
 app/services/pool.py | 25 +++++++++++++++++++------
 app/utils/config.py  | 14 +++++++++++++-
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/app/services/pool.py b/app/services/pool.py
index 3b4197c..3c26e3d 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -1,6 +1,8 @@
 import asyncio
+import inspect
 from collections import deque
 
+from gemini_webapi import GeminiClient
 from loguru import logger
 
 from app.utils import g_config
@@ -22,12 +24,23 @@ def __init__(self) -> None:
             raise ValueError("No Gemini clients configured")
 
         for c in g_config.gemini.clients:
-            client = GeminiClientWrapper(
-                client_id=c.id,
-                secure_1psid=c.secure_1psid,
-                secure_1psidts=c.secure_1psidts,
-                proxy=c.proxy,
-            )
+            kwargs = {
+                "client_id": c.id,
+                "secure_1psid": c.secure_1psid,
+                "secure_1psidts": c.secure_1psidts,
+                "proxy": c.proxy,
+            }
+            if c.cookies:
+                sig = inspect.signature(GeminiClient.__init__)
+                if "cookies" in sig.parameters:
+                    kwargs["cookies"] = c.cookies
+                else:
+                    logger.debug(
+                        f"Ignoring 'cookies' in config for client {c.id} because "
+                        "the current version of gemini_webapi doesn't support it."
+                    )
+
+            client = GeminiClientWrapper(**kwargs)
             self._clients.append(client)
             self._id_map[c.id] = client
             self._round_robin.append(client)
diff --git a/app/utils/config.py b/app/utils/config.py
index 89119a1..1aee013 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -42,6 +42,9 @@ class GeminiClientSettings(BaseModel):
     secure_1psid: str = Field(..., description="Gemini Secure 1PSID")
     secure_1psidts: str = Field(..., description="Gemini Secure 1PSIDTS")
     proxy: str | None = Field(default=None, description="Proxy URL for this Gemini client")
+    cookies: dict[str, str] | None = Field(
+        default=None, description="Optional custom cookies for this Gemini client"
+    )
 
     @field_validator("proxy", mode="before")
     @classmethod
@@ -51,6 +54,16 @@ def _blank_proxy_to_none(cls, value: str | None) -> str | None:
         stripped = value.strip()
         return stripped or None
 
+    @field_validator("cookies", mode="before")
+    @classmethod
+    def _parse_cookies(cls, v: Any) -> Any:
+        if isinstance(v, str) and v.strip().startswith("{"):
+            try:
+                return orjson.loads(v)
+            except orjson.JSONDecodeError:
+                pass
+        return v
+
 
 class GeminiModelConfig(BaseModel):
     """Configuration for a custom Gemini model."""
@@ -67,7 +80,6 @@ def _parse_json_string(cls, v: Any) -> Any:
             try:
                 return orjson.loads(v)
             except orjson.JSONDecodeError:
-                # Return the original value to let Pydantic handle the error or type mismatch
                 return v
         return v
 

From 1c7f99a47f291624fa681981ca90e397ea1d48fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 27 Feb 2026 23:42:29 +0700
Subject: [PATCH 160/236] Increase timeout settings for improved connection
 stability during peak hours

---
 .github/workflows/ruff.yaml |  2 +-
 app/services/client.py      |  3 ++-
 app/utils/config.py         |  6 +++---
 config/config.yaml          |  6 +++---
 pyproject.toml              |  2 +-
 uv.lock                     | 42 ++++++++++++++++++-------------------
 6 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml
index d451cdc..5e13127 100644
--- a/.github/workflows/ruff.yaml
+++ b/.github/workflows/ruff.yaml
@@ -24,7 +24,7 @@ jobs:
       - name: Install Ruff
         run: |
           python -m pip install --upgrade pip
-          pip install "ruff>=0.15.1"
+          pip install ruff
 
       - name: Run Ruff
         run: ruff check .
diff --git a/app/services/client.py b/app/services/client.py
index 49d9e87..b8f976b 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -33,7 +33,7 @@ async def init(
         timeout: float = cast(float, _UNSET),
         watchdog_timeout: float = cast(float, _UNSET),
         auto_close: bool = False,
-        close_delay: float = 300,
+        close_delay: float = cast(float, _UNSET),
         auto_refresh: bool = cast(bool, _UNSET),
         refresh_interval: float = cast(float, _UNSET),
         verbose: bool = cast(bool, _UNSET),
@@ -44,6 +44,7 @@ async def init(
         config = g_config.gemini
         timeout = cast(float, _resolve(timeout, config.timeout))
         watchdog_timeout = cast(float, _resolve(watchdog_timeout, config.watchdog_timeout))
+        close_delay = timeout
         auto_refresh = cast(bool, _resolve(auto_refresh, config.auto_refresh))
         refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval))
         verbose = cast(bool, _resolve(verbose, config.verbose))
diff --git a/app/utils/config.py b/app/utils/config.py
index 1aee013..7371623 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -95,11 +95,11 @@ class GeminiConfig(BaseModel):
         default="append",
         description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom",
     )
-    timeout: int = Field(default=300, ge=30, description="Init timeout in seconds")
-    watchdog_timeout: int = Field(default=120, ge=30, description="Watchdog timeout in seconds")
+    timeout: int = Field(default=600, ge=30, description="Init timeout in seconds")
+    watchdog_timeout: int = Field(default=300, ge=30, description="Watchdog timeout in seconds")
     auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies")
     refresh_interval: int = Field(
-        default=540,
+        default=600,
         ge=60,
         description="Interval in seconds to refresh Gemini cookies (Not less than 60s)",
     )
diff --git a/config/config.yaml b/config/config.yaml
index 56497ad..f38ef86 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -22,10 +22,10 @@ gemini:
       secure_1psid: "YOUR_SECURE_1PSID_HERE"
       secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"
       proxy: null          # Optional proxy URL (null/empty means direct connection)
-  timeout: 300             # Init timeout in seconds (Not less than 30s)
-  watchdog_timeout: 120    # Watchdog timeout in seconds (Not less than 30s)
+  timeout: 600             # Init timeout in seconds (Not less than 30s)
+  watchdog_timeout: 300    # Watchdog timeout in seconds (Not less than 30s)
   auto_refresh: true       # Auto-refresh session cookies
-  refresh_interval: 540    # Refresh interval in seconds (Not less than 60s)
+  refresh_interval: 600    # Refresh interval in seconds (Not less than 60s)
   verbose: false           # Enable verbose logging for Gemini requests
   max_chars_per_request: 1000000     # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit
   model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only)
diff --git a/pyproject.toml b/pyproject.toml
index 68f4563..c99fbbd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,7 @@ Repository = "https://github.com/Nativu5/Gemini-FastAPI"
 [project.optional-dependencies]
 dev = [
     "pytest>=9.0.2",
-    "ruff>=0.15.2",
+    "ruff>=0.15.4",
 ]
 
 [dependency-groups]
diff --git a/uv.lock b/uv.lock
index 4c2f563..077ff7a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -484,27 +484,27 @@ wheels = [
 
 [[package]]
 name = "ruff"
-version = "0.15.2"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/06/04/eab13a954e763b0606f460443fcbf6bb5a0faf06890ea3754ff16523dce5/ruff-0.15.2.tar.gz", hash = "sha256:14b965afee0969e68bb871eba625343b8673375f457af4abe98553e8bbb98342", size = 4558148, upload-time = "2026-02-19T22:32:20.271Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/2f/70/3a4dc6d09b13cb3e695f28307e5d889b2e1a66b7af9c5e257e796695b0e6/ruff-0.15.2-py3-none-linux_armv6l.whl", hash = "sha256:120691a6fdae2f16d65435648160f5b81a9625288f75544dc40637436b5d3c0d", size = 10430565, upload-time = "2026-02-19T22:32:41.824Z" },
-    { url = "https://files.pythonhosted.org/packages/71/0b/bb8457b56185ece1305c666dc895832946d24055be90692381c31d57466d/ruff-0.15.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a89056d831256099658b6bba4037ac6dd06f49d194199215befe2bb10457ea5e", size = 10820354, upload-time = "2026-02-19T22:32:07.366Z" },
-    { url = "https://files.pythonhosted.org/packages/2d/c1/e0532d7f9c9e0b14c46f61b14afd563298b8b83f337b6789ddd987e46121/ruff-0.15.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e36dee3a64be0ebd23c86ffa3aa3fd3ac9a712ff295e192243f814a830b6bd87", size = 10170767, upload-time = "2026-02-19T22:32:13.188Z" },
-    { url = "https://files.pythonhosted.org/packages/47/e8/da1aa341d3af017a21c7a62fb5ec31d4e7ad0a93ab80e3a508316efbcb23/ruff-0.15.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9fb47b6d9764677f8c0a193c0943ce9a05d6763523f132325af8a858eadc2b9", size = 10529591, upload-time = "2026-02-19T22:32:02.547Z" },
-    { url = "https://files.pythonhosted.org/packages/93/74/184fbf38e9f3510231fbc5e437e808f0b48c42d1df9434b208821efcd8d6/ruff-0.15.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f376990f9d0d6442ea9014b19621d8f2aaf2b8e39fdbfc79220b7f0c596c9b80", size = 10260771, upload-time = "2026-02-19T22:32:36.938Z" },
-    { url = "https://files.pythonhosted.org/packages/05/ac/605c20b8e059a0bc4b42360414baa4892ff278cec1c91fff4be0dceedefd/ruff-0.15.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dcc987551952d73cbf5c88d9fdee815618d497e4df86cd4c4824cc59d5dd75f", size = 11045791, upload-time = "2026-02-19T22:32:31.642Z" },
-    { url = "https://files.pythonhosted.org/packages/fd/52/db6e419908f45a894924d410ac77d64bdd98ff86901d833364251bd08e22/ruff-0.15.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:42a47fd785cbe8c01b9ff45031af875d101b040ad8f4de7bbb716487c74c9a77", size = 11879271, upload-time = "2026-02-19T22:32:29.305Z" },
-    { url = "https://files.pythonhosted.org/packages/3e/d8/7992b18f2008bdc9231d0f10b16df7dda964dbf639e2b8b4c1b4e91b83af/ruff-0.15.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbe9f49354866e575b4c6943856989f966421870e85cd2ac94dccb0a9dcb2fea", size = 11303707, upload-time = "2026-02-19T22:32:22.492Z" },
-    { url = "https://files.pythonhosted.org/packages/d7/02/849b46184bcfdd4b64cde61752cc9a146c54759ed036edd11857e9b8443b/ruff-0.15.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7a672c82b5f9887576087d97be5ce439f04bbaf548ee987b92d3a7dede41d3a", size = 11149151, upload-time = "2026-02-19T22:32:44.234Z" },
-    { url = "https://files.pythonhosted.org/packages/70/04/f5284e388bab60d1d3b99614a5a9aeb03e0f333847e2429bebd2aaa1feec/ruff-0.15.2-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:72ecc64f46f7019e2bcc3cdc05d4a7da958b629a5ab7033195e11a438403d956", size = 11091132, upload-time = "2026-02-19T22:32:24.691Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/ae/88d844a21110e14d92cf73d57363fab59b727ebeabe78009b9ccb23500af/ruff-0.15.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:8dcf243b15b561c655c1ef2f2b0050e5d50db37fe90115507f6ff37d865dc8b4", size = 10504717, upload-time = "2026-02-19T22:32:26.75Z" },
-    { url = "https://files.pythonhosted.org/packages/64/27/867076a6ada7f2b9c8292884ab44d08fd2ba71bd2b5364d4136f3cd537e1/ruff-0.15.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dab6941c862c05739774677c6273166d2510d254dac0695c0e3f5efa1b5585de", size = 10263122, upload-time = "2026-02-19T22:32:10.036Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/ef/faf9321d550f8ebf0c6373696e70d1758e20ccdc3951ad7af00c0956be7c/ruff-0.15.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b9164f57fc36058e9a6806eb92af185b0697c9fe4c7c52caa431c6554521e5c", size = 10735295, upload-time = "2026-02-19T22:32:39.227Z" },
-    { url = "https://files.pythonhosted.org/packages/2f/55/e8089fec62e050ba84d71b70e7834b97709ca9b7aba10c1a0b196e493f97/ruff-0.15.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:80d24fcae24d42659db7e335b9e1531697a7102c19185b8dc4a028b952865fd8", size = 11241641, upload-time = "2026-02-19T22:32:34.617Z" },
-    { url = "https://files.pythonhosted.org/packages/23/01/1c30526460f4d23222d0fabd5888868262fd0e2b71a00570ca26483cd993/ruff-0.15.2-py3-none-win32.whl", hash = "sha256:fd5ff9e5f519a7e1bd99cbe8daa324010a74f5e2ebc97c6242c08f26f3714f6f", size = 10507885, upload-time = "2026-02-19T22:32:15.635Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/10/3d18e3bbdf8fc50bbb4ac3cc45970aa5a9753c5cb51bf9ed9a3cd8b79fa3/ruff-0.15.2-py3-none-win_amd64.whl", hash = "sha256:d20014e3dfa400f3ff84830dfb5755ece2de45ab62ecea4af6b7262d0fb4f7c5", size = 11623725, upload-time = "2026-02-19T22:32:04.947Z" },
-    { url = "https://files.pythonhosted.org/packages/6d/78/097c0798b1dab9f8affe73da9642bb4500e098cb27fd8dc9724816ac747b/ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e", size = 10941649, upload-time = "2026-02-19T22:32:18.108Z" },
+version = "0.15.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/da/31/d6e536cdebb6568ae75a7f00e4b4819ae0ad2640c3604c305a0428680b0c/ruff-0.15.4.tar.gz", hash = "sha256:3412195319e42d634470cc97aa9803d07e9d5c9223b99bcb1518f0c725f26ae1", size = 4569550, upload-time = "2026-02-26T20:04:14.959Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f2/82/c11a03cfec3a4d26a0ea1e571f0f44be5993b923f905eeddfc397c13d360/ruff-0.15.4-py3-none-linux_armv6l.whl", hash = "sha256:a1810931c41606c686bae8b5b9a8072adac2f611bb433c0ba476acba17a332e0", size = 10453333, upload-time = "2026-02-26T20:04:20.093Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/5d/6a1f271f6e31dffb31855996493641edc3eef8077b883eaf007a2f1c2976/ruff-0.15.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5a1632c66672b8b4d3e1d1782859e98d6e0b4e70829530666644286600a33992", size = 10853356, upload-time = "2026-02-26T20:04:05.808Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/d8/0fab9f8842b83b1a9c2bf81b85063f65e93fb512e60effa95b0be49bfc54/ruff-0.15.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a4386ba2cd6c0f4ff75252845906acc7c7c8e1ac567b7bc3d373686ac8c222ba", size = 10187434, upload-time = "2026-02-26T20:03:54.656Z" },
+    { url = "https://files.pythonhosted.org/packages/85/cc/cc220fd9394eff5db8d94dec199eec56dd6c9f3651d8869d024867a91030/ruff-0.15.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2496488bdfd3732747558b6f95ae427ff066d1fcd054daf75f5a50674411e75", size = 10535456, upload-time = "2026-02-26T20:03:52.738Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/0f/bced38fa5cf24373ec767713c8e4cadc90247f3863605fb030e597878661/ruff-0.15.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f1c4893841ff2d54cbda1b2860fa3260173df5ddd7b95d370186f8a5e66a4ac", size = 10287772, upload-time = "2026-02-26T20:04:08.138Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/90/58a1802d84fed15f8f281925b21ab3cecd813bde52a8ca033a4de8ab0e7a/ruff-0.15.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:820b8766bd65503b6c30aaa6331e8ef3a6e564f7999c844e9a547c40179e440a", size = 11049051, upload-time = "2026-02-26T20:04:03.53Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/ac/b7ad36703c35f3866584564dc15f12f91cb1a26a897dc2fd13d7cb3ae1af/ruff-0.15.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9fb74bab47139c1751f900f857fa503987253c3ef89129b24ed375e72873e85", size = 11890494, upload-time = "2026-02-26T20:04:10.497Z" },
+    { url = "https://files.pythonhosted.org/packages/93/3d/3eb2f47a39a8b0da99faf9c54d3eb24720add1e886a5309d4d1be73a6380/ruff-0.15.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f80c98765949c518142b3a50a5db89343aa90f2c2bf7799de9986498ae6176db", size = 11326221, upload-time = "2026-02-26T20:04:12.84Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/90/bf134f4c1e5243e62690e09d63c55df948a74084c8ac3e48a88468314da6/ruff-0.15.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:451a2e224151729b3b6c9ffb36aed9091b2996fe4bdbd11f47e27d8f2e8888ec", size = 11168459, upload-time = "2026-02-26T20:04:00.969Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/e5/a64d27688789b06b5d55162aafc32059bb8c989c61a5139a36e1368285eb/ruff-0.15.4-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a8f157f2e583c513c4f5f896163a93198297371f34c04220daf40d133fdd4f7f", size = 11104366, upload-time = "2026-02-26T20:03:48.099Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/f6/32d1dcb66a2559763fc3027bdd65836cad9eb09d90f2ed6a63d8e9252b02/ruff-0.15.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:917cc68503357021f541e69b35361c99387cdbbf99bd0ea4aa6f28ca99ff5338", size = 10510887, upload-time = "2026-02-26T20:03:45.771Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/92/22d1ced50971c5b6433aed166fcef8c9343f567a94cf2b9d9089f6aa80fe/ruff-0.15.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e9737c8161da79fd7cfec19f1e35620375bd8b2a50c3e77fa3d2c16f574105cc", size = 10285939, upload-time = "2026-02-26T20:04:22.42Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/f4/7c20aec3143837641a02509a4668fb146a642fd1211846634edc17eb5563/ruff-0.15.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:291258c917539e18f6ba40482fe31d6f5ac023994ee11d7bdafd716f2aab8a68", size = 10765471, upload-time = "2026-02-26T20:03:58.924Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/09/6d2f7586f09a16120aebdff8f64d962d7c4348313c77ebb29c566cefc357/ruff-0.15.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3f83c45911da6f2cd5936c436cf86b9f09f09165f033a99dcf7477e34041cbc3", size = 11263382, upload-time = "2026-02-26T20:04:24.424Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/fa/2ef715a1cd329ef47c1a050e10dee91a9054b7ce2fcfdd6a06d139afb7ec/ruff-0.15.4-py3-none-win32.whl", hash = "sha256:65594a2d557d4ee9f02834fcdf0a28daa8b3b9f6cb2cb93846025a36db47ef22", size = 10506664, upload-time = "2026-02-26T20:03:50.56Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/a8/c688ef7e29983976820d18710f955751d9f4d4eb69df658af3d006e2ba3e/ruff-0.15.4-py3-none-win_amd64.whl", hash = "sha256:04196ad44f0df220c2ece5b0e959c2f37c777375ec744397d21d15b50a75264f", size = 11651048, upload-time = "2026-02-26T20:04:17.191Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/0a/9e1be9035b37448ce2e68c978f0591da94389ade5a5abafa4cf99985d1b2/ruff-0.15.4-py3-none-win_arm64.whl", hash = "sha256:60d5177e8cfc70e51b9c5fad936c634872a74209f934c1e79107d11787ad5453", size = 10966776, upload-time = "2026-02-26T20:03:56.908Z" },
 ]
 
 [[package]]

From 78addcea6f2a020ce4762aec0bdac10e43cf1e48 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 28 Feb 2026 11:05:16 +0700
Subject: [PATCH 161/236] Fix formatting of streaming response for thought and
 text deltas

---
 app/server/chat.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index a8d07cc..a35070a 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -879,7 +879,7 @@ async def generate_stream():
 
                 if t_delta := chunk.thoughts_delta:
                     if not last_chunk_was_thought and not full_thoughts:
-                        yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]}).decode('utf-8')}\n\n"
+                        yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '<think>\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n"
                     full_thoughts += t_delta
                     data = {
                         "id": completion_id,
@@ -895,7 +895,7 @@ async def generate_stream():
 
                 if text_delta := chunk.text_delta:
                     if last_chunk_was_thought:
-                        yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '</think>\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n"
+                        yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '\n</think>\n\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n"
                         last_chunk_was_thought = False
                     full_text += text_delta
                     if visible_delta := suppressor.process(text_delta):
@@ -926,7 +926,7 @@ async def generate_stream():
                 full_thoughts = final_chunk.thoughts
 
         if last_chunk_was_thought:
-            yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '</think>\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n"
+            yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '\n</think>\n\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n"
 
         if remaining_text := suppressor.flush():
             data = {
@@ -940,7 +940,7 @@ async def generate_stream():
             }
             yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
 
-        raw_output_with_think = f"<think>{full_thoughts}</think>\n" if full_thoughts else ""
+        raw_output_with_think = f"<think>\n{full_thoughts}\n</think>\n\n" if full_thoughts else ""
         raw_output_with_think += full_text
         assistant_text, storage_output, tool_calls = _process_llm_output(
             raw_output_with_think, full_text, structured_requirement
@@ -1072,13 +1072,13 @@ async def generate_stream():
                 all_outputs.append(chunk)
                 if t_delta := chunk.thoughts_delta:
                     if not last_chunk_was_thought and not full_thoughts:
-                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '<think>'}).decode('utf-8')}\n\n"
+                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '<think>\n'}).decode('utf-8')}\n\n"
                     full_thoughts += t_delta
                     yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': t_delta}).decode('utf-8')}\n\n"
                     last_chunk_was_thought = True
                 if text_delta := chunk.text_delta:
                     if last_chunk_was_thought:
-                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '</think>\n'}).decode('utf-8')}\n\n"
+                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '\n</think>\n\n'}).decode('utf-8')}\n\n"
                         last_chunk_was_thought = False
                     full_text += text_delta
                     if visible_delta := suppressor.process(text_delta):
@@ -1096,12 +1096,12 @@ async def generate_stream():
                 full_thoughts = final_chunk.thoughts
 
         if last_chunk_was_thought:
-            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '</think>\n'}).decode('utf-8')}\n\n"
+            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '\n</think>\n\n'}).decode('utf-8')}\n\n"
         if remaining_text := suppressor.flush():
             yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': remaining_text}).decode('utf-8')}\n\n"
         yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.done', 'output_index': 0}).decode('utf-8')}\n\n"
 
-        raw_output_with_think = f"<think>{full_thoughts}</think>\n" if full_thoughts else ""
+        raw_output_with_think = f"<think>\n{full_thoughts}\n</think>\n\n" if full_thoughts else ""
         raw_output_with_think += full_text
         assistant_text, storage_output, detected_tool_calls = _process_llm_output(
             raw_output_with_think, full_text, structured_requirement

From 0d7791afff62badc796e36c084a8a78159823692 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 28 Feb 2026 11:13:01 +0700
Subject: [PATCH 162/236] Update dependencies to latest versions

---
 pyproject.toml | 2 +-
 uv.lock        | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c99fbbd..8638b03 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
     "curl-cffi>=0.14.0",
-    "fastapi>=0.133.1",
+    "fastapi>=0.134.0",
     "gemini-webapi>=1.19.2",
     "httptools>=0.7.1",
     "lmdb>=1.7.5",
diff --git a/uv.lock b/uv.lock
index 077ff7a..8fb1f99 100644
--- a/uv.lock
+++ b/uv.lock
@@ -110,7 +110,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.133.1"
+version = "0.134.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -119,9 +119,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/22/6f/0eafed8349eea1fa462238b54a624c8b408cd1ba2795c8e64aa6c34f8ab7/fastapi-0.133.1.tar.gz", hash = "sha256:ed152a45912f102592976fde6cbce7dae1a8a1053da94202e51dd35d184fadd6", size = 378741, upload-time = "2026-02-25T18:18:17.398Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/96/15/647ea81cb73b55b48fb095158a9cd64e42e9e4f1d34dbb5cc4a4939779d6/fastapi-0.134.0.tar.gz", hash = "sha256:3122b1ea0dbeaab48b5976e80b99ca7eda02be154bf03e126a33220e73255a9a", size = 385667, upload-time = "2026-02-27T21:18:12.931Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d2/c9/a175a7779f3599dfa4adfc97a6ce0e157237b3d7941538604aadaf97bfb6/fastapi-0.133.1-py3-none-any.whl", hash = "sha256:658f34ba334605b1617a65adf2ea6461901bdb9af3a3080d63ff791ecf7dc2e2", size = 109029, upload-time = "2026-02-25T18:18:18.578Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/e6/fd49c28a54b7d6f5c64045155e40f6cff9ed4920055043fb5ac7969f7f2f/fastapi-0.134.0-py3-none-any.whl", hash = "sha256:f4e7214f24b2262258492e05c48cf21125e4ffc427e30dd32fb4f74049a3d56a", size = 110404, upload-time = "2026-02-27T21:18:10.809Z" },
 ]
 
 [[package]]
@@ -163,7 +163,7 @@ requires-dist = [
     { name = "orjson", specifier = ">=3.11.7" },
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.1" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" },
-    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.2" },
+    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.4" },
     { name = "uvicorn", specifier = ">=0.41.0" },
     { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" },
 ]

From 0c341827b9b74eca81d07643dd415cc46304e95f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 28 Feb 2026 20:26:54 +0700
Subject: [PATCH 163/236] Refactor `<think>` tag to `reasoning_content` and
 `reasoning_text`

---
 app/models/__init__.py |   6 +
 app/models/models.py   |  40 +++--
 app/server/chat.py     | 347 +++++++++++++++++++++++++++--------------
 app/services/lmdb.py   |  68 ++++----
 app/utils/helper.py    |   1 -
 5 files changed, 298 insertions(+), 164 deletions(-)

diff --git a/app/models/__init__.py b/app/models/__init__.py
index 3896de1..1378f1f 100644
--- a/app/models/__init__.py
+++ b/app/models/__init__.py
@@ -17,6 +17,9 @@
     ResponseInputItem,
     ResponseOutputContent,
     ResponseOutputMessage,
+    ResponseReasoning,
+    ResponseReasoningContentPart,
+    ResponseSummaryPart,
     ResponseToolCall,
     ResponseToolChoice,
     ResponseUsage,
@@ -47,6 +50,9 @@
     "ResponseInputItem",
     "ResponseOutputContent",
     "ResponseOutputMessage",
+    "ResponseReasoning",
+    "ResponseReasoningContentPart",
+    "ResponseSummaryPart",
     "ResponseToolCall",
     "ResponseToolChoice",
     "ResponseUsage",
diff --git a/app/models/models.py b/app/models/models.py
index 3b3e627..8dbe86c 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -171,7 +171,7 @@ class ConversationInStore(BaseModel):
 class ResponseInputContent(BaseModel):
     """Content item for Responses API input."""
 
-    type: Literal["input_text", "input_image", "input_file"]
+    type: Literal["input_text", "output_text", "reasoning_text", "input_image", "input_file"]
     text: str | None = Field(default=None)
     image_url: str | None = Field(default=None)
     detail: Literal["auto", "low", "high"] | None = Field(default=None)
@@ -180,14 +180,6 @@ class ResponseInputContent(BaseModel):
     filename: str | None = Field(default=None)
     annotations: list[dict[str, Any]] = Field(default_factory=list)
 
-    @model_validator(mode="before")
-    @classmethod
-    def normalize_output_text(cls, data: Any) -> Any:
-        """Allow output_text (from previous turns) to be treated as input_text."""
-        if isinstance(data, dict) and data.get("type") == "output_text":
-            data["type"] = "input_text"
-        return data
-
 
 class ResponseInputItem(BaseModel):
     """Single input item for Responses API."""
@@ -236,6 +228,8 @@ class ResponseUsage(BaseModel):
     input_tokens: int
     output_tokens: int
     total_tokens: int
+    input_tokens_details: dict[str, int] | None = Field(default=None)
+    output_tokens_details: dict[str, int] | None = Field(default=None)
 
 
 class ResponseOutputContent(BaseModel):
@@ -255,6 +249,30 @@ class ResponseOutputMessage(BaseModel):
     content: list[ResponseOutputContent]
 
 
+class ResponseSummaryPart(BaseModel):
+    """Summary part for reasoning."""
+
+    type: Literal["summary_text"] = Field(default="summary_text")
+    text: str
+
+
+class ResponseReasoningContentPart(BaseModel):
+    """Content part for reasoning."""
+
+    type: Literal["reasoning_text"] = Field(default="reasoning_text")
+    text: str
+
+
+class ResponseReasoning(BaseModel):
+    """Reasoning item returned by Responses API."""
+
+    id: str
+    type: Literal["reasoning"] = Field(default="reasoning")
+    status: Literal["in_progress", "completed", "incomplete"] = Field(default="completed")
+    summary: list[ResponseSummaryPart] | None = Field(default=None)
+    content: list[ResponseReasoningContentPart] | None = Field(default=None)
+
+
 class ResponseImageGenerationCall(BaseModel):
     """Image generation call record emitted in Responses API."""
 
@@ -285,7 +303,9 @@ class ResponseCreateResponse(BaseModel):
     object: Literal["response"] = Field(default="response")
     created_at: int
     model: str
-    output: list[ResponseOutputMessage | ResponseImageGenerationCall | ResponseToolCall]
+    output: list[
+        ResponseReasoning | ResponseOutputMessage | ResponseImageGenerationCall | ResponseToolCall
+    ]
     status: Literal[
         "in_progress",
         "completed",
diff --git a/app/server/chat.py b/app/server/chat.py
index a35070a..089db7c 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -33,6 +33,8 @@
     ResponseInputItem,
     ResponseOutputContent,
     ResponseOutputMessage,
+    ResponseReasoning,
+    ResponseReasoningContentPart,
     ResponseToolCall,
     ResponseToolChoice,
     ResponseUsage,
@@ -120,8 +122,9 @@ def _calculate_usage(
     messages: list[Message],
     assistant_text: str | None,
     tool_calls: list[Any] | None,
-) -> tuple[int, int, int]:
-    """Calculate prompt, completion and total tokens consistently."""
+    thoughts: str | None = None,
+) -> tuple[int, int, int, int]:
+    """Calculate prompt, completion, total and reasoning tokens consistently."""
     prompt_tokens = sum(estimate_tokens(text_from_message(msg)) for msg in messages)
     tool_args_text = ""
     if tool_calls:
@@ -138,7 +141,15 @@ def _calculate_usage(
         )
 
     completion_tokens = estimate_tokens(completion_basis)
-    return prompt_tokens, completion_tokens, prompt_tokens + completion_tokens
+    reasoning_tokens = estimate_tokens(thoughts) if thoughts else 0
+    total_completion_tokens = completion_tokens + reasoning_tokens
+
+    return (
+        prompt_tokens,
+        total_completion_tokens,
+        prompt_tokens + total_completion_tokens,
+        reasoning_tokens,
+    )
 
 
 def _create_responses_standard_payload(
@@ -151,34 +162,50 @@ def _create_responses_standard_payload(
     usage: ResponseUsage,
     request: ResponseCreateRequest,
     normalized_input: Any,
+    full_thoughts: str | None = None,
 ) -> ResponseCreateResponse:
     """Unified factory for building ResponseCreateResponse objects."""
     message_id = f"msg_{uuid.uuid4().hex}"
-    tool_call_items: list[ResponseToolCall] = []
-    if detected_tool_calls:
-        tool_call_items = [
-            ResponseToolCall(
-                id=call.id if hasattr(call, "id") else call["id"],
+    reason_id = f"reason_{uuid.uuid4().hex}"
+
+    output_items: list[Any] = []
+    if full_thoughts:
+        output_items.append(
+            ResponseReasoning(
+                id=reason_id,
                 status="completed",
-                function=call.function if hasattr(call, "function") else call["function"],
+                content=[ResponseReasoningContentPart(text=full_thoughts)],
             )
-            for call in detected_tool_calls
-        ]
+        )
+
+    output_items.append(
+        ResponseOutputMessage(
+            id=message_id,
+            type="message",
+            role="assistant",
+            content=response_contents,
+        )
+    )
+
+    if detected_tool_calls:
+        output_items.extend(
+            [
+                ResponseToolCall(
+                    id=call.id if hasattr(call, "id") else call["id"],
+                    status="completed",
+                    function=call.function if hasattr(call, "function") else call["function"],
+                )
+                for call in detected_tool_calls
+            ]
+        )
+
+    output_items.extend(image_call_items)
 
     return ResponseCreateResponse(
         id=response_id,
         created_at=created_time,
         model=model_name,
-        output=[
-            ResponseOutputMessage(
-                id=message_id,
-                type="message",
-                role="assistant",
-                content=response_contents,
-            ),
-            *tool_call_items,
-            *image_call_items,
-        ],
+        output=output_items,
         status="completed",
         usage=usage,
         input=normalized_input or None,
@@ -196,6 +223,7 @@ def _create_chat_completion_standard_payload(
     tool_calls_payload: list[dict] | None,
     finish_reason: str,
     usage: dict,
+    reasoning_content: str | None = None,
 ) -> dict:
     """Unified factory for building Chat Completion response dictionaries."""
     return {
@@ -210,6 +238,7 @@ def _create_chat_completion_standard_payload(
                     "role": "assistant",
                     "content": visible_output or None,
                     "tool_calls": tool_calls_payload or None,
+                    "reasoning_content": reasoning_content or None,
                 },
                 "finish_reason": finish_reason,
             }
@@ -219,40 +248,41 @@ def _create_chat_completion_standard_payload(
 
 
 def _process_llm_output(
-    raw_output_with_think: str,
-    raw_output_clean: str,
+    thoughts: str | None,
+    raw_text: str,
     structured_requirement: StructuredOutputRequirement | None,
-) -> tuple[str, str, list[Any]]:
+) -> tuple[str | None, str, str, list[Any]]:
     """
     Post-process Gemini output to extract tool calls and prepare clean text for display and storage.
-    Returns: (visible_text, storage_output, tool_calls)
+    Returns: (thoughts, visible_text, storage_output, tool_calls)
     """
-    visible_with_think, tool_calls = extract_tool_calls(raw_output_with_think)
+    if thoughts:
+        thoughts = thoughts.strip()
+
+    visible_output, tool_calls = extract_tool_calls(raw_text)
     if tool_calls:
         logger.debug(f"Detected {len(tool_calls)} tool call(s) in model output.")
 
-    visible_output = visible_with_think.strip()
+    visible_output = visible_output.strip()
 
-    storage_output = remove_tool_call_blocks(raw_output_clean)
+    storage_output = remove_tool_call_blocks(raw_text)
     storage_output = storage_output.strip()
 
-    if structured_requirement:
-        cleaned_for_json = LMDBConversationStore.remove_think_tags(visible_output)
-        if cleaned_for_json:
-            try:
-                structured_payload = orjson.loads(cleaned_for_json)
-                canonical_output = orjson.dumps(structured_payload).decode("utf-8")
-                visible_output = canonical_output
-                storage_output = canonical_output
-                logger.debug(
-                    f"Structured response fulfilled (schema={structured_requirement.schema_name})."
-                )
-            except orjson.JSONDecodeError:
-                logger.warning(
-                    f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name})."
-                )
+    if structured_requirement and visible_output:
+        try:
+            structured_payload = orjson.loads(visible_output)
+            canonical_output = orjson.dumps(structured_payload).decode("utf-8")
+            visible_output = canonical_output
+            storage_output = canonical_output
+            logger.debug(
+                f"Structured response fulfilled (schema={structured_requirement.schema_name})."
+            )
+        except orjson.JSONDecodeError:
+            logger.warning(
+                f"Failed to decode JSON for structured response (schema={structured_requirement.schema_name})."
+            )
 
-    return visible_output, storage_output, tool_calls
+    return thoughts, visible_output, storage_output, tool_calls
 
 
 def _persist_conversation(
@@ -263,6 +293,7 @@ def _persist_conversation(
     messages: list[Message],
     storage_output: str | None,
     tool_calls: list[Any] | None,
+    thoughts: str | None = None,
 ) -> str | None:
     """Unified logic to save conversation history to LMDB."""
     try:
@@ -270,6 +301,7 @@ def _persist_conversation(
             role="assistant",
             content=storage_output or None,
             tool_calls=tool_calls or None,
+            reasoning_content=thoughts or None,
         )
         full_history = [*messages, current_assistant_message]
         cleaned_history = db.sanitize_messages(full_history)
@@ -515,14 +547,22 @@ def _response_items_to_messages(
             messages.append(Message(role=role, content=content))
         else:
             converted: list[ContentItem] = []
+            reasoning_parts: list[str] = []
             for part in content:
-                if part.type == "input_text":
+                if part.type in ("input_text", "output_text"):
                     text_value = part.text or ""
                     normalized_contents.append(
-                        ResponseInputContent(type="input_text", text=text_value)
+                        ResponseInputContent(type=part.type, text=text_value)
                     )
                     if text_value:
                         converted.append(ContentItem(type="text", text=text_value))
+                elif part.type == "reasoning_text":
+                    text_value = part.text or ""
+                    normalized_contents.append(
+                        ResponseInputContent(type="reasoning_text", text=text_value)
+                    )
+                    if text_value:
+                        reasoning_parts.append(text_value)
                 elif part.type == "input_image":
                     image_url = part.image_url
                     if image_url:
@@ -583,11 +623,16 @@ def _instructions_to_messages(
             instruction_messages.append(Message(role=role, content=content))
         else:
             converted: list[ContentItem] = []
+            reasoning_parts: list[str] = []
             for part in content:
-                if part.type == "input_text":
+                if part.type in ("input_text", "output_text"):
                     text_value = part.text or ""
                     if text_value:
                         converted.append(ContentItem(type="text", text=text_value))
+                elif part.type == "reasoning_text":
+                    text_value = part.text or ""
+                    if text_value:
+                        reasoning_parts.append(text_value)
                 elif part.type == "input_image":
                     image_url = part.image_url
                     if image_url:
@@ -609,7 +654,13 @@ def _instructions_to_messages(
                         file_info["url"] = part.file_url
                     if file_info:
                         converted.append(ContentItem(type="file", file=file_info))
-            instruction_messages.append(Message(role=role, content=converted or None))
+            instruction_messages.append(
+                Message(
+                    role=role,
+                    content=converted or None,
+                    reasoning_content="\n".join(reasoning_parts) if reasoning_parts else None,
+                )
+            )
 
     return instruction_messages
 
@@ -858,7 +909,6 @@ def _create_real_streaming_response(
     async def generate_stream():
         full_thoughts, full_text = "", ""
         has_started = False
-        last_chunk_was_thought = False
         all_outputs: list[ModelOutput] = []
         suppressor = StreamingOutputFilter()
         try:
@@ -878,8 +928,6 @@ async def generate_stream():
                     has_started = True
 
                 if t_delta := chunk.thoughts_delta:
-                    if not last_chunk_was_thought and not full_thoughts:
-                        yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '<think>\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n"
                     full_thoughts += t_delta
                     data = {
                         "id": completion_id,
@@ -887,16 +935,16 @@ async def generate_stream():
                         "created": created_time,
                         "model": model_name,
                         "choices": [
-                            {"index": 0, "delta": {"content": t_delta}, "finish_reason": None}
+                            {
+                                "index": 0,
+                                "delta": {"reasoning_content": t_delta},
+                                "finish_reason": None,
+                            }
                         ],
                     }
                     yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
-                    last_chunk_was_thought = True
 
                 if text_delta := chunk.text_delta:
-                    if last_chunk_was_thought:
-                        yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '\n</think>\n\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n"
-                        last_chunk_was_thought = False
                     full_text += text_delta
                     if visible_delta := suppressor.process(text_delta):
                         data = {
@@ -925,9 +973,6 @@ async def generate_stream():
             if final_chunk.thoughts:
                 full_thoughts = final_chunk.thoughts
 
-        if last_chunk_was_thought:
-            yield f"data: {orjson.dumps({'id': completion_id, 'object': 'chat.completion.chunk', 'created': created_time, 'model': model_name, 'choices': [{'index': 0, 'delta': {'content': '\n</think>\n\n'}, 'finish_reason': None}]}).decode('utf-8')}\n\n"
-
         if remaining_text := suppressor.flush():
             data = {
                 "id": completion_id,
@@ -940,10 +985,8 @@ async def generate_stream():
             }
             yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
 
-        raw_output_with_think = f"<think>\n{full_thoughts}\n</think>\n\n" if full_thoughts else ""
-        raw_output_with_think += full_text
-        assistant_text, storage_output, tool_calls = _process_llm_output(
-            raw_output_with_think, full_text, structured_requirement
+        _thoughts, assistant_text, storage_output, tool_calls = _process_llm_output(
+            full_thoughts, full_text, structured_requirement
         )
 
         images = []
@@ -1004,8 +1047,15 @@ async def generate_stream():
             }
             yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
 
-        p_tok, c_tok, t_tok = _calculate_usage(messages, assistant_text, tool_calls)
-        usage = {"prompt_tokens": p_tok, "completion_tokens": c_tok, "total_tokens": t_tok}
+        p_tok, c_tok, t_tok, r_tok = _calculate_usage(
+            messages, assistant_text, tool_calls, full_thoughts
+        )
+        usage = {
+            "prompt_tokens": p_tok,
+            "completion_tokens": c_tok,
+            "total_tokens": t_tok,
+            "completion_tokens_details": {"reasoning_tokens": r_tok},
+        }
         data = {
             "id": completion_id,
             "object": "chat.completion.chunk",
@@ -1021,9 +1071,10 @@ async def generate_stream():
             model.model_name,
             client_wrapper.id,
             session.metadata,
-            messages,  # This should be the prepared messages
+            messages,
             storage_output,
             tool_calls,
+            full_thoughts,
         )
         yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
         yield "data: [DONE]\n\n"
@@ -1059,11 +1110,15 @@ def _create_responses_real_streaming_response(
 
     async def generate_stream():
         yield f"data: {orjson.dumps({**base_event, 'type': 'response.created', 'response': {'id': response_id, 'object': 'response', 'created_at': created_time, 'model': model_name, 'status': 'in_progress', 'metadata': request.metadata, 'input': None, 'tools': request.tools, 'tool_choice': request.tool_choice}}).decode('utf-8')}\n\n"
-        message_id = f"msg_{uuid.uuid4().hex}"
-        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': 0, 'item': {'id': message_id, 'type': 'message', 'role': 'assistant', 'content': []}}).decode('utf-8')}\n\n"
 
         full_thoughts, full_text = "", ""
+        thought_item_id = f"reason_{uuid.uuid4().hex}"
+        message_item_id = f"msg_{uuid.uuid4().hex}"
+        thought_item_added = False
+        message_item_added = False
         last_chunk_was_thought = False
+        current_idx = 0
+
         all_outputs: list[ModelOutput] = []
         suppressor = StreamingOutputFilter()
 
@@ -1071,18 +1126,31 @@ async def generate_stream():
             async for chunk in generator:
                 all_outputs.append(chunk)
                 if t_delta := chunk.thoughts_delta:
-                    if not last_chunk_was_thought and not full_thoughts:
-                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '<think>\n'}).decode('utf-8')}\n\n"
+                    if not thought_item_added:
+                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': {'id': thought_item_id, 'type': 'reasoning', 'status': 'in_progress', 'content': []}}).decode('utf-8')}\n\n"
+                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.content_part.added', 'output_index': current_idx, 'part_index': 0, 'part': {'type': 'reasoning_text', 'text': ''}}).decode('utf-8')}\n\n"
+                        thought_item_added = True
+
                     full_thoughts += t_delta
-                    yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': t_delta}).decode('utf-8')}\n\n"
+                    yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': current_idx, 'part_index': 0, 'delta': t_delta}).decode('utf-8')}\n\n"
                     last_chunk_was_thought = True
+
                 if text_delta := chunk.text_delta:
                     if last_chunk_was_thought:
-                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '\n</think>\n\n'}).decode('utf-8')}\n\n"
+                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.done', 'output_index': current_idx, 'part_index': 0}).decode('utf-8')}\n\n"
+                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.content_part.done', 'output_index': current_idx, 'part_index': 0}).decode('utf-8')}\n\n"
+                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': {'id': thought_item_id, 'type': 'reasoning', 'status': 'completed', 'content': [{'type': 'reasoning_text', 'text': full_thoughts}]}}).decode('utf-8')}\n\n"
+                        current_idx += 1
                         last_chunk_was_thought = False
+
+                    if not message_item_added:
+                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': {'id': message_item_id, 'type': 'message', 'role': 'assistant', 'content': []}}).decode('utf-8')}\n\n"
+                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.content_part.added', 'output_index': current_idx, 'part_index': 0, 'part': {'type': 'output_text', 'text': ''}}).decode('utf-8')}\n\n"
+                        message_item_added = True
+
                     full_text += text_delta
                     if visible_delta := suppressor.process(text_delta):
-                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': visible_delta}).decode('utf-8')}\n\n"
+                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': current_idx, 'part_index': 0, 'delta': visible_delta}).decode('utf-8')}\n\n"
         except Exception as e:
             logger.exception(f"Error during Responses API streaming: {e}")
             yield f"data: {orjson.dumps({**base_event, 'type': 'error', 'error': {'message': 'Streaming error.'}}).decode('utf-8')}\n\n"
@@ -1096,17 +1164,33 @@ async def generate_stream():
                 full_thoughts = final_chunk.thoughts
 
         if last_chunk_was_thought:
-            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': '\n</think>\n\n'}).decode('utf-8')}\n\n"
-        if remaining_text := suppressor.flush():
-            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': 0, 'delta': remaining_text}).decode('utf-8')}\n\n"
-        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.done', 'output_index': 0}).decode('utf-8')}\n\n"
+            yield f"data: {orjson.dumps({**base_event, 'type': 'response.content_part.done', 'output_index': current_idx, 'part_index': 0}).decode('utf-8')}\n\n"
+            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': {'id': thought_item_id, 'type': 'reasoning', 'status': 'completed', 'content': [{'type': 'reasoning_text', 'text': full_thoughts}]}}).decode('utf-8')}\n\n"
+            current_idx += 1
 
-        raw_output_with_think = f"<think>\n{full_thoughts}\n</think>\n\n" if full_thoughts else ""
-        raw_output_with_think += full_text
-        assistant_text, storage_output, detected_tool_calls = _process_llm_output(
-            raw_output_with_think, full_text, structured_requirement
+        remaining_from_suppressor = suppressor.flush()
+        if remaining_from_suppressor:
+            if not message_item_added:
+                yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': {'id': message_item_id, 'type': 'message', 'role': 'assistant', 'content': []}}).decode('utf-8')}\n\n"
+                yield f"data: {orjson.dumps({**base_event, 'type': 'response.content_part.added', 'output_index': current_idx, 'part_index': 0, 'part': {'type': 'output_text', 'text': ''}}).decode('utf-8')}\n\n"
+                message_item_added = True
+            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': current_idx, 'part_index': 0, 'delta': remaining_from_suppressor}).decode('utf-8')}\n\n"
+
+        # IMPORTANT: Process output now to get the final assistant_text
+        _thoughts, assistant_text, storage_output, detected_tool_calls = _process_llm_output(
+            full_thoughts, full_text, structured_requirement
         )
 
+        response_contents: list[ResponseOutputContent] = []
+        if message_item_added:
+            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.done', 'output_index': current_idx, 'part_index': 0}).decode('utf-8')}\n\n"
+            yield f"data: {orjson.dumps({**base_event, 'type': 'response.content_part.done', 'output_index': current_idx, 'part_index': 0}).decode('utf-8')}\n\n"
+
+            msg_content = [{"type": "output_text", "text": assistant_text}]
+            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': {'id': message_item_id, 'type': 'message', 'role': 'assistant', 'content': msg_content}}).decode('utf-8')}\n\n"
+            response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text))
+            current_idx += 1
+
         images = []
         seen_urls = set()
         for out in all_outputs:
@@ -1116,7 +1200,7 @@ async def generate_stream():
                         images.append(img)
                         seen_urls.add(img.url)
 
-        response_contents, image_call_items = [], []
+        image_call_items: list[ResponseImageGenerationCall] = []
         seen_hashes = set()
         for image in images:
             try:
@@ -1132,25 +1216,20 @@ async def generate_stream():
                     img_id = fname
                     img_format = "png" if isinstance(image, GeneratedImage) else "jpeg"
 
-                image_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
-                image_call_items.append(
-                    ResponseImageGenerationCall(
-                        id=img_id,
-                        result=b64,
-                        output_format=img_format,
-                        size=f"{w}x{h}" if w and h else None,
-                    )
+                img_item = ResponseImageGenerationCall(
+                    id=img_id,
+                    result=b64,
+                    output_format=img_format,
+                    size=f"{w}x{h}" if w and h else None,
                 )
-                response_contents.append(ResponseOutputContent(type="output_text", text=image_url))
+                image_call_items.append(img_item)
+
+                yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': img_item.model_dump(mode='json')}).decode('utf-8')}\n\n"
+                yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': img_item.model_dump(mode='json')}).decode('utf-8')}\n\n"
+                current_idx += 1
             except Exception as exc:
                 logger.warning(f"Failed to process image in stream: {exc}")
 
-        if assistant_text:
-            response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text))
-        if not response_contents:
-            response_contents.append(ResponseOutputContent(type="output_text", text=""))
-
-        # Aggregate images for storage
         image_markdown = ""
         for img_call in image_call_items:
             fname = f"{img_call.id}.{img_call.output_format}"
@@ -1160,21 +1239,28 @@ async def generate_stream():
         if image_markdown:
             storage_output += image_markdown
 
-        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': 0, 'item': {'id': message_id, 'type': 'message', 'role': 'assistant', 'content': [c.model_dump(mode='json') for c in response_contents]}}).decode('utf-8')}\n\n"
-
-        current_idx = 1
         for call in detected_tool_calls:
             tc_item = ResponseToolCall(id=call.id, status="completed", function=call.function)
             yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': tc_item.model_dump(mode='json')}).decode('utf-8')}\n\n"
             yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': tc_item.model_dump(mode='json')}).decode('utf-8')}\n\n"
             current_idx += 1
-        for img_call in image_call_items:
-            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': img_call.model_dump(mode='json')}).decode('utf-8')}\n\n"
-            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': img_call.model_dump(mode='json')}).decode('utf-8')}\n\n"
-            current_idx += 1
 
-        p_tok, c_tok, t_tok = _calculate_usage(messages, assistant_text, detected_tool_calls)
-        usage = ResponseUsage(input_tokens=p_tok, output_tokens=c_tok, total_tokens=t_tok)
+        p_tok, c_tok, t_tok, r_tok = _calculate_usage(
+            messages, assistant_text, detected_tool_calls, full_thoughts
+        )
+        usage = ResponseUsage(
+            input_tokens=p_tok,
+            output_tokens=c_tok,
+            total_tokens=t_tok,
+            output_tokens_details={"reasoning_tokens": r_tok},
+        )
+
+        # Ensure we have at least one content item if none was created
+        if not response_contents:
+            response_contents.append(
+                ResponseOutputContent(type="output_text", text=assistant_text or "")
+            )
+
         payload = _create_responses_standard_payload(
             response_id,
             created_time,
@@ -1185,6 +1271,7 @@ async def generate_stream():
             usage,
             request,
             None,
+            full_thoughts,
         )
         _persist_conversation(
             db,
@@ -1194,8 +1281,10 @@ async def generate_stream():
             messages,
             storage_output,
             detected_tool_calls,
+            full_thoughts,
         )
         yield f"data: {orjson.dumps({**base_event, 'type': 'response.completed', 'response': payload.model_dump(mode='json')}).decode('utf-8')}\n\n"
+        yield f"data: {orjson.dumps({**base_event, 'type': 'response.done'})}\n\n"
         yield "data: [DONE]\n\n"
 
     return StreamingResponse(generate_stream(), media_type="text/event-stream")
@@ -1301,7 +1390,7 @@ async def create_chat_completion(
         )
 
     try:
-        raw_with_t = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=True)
+        thoughts = resp_or_stream.thoughts
         raw_clean = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False)
     except Exception as exc:
         logger.exception("Gemini output parsing failed.")
@@ -1309,8 +1398,8 @@ async def create_chat_completion(
             status_code=status.HTTP_502_BAD_GATEWAY, detail="Malformed response."
         ) from exc
 
-    visible_output, storage_output, tool_calls = _process_llm_output(
-        raw_with_t, raw_clean, structured_requirement
+    thoughts, visible_output, storage_output, tool_calls = _process_llm_output(
+        thoughts, raw_clean, structured_requirement
     )
 
     # Process images for OpenAI non-streaming flow
@@ -1338,8 +1427,15 @@ async def create_chat_completion(
     if tool_calls_payload:
         logger.debug(f"Detected tool calls: {reprlib.repr(tool_calls_payload)}")
 
-    p_tok, c_tok, t_tok = _calculate_usage(request.messages, visible_output, tool_calls)
-    usage = {"prompt_tokens": p_tok, "completion_tokens": c_tok, "total_tokens": t_tok}
+    p_tok, c_tok, t_tok, r_tok = _calculate_usage(
+        request.messages, visible_output, tool_calls, thoughts
+    )
+    usage = {
+        "prompt_tokens": p_tok,
+        "completion_tokens": c_tok,
+        "total_tokens": t_tok,
+        "completion_tokens_details": {"reasoning_tokens": r_tok},
+    }
     payload = _create_chat_completion_standard_payload(
         completion_id,
         created_time,
@@ -1348,6 +1444,7 @@ async def create_chat_completion(
         tool_calls_payload,
         "tool_calls" if tool_calls else "stop",
         usage,
+        thoughts,
     )
     _persist_conversation(
         db,
@@ -1357,6 +1454,7 @@ async def create_chat_completion(
         msgs,  # Use prepared messages 'msgs'
         storage_output,
         tool_calls,
+        thoughts,
     )
     return payload
 
@@ -1470,15 +1568,17 @@ async def create_response(
         )
 
     try:
-        raw_t = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=True)
-        raw_c = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False)
+        thoughts = resp_or_stream.thoughts
+        raw_clean = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False)
     except Exception as exc:
         logger.exception("Gemini parsing failed")
         raise HTTPException(
             status_code=status.HTTP_502_BAD_GATEWAY, detail="Malformed response."
         ) from exc
 
-    assistant_text, storage_output, tool_calls = _process_llm_output(raw_t, raw_c, struct_req)
+    thoughts, assistant_text, storage_output, tool_calls = _process_llm_output(
+        thoughts, raw_clean, struct_req
+    )
     images = resp_or_stream.images or []
     if (
         request.tool_choice is not None and request.tool_choice.type == "image_generation"
@@ -1533,8 +1633,13 @@ async def create_response(
     if image_markdown:
         storage_output += image_markdown
 
-    p_tok, c_tok, t_tok = _calculate_usage(messages, assistant_text, tool_calls)
-    usage = ResponseUsage(input_tokens=p_tok, output_tokens=c_tok, total_tokens=t_tok)
+    p_tok, c_tok, t_tok, r_tok = _calculate_usage(messages, assistant_text, tool_calls, thoughts)
+    usage = ResponseUsage(
+        input_tokens=p_tok,
+        output_tokens=c_tok,
+        total_tokens=t_tok,
+        output_tokens_details={"reasoning_tokens": r_tok},
+    )
     payload = _create_responses_standard_payload(
         response_id,
         created_time,
@@ -1545,8 +1650,16 @@ async def create_response(
         usage,
         request,
         norm_input,
+        thoughts,
     )
     _persist_conversation(
-        db, model.model_name, client.id, session.metadata, messages, storage_output, tool_calls
+        db,
+        model.model_name,
+        client.id,
+        session.metadata,
+        messages,
+        storage_output,
+        tool_calls,
+        thoughts,
     )
     return payload
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 8c4edb4..07f0a23 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -12,7 +12,6 @@
 from app.models import ContentItem, ConversationInStore, Message
 from app.utils import g_config
 from app.utils.helper import (
-    THINK_TAGS_RE,
     extract_tool_calls,
     normalize_llm_text,
     remove_tool_call_blocks,
@@ -42,7 +41,6 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None:
     text = normalize_llm_text(text)
     text = unescape_text(text)
 
-    text = LMDBConversationStore.remove_think_tags(text)
     text = remove_tool_call_blocks(text)
 
     if fuzzy:
@@ -60,6 +58,9 @@ def _hash_message(message: Message, fuzzy: bool = False) -> str:
         "role": message.role,
         "name": message.name or None,
         "tool_call_id": message.tool_call_id or None,
+        "reasoning_content": _normalize_text(message.reasoning_content)
+        if message.reasoning_content
+        else None,
     }
 
     content = message.content
@@ -584,21 +585,23 @@ def __del__(self):
         """Cleanup on destruction."""
         self.close()
 
-    @staticmethod
-    def remove_think_tags(text: str) -> str:
-        """Remove all <think>...</think> tags and strip whitespace."""
-        if not text:
-            return text
-        cleaned_content = THINK_TAGS_RE.sub("", text)
-        return cleaned_content.strip()
-
     @staticmethod
     def sanitize_messages(messages: list[Message]) -> list[Message]:
         """Clean all messages of internal markers, hints and normalize tool calls."""
         cleaned_messages = []
         for msg in messages:
+            update_data = {}
+            content_changed = False
+
+            # Normalize reasoning_content
+            if msg.reasoning_content:
+                norm_reasoning = _normalize_text(msg.reasoning_content)
+                if norm_reasoning != msg.reasoning_content:
+                    update_data["reasoning_content"] = norm_reasoning
+                    content_changed = True
+
             if isinstance(msg.content, str):
-                text = LMDBConversationStore.remove_think_tags(msg.content)
+                text = msg.content
                 tool_calls = msg.tool_calls
 
                 if msg.role == "assistant" and not tool_calls:
@@ -608,48 +611,41 @@ def sanitize_messages(messages: list[Message]) -> list[Message]:
 
                 normalized_content = text.strip() or None
 
-                if normalized_content != msg.content or tool_calls != msg.tool_calls:
-                    cleaned_msg = msg.model_copy(
-                        update={
-                            "content": normalized_content,
-                            "tool_calls": tool_calls or None,
-                        }
-                    )
-                    cleaned_messages.append(cleaned_msg)
-                else:
-                    cleaned_messages.append(msg)
+                if normalized_content != msg.content:
+                    update_data["content"] = normalized_content
+                    content_changed = True
+                if tool_calls != msg.tool_calls:
+                    update_data["tool_calls"] = tool_calls or None
+                    content_changed = True
+
             elif isinstance(msg.content, list):
                 new_content = []
                 all_extracted_calls = list(msg.tool_calls or [])
-                changed = False
+                list_changed = False
 
                 for item in msg.content:
                     if isinstance(item, ContentItem) and item.type == "text" and item.text:
-                        text = LMDBConversationStore.remove_think_tags(item.text)
+                        text = item.text
                         if msg.role == "assistant" and not msg.tool_calls:
                             text, extracted = extract_tool_calls(text)
                             if extracted:
                                 all_extracted_calls.extend(extracted)
-                                changed = True
+                                list_changed = True
                         else:
                             text = strip_system_hints(text)
 
                         if text != item.text:
-                            changed = True
+                            list_changed = True
                             item = item.model_copy(update={"text": text.strip() or None})
                     new_content.append(item)
 
-                if changed:
-                    cleaned_messages.append(
-                        msg.model_copy(
-                            update={
-                                "content": new_content,
-                                "tool_calls": all_extracted_calls or None,
-                            }
-                        )
-                    )
-                else:
-                    cleaned_messages.append(msg)
+                if list_changed:
+                    update_data["content"] = new_content
+                    update_data["tool_calls"] = all_extracted_calls or None
+                    content_changed = True
+
+            if content_changed:
+                cleaned_messages.append(msg.model_copy(update=update_data))
             else:
                 cleaned_messages.append(msg)
         return cleaned_messages
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 9a930b7..187f310 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -86,7 +86,6 @@
 HINT_END_RE = (
     re.compile(rf"\s*{TOOL_HINT_END_ESC}\n?", re.IGNORECASE) if TOOL_HINT_END_ESC else None
 )
-THINK_TAGS_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
 
 # --- Streaming Specific Patterns ---
 _START_PATTERNS = {

From c73ebe27382a4d9cf24f986a2e627cd50ca0da3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 28 Feb 2026 21:57:43 +0700
Subject: [PATCH 164/236] Fix streaming response generation

---
 app/server/chat.py | 479 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 373 insertions(+), 106 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 089db7c..7d3a6cf 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1109,129 +1109,370 @@ def _create_responses_real_streaming_response(
     }
 
     async def generate_stream():
-        yield f"data: {orjson.dumps({**base_event, 'type': 'response.created', 'response': {'id': response_id, 'object': 'response', 'created_at': created_time, 'model': model_name, 'status': 'in_progress', 'metadata': request.metadata, 'input': None, 'tools': request.tools, 'tool_choice': request.tool_choice}}).decode('utf-8')}\n\n"
+        yield f"data: {
+            orjson.dumps(
+                {
+                    **base_event,
+                    'type': 'response.created',
+                    'response': {
+                        'id': response_id,
+                        'object': 'response',
+                        'created_at': created_time,
+                        'model': model_name,
+                        'status': 'in_progress',
+                        'metadata': request.metadata,
+                        'input': None,
+                        'tools': request.tools,
+                        'tool_choice': request.tool_choice,
+                    },
+                }
+            ).decode()
+        }\n\n"
+
+        full_thoughts = ""
+        full_text = ""
+        all_outputs: list[ModelOutput] = []
 
-        full_thoughts, full_text = "", ""
         thought_item_id = f"reason_{uuid.uuid4().hex}"
         message_item_id = f"msg_{uuid.uuid4().hex}"
-        thought_item_added = False
-        message_item_added = False
-        last_chunk_was_thought = False
-        current_idx = 0
 
-        all_outputs: list[ModelOutput] = []
+        thought_open = False
+        message_open = False
+        current_index = 0
+
         suppressor = StreamingOutputFilter()
 
         try:
             async for chunk in generator:
                 all_outputs.append(chunk)
-                if t_delta := chunk.thoughts_delta:
-                    if not thought_item_added:
-                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': {'id': thought_item_id, 'type': 'reasoning', 'status': 'in_progress', 'content': []}}).decode('utf-8')}\n\n"
-                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.content_part.added', 'output_index': current_idx, 'part_index': 0, 'part': {'type': 'reasoning_text', 'text': ''}}).decode('utf-8')}\n\n"
-                        thought_item_added = True
+                if chunk.thoughts_delta:
+                    if not thought_open:
+                        yield f"data: {
+                            orjson.dumps(
+                                {
+                                    **base_event,
+                                    'type': 'response.output_item.added',
+                                    'output_index': current_index,
+                                    'item': {
+                                        'id': thought_item_id,
+                                        'type': 'reasoning',
+                                        'status': 'in_progress',
+                                        'content': [],
+                                    },
+                                }
+                            ).decode()
+                        }\n\n"
 
-                    full_thoughts += t_delta
-                    yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': current_idx, 'part_index': 0, 'delta': t_delta}).decode('utf-8')}\n\n"
-                    last_chunk_was_thought = True
+                        yield f"data: {
+                            orjson.dumps(
+                                {
+                                    **base_event,
+                                    'type': 'response.content_part.added',
+                                    'output_index': current_index,
+                                    'part_index': 0,
+                                    'part': {'type': 'reasoning_text', 'text': ''},
+                                }
+                            ).decode()
+                        }\n\n"
 
-                if text_delta := chunk.text_delta:
-                    if last_chunk_was_thought:
-                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.done', 'output_index': current_idx, 'part_index': 0}).decode('utf-8')}\n\n"
-                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.content_part.done', 'output_index': current_idx, 'part_index': 0}).decode('utf-8')}\n\n"
-                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': {'id': thought_item_id, 'type': 'reasoning', 'status': 'completed', 'content': [{'type': 'reasoning_text', 'text': full_thoughts}]}}).decode('utf-8')}\n\n"
-                        current_idx += 1
-                        last_chunk_was_thought = False
-
-                    if not message_item_added:
-                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': {'id': message_item_id, 'type': 'message', 'role': 'assistant', 'content': []}}).decode('utf-8')}\n\n"
-                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.content_part.added', 'output_index': current_idx, 'part_index': 0, 'part': {'type': 'output_text', 'text': ''}}).decode('utf-8')}\n\n"
-                        message_item_added = True
+                        thought_open = True
 
-                    full_text += text_delta
-                    if visible_delta := suppressor.process(text_delta):
-                        yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': current_idx, 'part_index': 0, 'delta': visible_delta}).decode('utf-8')}\n\n"
-        except Exception as e:
-            logger.exception(f"Error during Responses API streaming: {e}")
-            yield f"data: {orjson.dumps({**base_event, 'type': 'error', 'error': {'message': 'Streaming error.'}}).decode('utf-8')}\n\n"
+                    full_thoughts += chunk.thoughts_delta
+
+                    yield f"data: {
+                        orjson.dumps(
+                            {
+                                **base_event,
+                                'type': 'response.output_text.delta',
+                                'output_index': current_index,
+                                'part_index': 0,
+                                'delta': chunk.thoughts_delta,
+                            }
+                        ).decode()
+                    }\n\n"
+
+                if chunk.text_delta:
+                    if thought_open:
+                        yield f"data: {
+                            orjson.dumps(
+                                {
+                                    **base_event,
+                                    'type': 'response.output_text.done',
+                                    'output_index': current_index,
+                                    'part_index': 0,
+                                }
+                            ).decode()
+                        }\n\n"
+
+                        yield f"data: {
+                            orjson.dumps(
+                                {
+                                    **base_event,
+                                    'type': 'response.content_part.done',
+                                    'output_index': current_index,
+                                    'part_index': 0,
+                                }
+                            ).decode()
+                        }\n\n"
+
+                        yield f"data: {
+                            orjson.dumps(
+                                {
+                                    **base_event,
+                                    'type': 'response.output_item.done',
+                                    'output_index': current_index,
+                                    'item': {
+                                        'id': thought_item_id,
+                                        'type': 'reasoning',
+                                        'status': 'completed',
+                                        'content': [
+                                            {'type': 'reasoning_text', 'text': full_thoughts}
+                                        ],
+                                    },
+                                }
+                            ).decode()
+                        }\n\n"
+
+                        current_index += 1
+                        thought_open = False
+
+                    if not message_open:
+                        yield f"data: {
+                            orjson.dumps(
+                                {
+                                    **base_event,
+                                    'type': 'response.output_item.added',
+                                    'output_index': current_index,
+                                    'item': {
+                                        'id': message_item_id,
+                                        'type': 'message',
+                                        'role': 'assistant',
+                                        'content': [],
+                                    },
+                                }
+                            ).decode()
+                        }\n\n"
+
+                        yield f"data: {
+                            orjson.dumps(
+                                {
+                                    **base_event,
+                                    'type': 'response.content_part.added',
+                                    'output_index': current_index,
+                                    'part_index': 0,
+                                    'part': {'type': 'output_text', 'text': ''},
+                                }
+                            ).decode()
+                        }\n\n"
+
+                        message_open = True
+
+                    full_text += chunk.text_delta
+
+                    if visible := suppressor.process(chunk.text_delta):
+                        yield f"data: {
+                            orjson.dumps(
+                                {
+                                    **base_event,
+                                    'type': 'response.output_text.delta',
+                                    'output_index': current_index,
+                                    'part_index': 0,
+                                    'delta': visible,
+                                }
+                            ).decode()
+                        }\n\n"
+
+        except Exception:
+            logger.exception("Responses streaming error")
+            yield f"data: {
+                orjson.dumps(
+                    {
+                        **base_event,
+                        'type': 'error',
+                        'error': {'message': 'Streaming error.'},
+                    }
+                ).decode()
+            }\n\n"
             return
 
         if all_outputs:
-            final_chunk = all_outputs[-1]
-            if final_chunk.text:
-                full_text = final_chunk.text
-            if final_chunk.thoughts:
-                full_thoughts = final_chunk.thoughts
-
-        if last_chunk_was_thought:
-            yield f"data: {orjson.dumps({**base_event, 'type': 'response.content_part.done', 'output_index': current_idx, 'part_index': 0}).decode('utf-8')}\n\n"
-            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': {'id': thought_item_id, 'type': 'reasoning', 'status': 'completed', 'content': [{'type': 'reasoning_text', 'text': full_thoughts}]}}).decode('utf-8')}\n\n"
-            current_idx += 1
+            last = all_outputs[-1]
+            if last.text:
+                full_text = last.text
+            if last.thoughts:
+                full_thoughts = last.thoughts
+
+        if thought_open:
+            yield f"data: {
+                orjson.dumps(
+                    {
+                        **base_event,
+                        'type': 'response.output_text.done',
+                        'output_index': current_index,
+                        'part_index': 0,
+                    }
+                ).decode()
+            }\n\n"
+
+            yield f"data: {
+                orjson.dumps(
+                    {
+                        **base_event,
+                        'type': 'response.content_part.done',
+                        'output_index': current_index,
+                        'part_index': 0,
+                    }
+                ).decode()
+            }\n\n"
+
+            yield f"data: {
+                orjson.dumps(
+                    {
+                        **base_event,
+                        'type': 'response.output_item.done',
+                        'output_index': current_index,
+                        'item': {
+                            'id': thought_item_id,
+                            'type': 'reasoning',
+                            'status': 'completed',
+                            'content': [{'type': 'reasoning_text', 'text': full_thoughts}],
+                        },
+                    }
+                ).decode()
+            }\n\n"
+
+            current_index += 1
+
+        remaining = suppressor.flush()
+        if remaining and message_open:
+            yield f"data: {
+                orjson.dumps(
+                    {
+                        **base_event,
+                        'type': 'response.output_text.delta',
+                        'output_index': current_index,
+                        'part_index': 0,
+                        'delta': remaining,
+                    }
+                ).decode()
+            }\n\n"
+
+        if message_open:
+            yield f"data: {
+                orjson.dumps(
+                    {
+                        **base_event,
+                        'type': 'response.output_text.done',
+                        'output_index': current_index,
+                        'part_index': 0,
+                    }
+                ).decode()
+            }\n\n"
+
+            yield f"data: {
+                orjson.dumps(
+                    {
+                        **base_event,
+                        'type': 'response.content_part.done',
+                        'output_index': current_index,
+                        'part_index': 0,
+                    }
+                ).decode()
+            }\n\n"
+
+            yield f"data: {
+                orjson.dumps(
+                    {
+                        **base_event,
+                        'type': 'response.output_item.done',
+                        'output_index': current_index,
+                        'item': {
+                            'id': message_item_id,
+                            'type': 'message',
+                            'role': 'assistant',
+                            'content': [{'type': 'output_text', 'text': full_text}],
+                        },
+                    }
+                ).decode()
+            }\n\n"
 
-        remaining_from_suppressor = suppressor.flush()
-        if remaining_from_suppressor:
-            if not message_item_added:
-                yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': {'id': message_item_id, 'type': 'message', 'role': 'assistant', 'content': []}}).decode('utf-8')}\n\n"
-                yield f"data: {orjson.dumps({**base_event, 'type': 'response.content_part.added', 'output_index': current_idx, 'part_index': 0, 'part': {'type': 'output_text', 'text': ''}}).decode('utf-8')}\n\n"
-                message_item_added = True
-            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.delta', 'output_index': current_idx, 'part_index': 0, 'delta': remaining_from_suppressor}).decode('utf-8')}\n\n"
+            current_index += 1
 
-        # IMPORTANT: Process output now to get the final assistant_text
         _thoughts, assistant_text, storage_output, detected_tool_calls = _process_llm_output(
             full_thoughts, full_text, structured_requirement
         )
 
-        response_contents: list[ResponseOutputContent] = []
-        if message_item_added:
-            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_text.done', 'output_index': current_idx, 'part_index': 0}).decode('utf-8')}\n\n"
-            yield f"data: {orjson.dumps({**base_event, 'type': 'response.content_part.done', 'output_index': current_idx, 'part_index': 0}).decode('utf-8')}\n\n"
-
-            msg_content = [{"type": "output_text", "text": assistant_text}]
-            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': {'id': message_item_id, 'type': 'message', 'role': 'assistant', 'content': msg_content}}).decode('utf-8')}\n\n"
-            response_contents.append(ResponseOutputContent(type="output_text", text=assistant_text))
-            current_idx += 1
+        image_items: list[ResponseImageGenerationCall] = []
+        final_response_contents: list[ResponseOutputContent] = []
+        seen_hashes = set()
 
-        images = []
-        seen_urls = set()
         for out in all_outputs:
             if out.images:
-                for img in out.images:
-                    if img.url not in seen_urls:
-                        images.append(img)
-                        seen_urls.add(img.url)
+                for image in out.images:
+                    try:
+                        b64, w, h, fname, fhash = await _image_to_base64(image, image_store)
+                        if fhash in seen_hashes:
+                            continue
+                        seen_hashes.add(fhash)
+
+                        if "." in fname:
+                            img_id, fmt = fname.rsplit(".", 1)
+                        else:
+                            img_id = fname
+                            fmt = "png"
+
+                        img_item = ResponseImageGenerationCall(
+                            id=img_id,
+                            result=b64,
+                            output_format=fmt,
+                            size=f"{w}x{h}" if w and h else None,
+                        )
 
-        image_call_items: list[ResponseImageGenerationCall] = []
-        seen_hashes = set()
-        for image in images:
-            try:
-                b64, w, h, fname, fhash = await _image_to_base64(image, image_store)
-                if fhash in seen_hashes:
-                    (image_store / fname).unlink(missing_ok=True)
-                    continue
-                seen_hashes.add(fhash)
+                        image_url = (
+                            f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
+                        )
+                        final_response_contents.append(
+                            ResponseOutputContent(type="output_text", text=image_url)
+                        )
 
-                if "." in fname:
-                    img_id, img_format = fname.rsplit(".", 1)
-                else:
-                    img_id = fname
-                    img_format = "png" if isinstance(image, GeneratedImage) else "jpeg"
+                        yield f"data: {
+                            orjson.dumps(
+                                {
+                                    **base_event,
+                                    'type': 'response.output_item.added',
+                                    'output_index': current_index,
+                                    'item': img_item.model_dump(mode='json'),
+                                }
+                            ).decode()
+                        }\n\n"
 
-                img_item = ResponseImageGenerationCall(
-                    id=img_id,
-                    result=b64,
-                    output_format=img_format,
-                    size=f"{w}x{h}" if w and h else None,
-                )
-                image_call_items.append(img_item)
+                        yield f"data: {
+                            orjson.dumps(
+                                {
+                                    **base_event,
+                                    'type': 'response.output_item.done',
+                                    'output_index': current_index,
+                                    'item': img_item.model_dump(mode='json'),
+                                }
+                            ).decode()
+                        }\n\n"
 
-                yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': img_item.model_dump(mode='json')}).decode('utf-8')}\n\n"
-                yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': img_item.model_dump(mode='json')}).decode('utf-8')}\n\n"
-                current_idx += 1
-            except Exception as exc:
-                logger.warning(f"Failed to process image in stream: {exc}")
+                        current_index += 1
+                        image_items.append(img_item)
+
+                    except Exception:
+                        logger.warning("Image processing failed")
+
+        if assistant_text:
+            final_response_contents.append(
+                ResponseOutputContent(type="output_text", text=assistant_text)
+            )
+
+        if not final_response_contents:
+            final_response_contents.append(ResponseOutputContent(type="output_text", text=""))
 
         image_markdown = ""
-        for img_call in image_call_items:
+        for img_call in image_items:
             fname = f"{img_call.id}.{img_call.output_format}"
             img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
             image_markdown += f"\n\n{img_url}"
@@ -1241,13 +1482,35 @@ async def generate_stream():
 
         for call in detected_tool_calls:
             tc_item = ResponseToolCall(id=call.id, status="completed", function=call.function)
-            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.added', 'output_index': current_idx, 'item': tc_item.model_dump(mode='json')}).decode('utf-8')}\n\n"
-            yield f"data: {orjson.dumps({**base_event, 'type': 'response.output_item.done', 'output_index': current_idx, 'item': tc_item.model_dump(mode='json')}).decode('utf-8')}\n\n"
-            current_idx += 1
+
+            yield f"data: {
+                orjson.dumps(
+                    {
+                        **base_event,
+                        'type': 'response.output_item.added',
+                        'output_index': current_index,
+                        'item': tc_item.model_dump(mode='json'),
+                    }
+                ).decode()
+            }\n\n"
+
+            yield f"data: {
+                orjson.dumps(
+                    {
+                        **base_event,
+                        'type': 'response.output_item.done',
+                        'output_index': current_index,
+                        'item': tc_item.model_dump(mode='json'),
+                    }
+                ).decode()
+            }\n\n"
+
+            current_index += 1
 
         p_tok, c_tok, t_tok, r_tok = _calculate_usage(
             messages, assistant_text, detected_tool_calls, full_thoughts
         )
+
         usage = ResponseUsage(
             input_tokens=p_tok,
             output_tokens=c_tok,
@@ -1255,24 +1518,19 @@ async def generate_stream():
             output_tokens_details={"reasoning_tokens": r_tok},
         )
 
-        # Ensure we have at least one content item if none was created
-        if not response_contents:
-            response_contents.append(
-                ResponseOutputContent(type="output_text", text=assistant_text or "")
-            )
-
         payload = _create_responses_standard_payload(
             response_id,
             created_time,
             model_name,
             detected_tool_calls,
-            image_call_items,
-            response_contents,
+            image_items,
+            final_response_contents,
             usage,
             request,
             None,
             full_thoughts,
         )
+
         _persist_conversation(
             db,
             model.model_name,
@@ -1283,8 +1541,17 @@ async def generate_stream():
             detected_tool_calls,
             full_thoughts,
         )
-        yield f"data: {orjson.dumps({**base_event, 'type': 'response.completed', 'response': payload.model_dump(mode='json')}).decode('utf-8')}\n\n"
-        yield f"data: {orjson.dumps({**base_event, 'type': 'response.done'})}\n\n"
+
+        yield f"data: {
+            orjson.dumps(
+                {
+                    **base_event,
+                    'type': 'response.completed',
+                    'response': payload.model_dump(mode='json'),
+                }
+            ).decode()
+        }\n\n"
+
         yield "data: [DONE]\n\n"
 
     return StreamingResponse(generate_stream(), media_type="text/event-stream")

From ab706d8c580e7c9778fda3be7a33e8f7da6016a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 28 Feb 2026 22:28:56 +0700
Subject: [PATCH 165/236] Update image storage configuration to use dynamic
 path from settings

---
 app/server/middleware.py | 2 +-
 app/utils/config.py      | 4 ++++
 config/config.yaml       | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/app/server/middleware.py b/app/server/middleware.py
index b5bc55b..2fa016b 100644
--- a/app/server/middleware.py
+++ b/app/server/middleware.py
@@ -13,7 +13,7 @@
 from app.utils import g_config
 
 # Persistent directory for storing generated images
-IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "ai_generated_images"
+IMAGE_STORE_DIR = Path(g_config.storage.images_path)
 IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True)
 
 
diff --git a/app/utils/config.py b/app/utils/config.py
index 7371623..7dd55f4 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -164,6 +164,10 @@ class StorageConfig(BaseModel):
         default="data/lmdb",
         description="Path to the storage directory where data will be saved",
     )
+    images_path: str = Field(
+        default="data/images",
+        description="Path to the directory where generated images will be stored",
+    )
     max_size: int = Field(
         default=1024**2 * 256,  # 256 MB
         ge=1,
diff --git a/config/config.yaml b/config/config.yaml
index f38ef86..bd9fbc0 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -33,6 +33,7 @@ gemini:
 
 storage:
   path: "data/lmdb"        # Database storage path
+  images_path: "data/images" # Image storage path
   max_size: 268435456      # Maximum database size (256 MB)
   retention_days: 14       # Number of days to retain conversations before cleanup
 

From 74badb615c4a6629d5dc0081fea6eb357f9c8b94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 1 Mar 2026 11:09:01 +0700
Subject: [PATCH 166/236] Fix streaming response generation

---
 app/models/models.py |   1 +
 app/server/chat.py   | 738 ++++++++++++++++++++++---------------------
 2 files changed, 371 insertions(+), 368 deletions(-)

diff --git a/app/models/models.py b/app/models/models.py
index 8dbe86c..e310c54 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -245,6 +245,7 @@ class ResponseOutputMessage(BaseModel):
 
     id: str
     type: Literal["message"]
+    status: Literal["in_progress", "completed", "incomplete"] = Field(default="completed")
     role: Literal["assistant"]
     content: list[ResponseOutputContent]
 
diff --git a/app/server/chat.py b/app/server/chat.py
index 7d3a6cf..0a12c8c 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -20,6 +20,8 @@
 
 from app.models import (
     ChatCompletionRequest,
+    ChatCompletionResponse,
+    Choice,
     ContentItem,
     ConversationInStore,
     Message,
@@ -34,12 +36,14 @@
     ResponseOutputContent,
     ResponseOutputMessage,
     ResponseReasoning,
-    ResponseReasoningContentPart,
+    ResponseSummaryPart,
     ResponseToolCall,
     ResponseToolChoice,
     ResponseUsage,
     Tool,
+    ToolCall,
     ToolChoiceFunction,
+    Usage,
 )
 from app.server.middleware import (
     get_image_store_dir,
@@ -165,16 +169,17 @@ def _create_responses_standard_payload(
     full_thoughts: str | None = None,
 ) -> ResponseCreateResponse:
     """Unified factory for building ResponseCreateResponse objects."""
-    message_id = f"msg_{uuid.uuid4().hex}"
-    reason_id = f"reason_{uuid.uuid4().hex}"
+    message_id = f"msg_{uuid.uuid4().hex[:24]}"
+    reason_id = f"rs_{uuid.uuid4().hex[:24]}"
 
     output_items: list[Any] = []
     if full_thoughts:
         output_items.append(
             ResponseReasoning(
                 id=reason_id,
+                type="reasoning",
                 status="completed",
-                content=[ResponseReasoningContentPart(text=full_thoughts)],
+                summary=[ResponseSummaryPart(type="summary_text", text=full_thoughts)],
             )
         )
 
@@ -182,6 +187,7 @@ def _create_responses_standard_payload(
         ResponseOutputMessage(
             id=message_id,
             type="message",
+            status="completed",
             role="assistant",
             content=response_contents,
         )
@@ -192,6 +198,7 @@ def _create_responses_standard_payload(
             [
                 ResponseToolCall(
                     id=call.id if hasattr(call, "id") else call["id"],
+                    type="tool_call",
                     status="completed",
                     function=call.function if hasattr(call, "function") else call["function"],
                 )
@@ -203,15 +210,16 @@ def _create_responses_standard_payload(
 
     return ResponseCreateResponse(
         id=response_id,
+        object="response",
         created_at=created_time,
         model=model_name,
         output=output_items,
         status="completed",
         usage=usage,
         input=normalized_input or None,
-        metadata=request.metadata or None,
-        tools=request.tools,
-        tool_choice=request.tool_choice,
+        metadata=request.metadata or {},
+        tools=request.tools or [],
+        tool_choice=request.tool_choice or "auto",
     )
 
 
@@ -224,27 +232,34 @@ def _create_chat_completion_standard_payload(
     finish_reason: str,
     usage: dict,
     reasoning_content: str | None = None,
-) -> dict:
-    """Unified factory for building Chat Completion response dictionaries."""
-    return {
-        "id": completion_id,
-        "object": "chat.completion",
-        "created": created_time,
-        "model": model_name,
-        "choices": [
-            {
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": visible_output or None,
-                    "tool_calls": tool_calls_payload or None,
-                    "reasoning_content": reasoning_content or None,
-                },
-                "finish_reason": finish_reason,
-            }
+) -> ChatCompletionResponse:
+    """Unified factory for building Chat Completion response objects."""
+    # Convert tool calls to Model objects if they are dicts
+    tool_calls = None
+    if tool_calls_payload:
+        tool_calls = [ToolCall.model_validate(tc) for tc in tool_calls_payload]
+
+    message = Message(
+        role="assistant",
+        content=visible_output or None,
+        tool_calls=tool_calls,
+        reasoning_content=reasoning_content or None,
+    )
+
+    return ChatCompletionResponse(
+        id=completion_id,
+        object="chat.completion",
+        created=created_time,
+        model=model_name,
+        choices=[
+            Choice(
+                index=0,
+                message=message,
+                finish_reason=finish_reason,
+            )
         ],
-        "usage": usage,
-    }
+        usage=Usage(**usage),
+    )
 
 
 def _process_llm_output(
@@ -994,7 +1009,6 @@ async def generate_stream():
         for out in all_outputs:
             if out.images:
                 for img in out.images:
-                    # Use the image URL as a stable identifier across chunks
                     if img.url not in seen_urls:
                         images.append(img)
                         seen_urls.add(img.url)
@@ -1006,7 +1020,6 @@ async def generate_stream():
                 image_store = get_image_store_dir()
                 _, _, _, fname, fhash = await _image_to_base64(image, image_store)
                 if fhash in seen_hashes:
-                    # Duplicate content, delete the file and skip
                     (image_store / fname).unlink(missing_ok=True)
                     continue
                 seen_hashes.add(fhash)
@@ -1019,7 +1032,6 @@ async def generate_stream():
         if image_markdown:
             assistant_text += image_markdown
             storage_output += image_markdown
-            # Send the image Markdown as a final text chunk before usage
             data = {
                 "id": completion_id,
                 "object": "chat.completion.chunk",
@@ -1050,12 +1062,12 @@ async def generate_stream():
         p_tok, c_tok, t_tok, r_tok = _calculate_usage(
             messages, assistant_text, tool_calls, full_thoughts
         )
-        usage = {
-            "prompt_tokens": p_tok,
-            "completion_tokens": c_tok,
-            "total_tokens": t_tok,
-            "completion_tokens_details": {"reasoning_tokens": r_tok},
-        }
+        usage = Usage(
+            prompt_tokens=p_tok,
+            completion_tokens=c_tok,
+            total_tokens=t_tok,
+            completion_tokens_details={"reasoning_tokens": r_tok},
+        )
         data = {
             "id": completion_id,
             "object": "chat.completion.chunk",
@@ -1064,7 +1076,7 @@ async def generate_stream():
             "choices": [
                 {"index": 0, "delta": {}, "finish_reason": "tool_calls" if tool_calls else "stop"}
             ],
-            "usage": usage,
+            "usage": usage.model_dump(mode="json"),
         }
         _persist_conversation(
             db,
@@ -1099,7 +1111,7 @@ def _create_responses_real_streaming_response(
 ) -> StreamingResponse:
     """
     Create a real-time streaming response for the Responses API.
-    Ensures final accumulated text and thoughts are synchronized.
+    Ensures final accumulated text and thoughts are synchronized and follow the formal event stream spec.
     """
     base_event = {
         "id": response_id,
@@ -1109,190 +1121,205 @@ def _create_responses_real_streaming_response(
     }
 
     async def generate_stream():
-        yield f"data: {
-            orjson.dumps(
-                {
-                    **base_event,
-                    'type': 'response.created',
-                    'response': {
-                        'id': response_id,
-                        'object': 'response',
-                        'created_at': created_time,
-                        'model': model_name,
-                        'status': 'in_progress',
-                        'metadata': request.metadata,
-                        'input': None,
-                        'tools': request.tools,
-                        'tool_choice': request.tool_choice,
-                    },
-                }
-            ).decode()
-        }\n\n"
-
-        full_thoughts = ""
-        full_text = ""
+        seq = 0
+
+        def make_event(etype: str, data: dict) -> str:
+            nonlocal seq
+            data["sequence_number"] = seq
+            seq += 1
+            return f"event: {etype}\ndata: {orjson.dumps(data).decode()}\n\n"
+
+        yield make_event(
+            "response.created",
+            {
+                **base_event,
+                "type": "response.created",
+                "response": {
+                    "id": response_id,
+                    "object": "response",
+                    "created_at": created_time,
+                    "model": model_name,
+                    "status": "in_progress",
+                    "metadata": request.metadata or {},
+                    "input": None,
+                    "tools": request.tools or [],
+                    "tool_choice": request.tool_choice or "auto",
+                    "output": [],
+                    "usage": None,
+                },
+            },
+        )
+
+        yield make_event(
+            "response.in_progress",
+            {
+                **base_event,
+                "type": "response.in_progress",
+                "response": {
+                    "id": response_id,
+                    "object": "response",
+                    "created_at": created_time,
+                    "model": model_name,
+                    "status": "in_progress",
+                    "metadata": request.metadata or {},
+                    "output": [],
+                },
+            },
+        )
+
+        full_thoughts, full_text = "", ""
         all_outputs: list[ModelOutput] = []
 
-        thought_item_id = f"reason_{uuid.uuid4().hex}"
-        message_item_id = f"msg_{uuid.uuid4().hex}"
+        thought_item_id = f"rs_{uuid.uuid4().hex[:24]}"
+        message_item_id = f"msg_{uuid.uuid4().hex[:24]}"
 
-        thought_open = False
-        message_open = False
+        thought_open, message_open = False, False
         current_index = 0
-
         suppressor = StreamingOutputFilter()
 
         try:
             async for chunk in generator:
                 all_outputs.append(chunk)
+
                 if chunk.thoughts_delta:
                     if not thought_open:
-                        yield f"data: {
-                            orjson.dumps(
-                                {
-                                    **base_event,
-                                    'type': 'response.output_item.added',
-                                    'output_index': current_index,
-                                    'item': {
-                                        'id': thought_item_id,
-                                        'type': 'reasoning',
-                                        'status': 'in_progress',
-                                        'content': [],
-                                    },
-                                }
-                            ).decode()
-                        }\n\n"
-
-                        yield f"data: {
-                            orjson.dumps(
-                                {
-                                    **base_event,
-                                    'type': 'response.content_part.added',
-                                    'output_index': current_index,
-                                    'part_index': 0,
-                                    'part': {'type': 'reasoning_text', 'text': ''},
-                                }
-                            ).decode()
-                        }\n\n"
+                        yield make_event(
+                            "response.output_item.added",
+                            {
+                                **base_event,
+                                "type": "response.output_item.added",
+                                "output_index": current_index,
+                                "item": ResponseReasoning(
+                                    id=thought_item_id,
+                                    type="reasoning",
+                                    status="in_progress",
+                                    summary=[],
+                                ).model_dump(mode="json"),
+                            },
+                        )
 
+                        yield make_event(
+                            "response.reasoning_summary_part.added",
+                            {
+                                **base_event,
+                                "type": "response.reasoning_summary_part.added",
+                                "item_id": thought_item_id,
+                                "output_index": current_index,
+                                "summary_index": 0,
+                                "part": ResponseSummaryPart(text="").model_dump(mode="json"),
+                            },
+                        )
                         thought_open = True
 
                     full_thoughts += chunk.thoughts_delta
-
-                    yield f"data: {
-                        orjson.dumps(
-                            {
-                                **base_event,
-                                'type': 'response.output_text.delta',
-                                'output_index': current_index,
-                                'part_index': 0,
-                                'delta': chunk.thoughts_delta,
-                            }
-                        ).decode()
-                    }\n\n"
+                    yield make_event(
+                        "response.reasoning_summary_text.delta",
+                        {
+                            **base_event,
+                            "type": "response.reasoning_summary_text.delta",
+                            "item_id": thought_item_id,
+                            "output_index": current_index,
+                            "summary_index": 0,
+                            "delta": chunk.thoughts_delta,
+                        },
+                    )
 
                 if chunk.text_delta:
                     if thought_open:
-                        yield f"data: {
-                            orjson.dumps(
-                                {
-                                    **base_event,
-                                    'type': 'response.output_text.done',
-                                    'output_index': current_index,
-                                    'part_index': 0,
-                                }
-                            ).decode()
-                        }\n\n"
-
-                        yield f"data: {
-                            orjson.dumps(
-                                {
-                                    **base_event,
-                                    'type': 'response.content_part.done',
-                                    'output_index': current_index,
-                                    'part_index': 0,
-                                }
-                            ).decode()
-                        }\n\n"
-
-                        yield f"data: {
-                            orjson.dumps(
-                                {
-                                    **base_event,
-                                    'type': 'response.output_item.done',
-                                    'output_index': current_index,
-                                    'item': {
-                                        'id': thought_item_id,
-                                        'type': 'reasoning',
-                                        'status': 'completed',
-                                        'content': [
-                                            {'type': 'reasoning_text', 'text': full_thoughts}
-                                        ],
-                                    },
-                                }
-                            ).decode()
-                        }\n\n"
-
+                        yield make_event(
+                            "response.reasoning_summary_text.done",
+                            {
+                                **base_event,
+                                "type": "response.reasoning_summary_text.done",
+                                "item_id": thought_item_id,
+                                "output_index": current_index,
+                                "summary_index": 0,
+                                "text": full_thoughts,
+                            },
+                        )
+                        yield make_event(
+                            "response.reasoning_summary_part.done",
+                            {
+                                **base_event,
+                                "type": "response.reasoning_summary_part.done",
+                                "item_id": thought_item_id,
+                                "output_index": current_index,
+                                "summary_index": 0,
+                                "part": ResponseSummaryPart(text=full_thoughts).model_dump(
+                                    mode="json"
+                                ),
+                            },
+                        )
+                        yield make_event(
+                            "response.output_item.done",
+                            {
+                                **base_event,
+                                "type": "response.output_item.done",
+                                "output_index": current_index,
+                                "item": ResponseReasoning(
+                                    id=thought_item_id,
+                                    type="reasoning",
+                                    status="completed",
+                                    summary=[ResponseSummaryPart(text=full_thoughts)],
+                                ).model_dump(mode="json"),
+                            },
+                        )
                         current_index += 1
                         thought_open = False
 
                     if not message_open:
-                        yield f"data: {
-                            orjson.dumps(
-                                {
-                                    **base_event,
-                                    'type': 'response.output_item.added',
-                                    'output_index': current_index,
-                                    'item': {
-                                        'id': message_item_id,
-                                        'type': 'message',
-                                        'role': 'assistant',
-                                        'content': [],
-                                    },
-                                }
-                            ).decode()
-                        }\n\n"
-
-                        yield f"data: {
-                            orjson.dumps(
-                                {
-                                    **base_event,
-                                    'type': 'response.content_part.added',
-                                    'output_index': current_index,
-                                    'part_index': 0,
-                                    'part': {'type': 'output_text', 'text': ''},
-                                }
-                            ).decode()
-                        }\n\n"
+                        yield make_event(
+                            "response.output_item.added",
+                            {
+                                **base_event,
+                                "type": "response.output_item.added",
+                                "output_index": current_index,
+                                "item": ResponseOutputMessage(
+                                    id=message_item_id,
+                                    type="message",
+                                    status="in_progress",
+                                    role="assistant",
+                                    content=[],
+                                ).model_dump(mode="json"),
+                            },
+                        )
 
+                        yield make_event(
+                            "response.content_part.added",
+                            {
+                                **base_event,
+                                "type": "response.content_part.added",
+                                "item_id": message_item_id,
+                                "output_index": current_index,
+                                "content_index": 0,
+                                "part": ResponseOutputContent(
+                                    type="output_text", text=""
+                                ).model_dump(mode="json"),
+                            },
+                        )
                         message_open = True
 
                     full_text += chunk.text_delta
-
                     if visible := suppressor.process(chunk.text_delta):
-                        yield f"data: {
-                            orjson.dumps(
-                                {
-                                    **base_event,
-                                    'type': 'response.output_text.delta',
-                                    'output_index': current_index,
-                                    'part_index': 0,
-                                    'delta': visible,
-                                }
-                            ).decode()
-                        }\n\n"
+                        yield make_event(
+                            "response.output_text.delta",
+                            {
+                                **base_event,
+                                "type": "response.output_text.delta",
+                                "item_id": message_item_id,
+                                "output_index": current_index,
+                                "content_index": 0,
+                                "delta": visible,
+                                "logprobs": [],
+                            },
+                        )
 
         except Exception:
             logger.exception("Responses streaming error")
-            yield f"data: {
-                orjson.dumps(
-                    {
-                        **base_event,
-                        'type': 'error',
-                        'error': {'message': 'Streaming error.'},
-                    }
-                ).decode()
-            }\n\n"
+            yield make_event(
+                "error",
+                {**base_event, "type": "error", "error": {"message": "Streaming error."}},
+            )
             return
 
         if all_outputs:
@@ -1302,106 +1329,105 @@ async def generate_stream():
             if last.thoughts:
                 full_thoughts = last.thoughts
 
-        if thought_open:
-            yield f"data: {
-                orjson.dumps(
-                    {
-                        **base_event,
-                        'type': 'response.output_text.done',
-                        'output_index': current_index,
-                        'part_index': 0,
-                    }
-                ).decode()
-            }\n\n"
-
-            yield f"data: {
-                orjson.dumps(
-                    {
-                        **base_event,
-                        'type': 'response.content_part.done',
-                        'output_index': current_index,
-                        'part_index': 0,
-                    }
-                ).decode()
-            }\n\n"
-
-            yield f"data: {
-                orjson.dumps(
-                    {
-                        **base_event,
-                        'type': 'response.output_item.done',
-                        'output_index': current_index,
-                        'item': {
-                            'id': thought_item_id,
-                            'type': 'reasoning',
-                            'status': 'completed',
-                            'content': [{'type': 'reasoning_text', 'text': full_thoughts}],
-                        },
-                    }
-                ).decode()
-            }\n\n"
-
-            current_index += 1
-
         remaining = suppressor.flush()
         if remaining and message_open:
-            yield f"data: {
-                orjson.dumps(
-                    {
-                        **base_event,
-                        'type': 'response.output_text.delta',
-                        'output_index': current_index,
-                        'part_index': 0,
-                        'delta': remaining,
-                    }
-                ).decode()
-            }\n\n"
-
-        if message_open:
-            yield f"data: {
-                orjson.dumps(
-                    {
-                        **base_event,
-                        'type': 'response.output_text.done',
-                        'output_index': current_index,
-                        'part_index': 0,
-                    }
-                ).decode()
-            }\n\n"
-
-            yield f"data: {
-                orjson.dumps(
-                    {
-                        **base_event,
-                        'type': 'response.content_part.done',
-                        'output_index': current_index,
-                        'part_index': 0,
-                    }
-                ).decode()
-            }\n\n"
-
-            yield f"data: {
-                orjson.dumps(
-                    {
-                        **base_event,
-                        'type': 'response.output_item.done',
-                        'output_index': current_index,
-                        'item': {
-                            'id': message_item_id,
-                            'type': 'message',
-                            'role': 'assistant',
-                            'content': [{'type': 'output_text', 'text': full_text}],
-                        },
-                    }
-                ).decode()
-            }\n\n"
+            yield make_event(
+                "response.output_text.delta",
+                {
+                    **base_event,
+                    "type": "response.output_text.delta",
+                    "item_id": message_item_id,
+                    "output_index": current_index,
+                    "content_index": 0,
+                    "delta": remaining,
+                    "logprobs": [],
+                },
+            )
 
+        if thought_open:
+            yield make_event(
+                "response.reasoning_summary_text.done",
+                {
+                    **base_event,
+                    "type": "response.reasoning_summary_text.done",
+                    "item_id": thought_item_id,
+                    "output_index": current_index,
+                    "summary_index": 0,
+                    "text": full_thoughts,
+                },
+            )
+            yield make_event(
+                "response.reasoning_summary_part.done",
+                {
+                    **base_event,
+                    "type": "response.reasoning_summary_part.done",
+                    "item_id": thought_item_id,
+                    "output_index": current_index,
+                    "summary_index": 0,
+                    "part": ResponseSummaryPart(text=full_thoughts).model_dump(mode="json"),
+                },
+            )
+            yield make_event(
+                "response.output_item.done",
+                {
+                    **base_event,
+                    "type": "response.output_item.done",
+                    "output_index": current_index,
+                    "item": ResponseReasoning(
+                        id=thought_item_id,
+                        type="reasoning",
+                        status="completed",
+                        summary=[ResponseSummaryPart(text=full_thoughts)],
+                    ).model_dump(mode="json"),
+                },
+            )
             current_index += 1
 
         _thoughts, assistant_text, storage_output, detected_tool_calls = _process_llm_output(
             full_thoughts, full_text, structured_requirement
         )
 
+        if message_open:
+            yield make_event(
+                "response.output_text.done",
+                {
+                    **base_event,
+                    "type": "response.output_text.done",
+                    "item_id": message_item_id,
+                    "output_index": current_index,
+                    "content_index": 0,
+                },
+            )
+            yield make_event(
+                "response.content_part.done",
+                {
+                    **base_event,
+                    "type": "response.content_part.done",
+                    "item_id": message_item_id,
+                    "output_index": current_index,
+                    "content_index": 0,
+                    "part": ResponseOutputContent(
+                        type="output_text", text=assistant_text
+                    ).model_dump(mode="json"),
+                },
+            )
+            yield make_event(
+                "response.output_item.done",
+                {
+                    **base_event,
+                    "type": "response.output_item.done",
+                    "output_index": current_index,
+                    "item": ResponseOutputMessage(
+                        id=message_item_id,
+                        type="message",
+                        status="completed",
+                        role="assistant",
+                        content=[ResponseOutputContent(type="output_text", text=assistant_text)],
+                    ).model_dump(mode="json"),
+                },
+            )
+            current_index += 1
+
         image_items: list[ResponseImageGenerationCall] = []
         final_response_contents: list[ResponseOutputContent] = []
         seen_hashes = set()
@@ -1415,11 +1441,9 @@ async def generate_stream():
                             continue
                         seen_hashes.add(fhash)
 
-                        if "." in fname:
-                            img_id, fmt = fname.rsplit(".", 1)
-                        else:
-                            img_id = fname
-                            fmt = "png"
+                        parts = fname.rsplit(".", 1)
+                        img_id = parts[0]
+                        fmt = parts[1] if len(parts) > 1 else "png"
 
                         img_item = ResponseImageGenerationCall(
                             id=img_id,
@@ -1435,89 +1459,67 @@ async def generate_stream():
                             ResponseOutputContent(type="output_text", text=image_url)
                         )
 
-                        yield f"data: {
-                            orjson.dumps(
-                                {
-                                    **base_event,
-                                    'type': 'response.output_item.added',
-                                    'output_index': current_index,
-                                    'item': img_item.model_dump(mode='json'),
-                                }
-                            ).decode()
-                        }\n\n"
-
-                        yield f"data: {
-                            orjson.dumps(
-                                {
-                                    **base_event,
-                                    'type': 'response.output_item.done',
-                                    'output_index': current_index,
-                                    'item': img_item.model_dump(mode='json'),
-                                }
-                            ).decode()
-                        }\n\n"
+                        yield make_event(
+                            "response.output_item.added",
+                            {
+                                **base_event,
+                                "type": "response.output_item.added",
+                                "output_index": current_index,
+                                "item": img_item.model_dump(mode="json"),
+                            },
+                        )
 
+                        yield make_event(
+                            "response.output_item.done",
+                            {
+                                **base_event,
+                                "type": "response.output_item.done",
+                                "output_index": current_index,
+                                "item": img_item.model_dump(mode="json"),
+                            },
+                        )
                         current_index += 1
                         image_items.append(img_item)
-
+                        storage_output += f"\n\n{image_url}"
                     except Exception:
-                        logger.warning("Image processing failed")
-
-        if assistant_text:
-            final_response_contents.append(
-                ResponseOutputContent(type="output_text", text=assistant_text)
-            )
-
-        if not final_response_contents:
-            final_response_contents.append(ResponseOutputContent(type="output_text", text=""))
-
-        image_markdown = ""
-        for img_call in image_items:
-            fname = f"{img_call.id}.{img_call.output_format}"
-            img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
-            image_markdown += f"\n\n{img_url}"
-
-        if image_markdown:
-            storage_output += image_markdown
+                        logger.warning("Image processing failed in stream")
 
         for call in detected_tool_calls:
             tc_item = ResponseToolCall(id=call.id, status="completed", function=call.function)
-
-            yield f"data: {
-                orjson.dumps(
-                    {
-                        **base_event,
-                        'type': 'response.output_item.added',
-                        'output_index': current_index,
-                        'item': tc_item.model_dump(mode='json'),
-                    }
-                ).decode()
-            }\n\n"
-
-            yield f"data: {
-                orjson.dumps(
-                    {
-                        **base_event,
-                        'type': 'response.output_item.done',
-                        'output_index': current_index,
-                        'item': tc_item.model_dump(mode='json'),
-                    }
-                ).decode()
-            }\n\n"
-
+            yield make_event(
+                "response.output_item.added",
+                {
+                    **base_event,
+                    "type": "response.output_item.added",
+                    "output_index": current_index,
+                    "item": tc_item.model_dump(mode="json"),
+                },
+            )
+            yield make_event(
+                "response.output_item.done",
+                {
+                    **base_event,
+                    "type": "response.output_item.done",
+                    "output_index": current_index,
+                    "item": tc_item.model_dump(mode="json"),
+                },
+            )
             current_index += 1
 
+        if assistant_text:
+            final_response_contents.insert(
+                0, ResponseOutputContent(type="output_text", text=assistant_text)
+            )
+
         p_tok, c_tok, t_tok, r_tok = _calculate_usage(
             messages, assistant_text, detected_tool_calls, full_thoughts
         )
-
         usage = ResponseUsage(
             input_tokens=p_tok,
             output_tokens=c_tok,
             total_tokens=t_tok,
             output_tokens_details={"reasoning_tokens": r_tok},
         )
-
         payload = _create_responses_standard_payload(
             response_id,
             created_time,
@@ -1530,7 +1532,6 @@ async def generate_stream():
             None,
             full_thoughts,
         )
-
         _persist_conversation(
             db,
             model.model_name,
@@ -1542,15 +1543,14 @@ async def generate_stream():
             full_thoughts,
         )
 
-        yield f"data: {
-            orjson.dumps(
-                {
-                    **base_event,
-                    'type': 'response.completed',
-                    'response': payload.model_dump(mode='json'),
-                }
-            ).decode()
-        }\n\n"
+        yield make_event(
+            "response.completed",
+            {
+                **base_event,
+                "type": "response.completed",
+                "response": payload.model_dump(mode="json"),
+            },
+        )
 
         yield "data: [DONE]\n\n"
 
@@ -1862,11 +1862,13 @@ async def create_response(
                 continue
             seen_hashes.add(fhash)
 
-            if "." in fname:
-                img_id, img_format = fname.rsplit(".", 1)
-            else:
-                img_id = fname
-                img_format = "png" if isinstance(img, GeneratedImage) else "jpeg"
+            parts = fname.rsplit(".", 1)
+            img_id = parts[0]
+            img_format = (
+                parts[1]
+                if len(parts) > 1
+                else ("png" if isinstance(img, GeneratedImage) else "jpeg")
+            )
 
             contents.append(
                 ResponseOutputContent(

From 2dec77c302c644e2cc62adc3f8f40b357970f123 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 1 Mar 2026 11:42:09 +0700
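Subject: [PATCH 167/236] Fix streaming response generation

Adjust the Responses API payload models and factory:
- add `ResponseTextFormat` / `ResponseTextConfig` and expose a `text` field
  on `ResponseCreateResponse` (type "json_schema" when the request's
  response_format asks for it, otherwise "text")
- add `completed_at` to `ResponseCreateResponse` and populate it in
  `_create_responses_standard_payload`
- default `tools`, `tool_choice`, `metadata` and the `ResponseUsage` token
  detail dicts to non-null values; make `usage` optional
- add `logprobs` to `ResponseOutputContent` and let
  `ResponseReasoning.status` default to None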
---
 app/models/__init__.py |  4 ++++
 app/models/models.py   | 29 ++++++++++++++++++++++-------
 app/server/chat.py     |  8 ++++++++
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/app/models/__init__.py b/app/models/__init__.py
index 1378f1f..7f95131 100644
--- a/app/models/__init__.py
+++ b/app/models/__init__.py
@@ -20,6 +20,8 @@
     ResponseReasoning,
     ResponseReasoningContentPart,
     ResponseSummaryPart,
+    ResponseTextConfig,
+    ResponseTextFormat,
     ResponseToolCall,
     ResponseToolChoice,
     ResponseUsage,
@@ -53,6 +55,8 @@
     "ResponseReasoning",
     "ResponseReasoningContentPart",
     "ResponseSummaryPart",
+    "ResponseTextConfig",
+    "ResponseTextFormat",
     "ResponseToolCall",
     "ResponseToolChoice",
     "ResponseUsage",
diff --git a/app/models/models.py b/app/models/models.py
index e310c54..fccfba1 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -228,8 +228,8 @@ class ResponseUsage(BaseModel):
     input_tokens: int
     output_tokens: int
     total_tokens: int
-    input_tokens_details: dict[str, int] | None = Field(default=None)
-    output_tokens_details: dict[str, int] | None = Field(default=None)
+    input_tokens_details: dict[str, Any] = Field(default_factory=lambda: {"cached_tokens": 0})
+    output_tokens_details: dict[str, Any] = Field(default_factory=lambda: {"reasoning_tokens": 0})
 
 
 class ResponseOutputContent(BaseModel):
@@ -238,6 +238,7 @@ class ResponseOutputContent(BaseModel):
     type: Literal["output_text"]
     text: str | None = Field(default="")
     annotations: list[dict[str, Any]] = Field(default_factory=list)
+    logprobs: list[dict[str, Any]] | None = Field(default=None)
 
 
 class ResponseOutputMessage(BaseModel):
@@ -269,7 +270,7 @@ class ResponseReasoning(BaseModel):
 
     id: str
     type: Literal["reasoning"] = Field(default="reasoning")
-    status: Literal["in_progress", "completed", "incomplete"] = Field(default="completed")
+    status: Literal["in_progress", "completed", "incomplete"] | None = Field(default=None)
     summary: list[ResponseSummaryPart] | None = Field(default=None)
     content: list[ResponseReasoningContentPart] | None = Field(default=None)
 
@@ -297,12 +298,25 @@ class ResponseToolCall(BaseModel):
     function: FunctionCall
 
 
+class ResponseTextFormat(BaseModel):
+    """Text format configuration for Responses API."""
+
+    type: Literal["text", "json_schema"] = Field(default="text")
+
+
+class ResponseTextConfig(BaseModel):
+    """Text configuration for Responses API."""
+
+    format: ResponseTextFormat = Field(default_factory=ResponseTextFormat)
+
+
 class ResponseCreateResponse(BaseModel):
     """Responses API response payload."""
 
     id: str
     object: Literal["response"] = Field(default="response")
     created_at: int
+    completed_at: int | None = Field(default=None)
     model: str
     output: list[
         ResponseReasoning | ResponseOutputMessage | ResponseImageGenerationCall | ResponseToolCall
@@ -315,12 +329,13 @@ class ResponseCreateResponse(BaseModel):
         "cancelled",
         "requires_action",
     ] = Field(default="completed")
-    tool_choice: str | ResponseToolChoice | None = Field(default=None)
-    tools: list[Tool | ResponseImageTool] | None = Field(default=None)
-    usage: ResponseUsage
+    tool_choice: str | ToolChoiceFunction | ResponseToolChoice = Field(default="auto")
+    tools: list[Tool | ResponseImageTool] = Field(default_factory=list)
+    usage: ResponseUsage | None = Field(default=None)
     error: dict[str, Any] | None = Field(default=None)
-    metadata: dict[str, Any] | None = Field(default=None)
+    metadata: dict[str, Any] = Field(default_factory=dict)
     input: str | list[ResponseInputItem] | None = Field(default=None)
+    text: ResponseTextConfig | None = Field(default_factory=ResponseTextConfig)
 
 
 # Rebuild models with forward references
diff --git a/app/server/chat.py b/app/server/chat.py
index 0a12c8c..8d8be91 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -37,6 +37,7 @@
     ResponseOutputMessage,
     ResponseReasoning,
     ResponseSummaryPart,
+    ResponseTextConfig,
     ResponseToolCall,
     ResponseToolChoice,
     ResponseUsage,
@@ -171,6 +172,7 @@ def _create_responses_standard_payload(
     """Unified factory for building ResponseCreateResponse objects."""
     message_id = f"msg_{uuid.uuid4().hex[:24]}"
     reason_id = f"rs_{uuid.uuid4().hex[:24]}"
+    now_ts = int(datetime.now(tz=UTC).timestamp())
 
     output_items: list[Any] = []
     if full_thoughts:
@@ -208,10 +210,15 @@ def _create_responses_standard_payload(
 
     output_items.extend(image_call_items)
 
+    text_config = ResponseTextConfig()
+    if request.response_format and request.response_format.get("type") == "json_schema":
+        text_config.format.type = "json_schema"
+
     return ResponseCreateResponse(
         id=response_id,
         object="response",
         created_at=created_time,
+        completed_at=now_ts,
         model=model_name,
         output=output_items,
         status="completed",
@@ -220,6 +227,7 @@ def _create_responses_standard_payload(
         metadata=request.metadata or {},
         tools=request.tools or [],
         tool_choice=request.tool_choice or "auto",
+        text=text_config,
     )
 
 

From e4da69e5138866960ee27669f431306279446752 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 1 Mar 2026 18:22:57 +0700
Subject: [PATCH 168/236] Remove optional custom cookies parameter

Also bump the minimum fastapi version from 0.134.0 to 0.135.0.
---
 app/services/pool.py | 25 ++++++-------------------
 app/utils/config.py  | 13 -------------
 pyproject.toml       |  2 +-
 uv.lock              |  8 ++++----
 4 files changed, 11 insertions(+), 37 deletions(-)

diff --git a/app/services/pool.py b/app/services/pool.py
index 3c26e3d..3b4197c 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -1,8 +1,6 @@
 import asyncio
-import inspect
 from collections import deque
 
-from gemini_webapi import GeminiClient
 from loguru import logger
 
 from app.utils import g_config
@@ -24,23 +22,12 @@ def __init__(self) -> None:
             raise ValueError("No Gemini clients configured")
 
         for c in g_config.gemini.clients:
-            kwargs = {
-                "client_id": c.id,
-                "secure_1psid": c.secure_1psid,
-                "secure_1psidts": c.secure_1psidts,
-                "proxy": c.proxy,
-            }
-            if c.cookies:
-                sig = inspect.signature(GeminiClient.__init__)
-                if "cookies" in sig.parameters:
-                    kwargs["cookies"] = c.cookies
-                else:
-                    logger.debug(
-                        f"Ignoring 'cookies' in config for client {c.id} because "
-                        "the current version of gemini_webapi doesn't support it."
-                    )
-
-            client = GeminiClientWrapper(**kwargs)
+            client = GeminiClientWrapper(
+                client_id=c.id,
+                secure_1psid=c.secure_1psid,
+                secure_1psidts=c.secure_1psidts,
+                proxy=c.proxy,
+            )
             self._clients.append(client)
             self._id_map[c.id] = client
             self._round_robin.append(client)
diff --git a/app/utils/config.py b/app/utils/config.py
index 7dd55f4..69af2e1 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -42,9 +42,6 @@ class GeminiClientSettings(BaseModel):
     secure_1psid: str = Field(..., description="Gemini Secure 1PSID")
     secure_1psidts: str = Field(..., description="Gemini Secure 1PSIDTS")
     proxy: str | None = Field(default=None, description="Proxy URL for this Gemini client")
-    cookies: dict[str, str] | None = Field(
-        default=None, description="Optional custom cookies for this Gemini client"
-    )
 
     @field_validator("proxy", mode="before")
     @classmethod
@@ -54,16 +51,6 @@ def _blank_proxy_to_none(cls, value: str | None) -> str | None:
         stripped = value.strip()
         return stripped or None
 
-    @field_validator("cookies", mode="before")
-    @classmethod
-    def _parse_cookies(cls, v: Any) -> Any:
-        if isinstance(v, str) and v.strip().startswith("{"):
-            try:
-                return orjson.loads(v)
-            except orjson.JSONDecodeError:
-                pass
-        return v
-
 
 class GeminiModelConfig(BaseModel):
     """Configuration for a custom Gemini model."""
diff --git a/pyproject.toml b/pyproject.toml
index 8638b03..90a2d5c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
     "curl-cffi>=0.14.0",
-    "fastapi>=0.134.0",
+    "fastapi>=0.135.0",
     "gemini-webapi>=1.19.2",
     "httptools>=0.7.1",
     "lmdb>=1.7.5",
diff --git a/uv.lock b/uv.lock
index 8fb1f99..470045b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -110,7 +110,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.134.0"
+version = "0.135.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -119,9 +119,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/96/15/647ea81cb73b55b48fb095158a9cd64e42e9e4f1d34dbb5cc4a4939779d6/fastapi-0.134.0.tar.gz", hash = "sha256:3122b1ea0dbeaab48b5976e80b99ca7eda02be154bf03e126a33220e73255a9a", size = 385667, upload-time = "2026-02-27T21:18:12.931Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/b5/386a9579a299a32365b34097e4eac6a0544ce0d7aa4bb95ce0d71607a999/fastapi-0.135.0.tar.gz", hash = "sha256:bd37903acf014d1284bda027096e460814dca9699f9dacfe11c275749d949f4d", size = 393855, upload-time = "2026-03-01T09:28:46.714Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e3/e6/fd49c28a54b7d6f5c64045155e40f6cff9ed4920055043fb5ac7969f7f2f/fastapi-0.134.0-py3-none-any.whl", hash = "sha256:f4e7214f24b2262258492e05c48cf21125e4ffc427e30dd32fb4f74049a3d56a", size = 110404, upload-time = "2026-02-27T21:18:10.809Z" },
+    { url = "https://files.pythonhosted.org/packages/56/38/fa5dd0e677e1e2e38f858933c4a125e80103e551151f1f661dd4f227210d/fastapi-0.135.0-py3-none-any.whl", hash = "sha256:31e2ddc78d6406c6f7d5d7b9996a057985e2600fbe7e9ba6ace8205d48dff688", size = 114496, upload-time = "2026-03-01T09:28:48.162Z" },
 ]
 
 [[package]]
@@ -155,7 +155,7 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
-    { name = "fastapi", specifier = ">=0.133.1" },
+    { name = "fastapi", specifier = ">=0.135.0" },
     { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=1.7.5" },

From 54dd826542db2cdb54e50d7329547eff0940df34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 2 Mar 2026 23:45:01 +0700
Subject: [PATCH 169/236] Change `watchdog_timeout` to 90s

---
 app/utils/config.py |  2 +-
 config/config.yaml  |  2 +-
 pyproject.toml      |  2 +-
 uv.lock             | 14 +++++++-------
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/app/utils/config.py b/app/utils/config.py
index 69af2e1..fec8eee 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -83,7 +83,7 @@ class GeminiConfig(BaseModel):
         description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom",
     )
     timeout: int = Field(default=600, ge=30, description="Init timeout in seconds")
-    watchdog_timeout: int = Field(default=300, ge=30, description="Watchdog timeout in seconds")
+    watchdog_timeout: int = Field(default=90, ge=30, description="Watchdog timeout in seconds")
     auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies")
     refresh_interval: int = Field(
         default=600,
diff --git a/config/config.yaml b/config/config.yaml
index bd9fbc0..71301d0 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -23,7 +23,7 @@ gemini:
       secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"
       proxy: null          # Optional proxy URL (null/empty means direct connection)
   timeout: 600             # Init timeout in seconds (Not less than 30s)
-  watchdog_timeout: 300    # Watchdog timeout in seconds (Not less than 30s)
+  watchdog_timeout: 90     # Watchdog timeout in seconds (Not less than 30s)
   auto_refresh: true       # Auto-refresh session cookies
   refresh_interval: 600    # Refresh interval in seconds (Not less than 60s)
   verbose: false           # Enable verbose logging for Gemini requests
diff --git a/pyproject.toml b/pyproject.toml
index 90a2d5c..83a1b87 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
     "curl-cffi>=0.14.0",
-    "fastapi>=0.135.0",
+    "fastapi>=0.135.1",
     "gemini-webapi>=1.19.2",
     "httptools>=0.7.1",
     "lmdb>=1.7.5",
diff --git a/uv.lock b/uv.lock
index 470045b..fd1a7df 100644
--- a/uv.lock
+++ b/uv.lock
@@ -110,7 +110,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.135.0"
+version = "0.135.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -119,9 +119,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/a2/b5/386a9579a299a32365b34097e4eac6a0544ce0d7aa4bb95ce0d71607a999/fastapi-0.135.0.tar.gz", hash = "sha256:bd37903acf014d1284bda027096e460814dca9699f9dacfe11c275749d949f4d", size = 393855, upload-time = "2026-03-01T09:28:46.714Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e7/7b/f8e0211e9380f7195ba3f3d40c292594fd81ba8ec4629e3854c353aaca45/fastapi-0.135.1.tar.gz", hash = "sha256:d04115b508d936d254cea545b7312ecaa58a7b3a0f84952535b4c9afae7668cd", size = 394962, upload-time = "2026-03-01T18:18:29.369Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/56/38/fa5dd0e677e1e2e38f858933c4a125e80103e551151f1f661dd4f227210d/fastapi-0.135.0-py3-none-any.whl", hash = "sha256:31e2ddc78d6406c6f7d5d7b9996a057985e2600fbe7e9ba6ace8205d48dff688", size = 114496, upload-time = "2026-03-01T09:28:48.162Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/72/42e900510195b23a56bde950d26a51f8b723846bfcaa0286e90287f0422b/fastapi-0.135.1-py3-none-any.whl", hash = "sha256:46e2fc5745924b7c840f71ddd277382af29ce1cdb7d5eab5bf697e3fb9999c9e", size = 116999, upload-time = "2026-03-01T18:18:30.831Z" },
 ]
 
 [[package]]
@@ -155,7 +155,7 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
-    { name = "fastapi", specifier = ">=0.135.0" },
+    { name = "fastapi", specifier = ">=0.135.1" },
     { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=1.7.5" },
@@ -457,11 +457,11 @@ wheels = [
 
 [[package]]
 name = "python-dotenv"
-version = "1.2.1"
+version = "1.2.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = "2025-10-26T15:12:10.434Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" },
 ]
 
 [[package]]

From 91a64cdc8efd66bad42e204950582b86b4e4b32a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 3 Mar 2026 21:57:03 +0700
Subject: [PATCH 170/236] Refactored Models to improve type safety.

---
 app/models/__init__.py   |  92 +++++---
 app/models/models.py     | 458 +++++++++++++++++++++++++--------------
 app/server/chat.py       | 443 ++++++++++++++++++++++---------------
 app/server/middleware.py |   2 +-
 app/services/client.py   |  35 +--
 app/services/lmdb.py     | 124 +++++++----
 app/utils/config.py      |   2 +-
 app/utils/helper.py      |  14 +-
 scripts/dump_lmdb.py     |   5 +-
 9 files changed, 732 insertions(+), 443 deletions(-)

diff --git a/app/models/__init__.py b/app/models/__init__.py
index 7f95131..1e02dcb 100644
--- a/app/models/__init__.py
+++ b/app/models/__init__.py
@@ -1,69 +1,93 @@
 from .models import (
+    ChatCompletionAssistantContentItem,
+    ChatCompletionChoice,
+    ChatCompletionContentItem,
+    ChatCompletionFunctionTool,
+    ChatCompletionMessage,
+    ChatCompletionMessageToolCall,
+    ChatCompletionNamedToolChoice,
+    ChatCompletionNamedToolChoiceFunction,
     ChatCompletionRequest,
+    ChatCompletionRequestContentItem,
     ChatCompletionResponse,
-    Choice,
-    ContentItem,
+    CompletionUsage,
     ConversationInStore,
     FunctionCall,
+    FunctionCallOutput,
+    FunctionDefinition,
+    FunctionTool,
     HealthCheckResponse,
-    Message,
+    ImageGeneration,
+    ImageGenerationCall,
     ModelData,
     ModelListResponse,
+    ReasoningTextContent,
     ResponseCreateRequest,
     ResponseCreateResponse,
-    ResponseImageGenerationCall,
-    ResponseImageTool,
-    ResponseInputContent,
-    ResponseInputItem,
+    ResponseFormatText,
+    ResponseFormatTextJSONSchemaConfig,
+    ResponseFunctionToolCall,
+    ResponseInputFile,
+    ResponseInputImage,
+    ResponseInputMessage,
+    ResponseInputMessageContentList,
+    ResponseInputText,
     ResponseOutputContent,
     ResponseOutputMessage,
-    ResponseReasoning,
-    ResponseReasoningContentPart,
-    ResponseSummaryPart,
+    ResponseOutputRefusal,
+    ResponseOutputText,
+    ResponseReasoningItem,
     ResponseTextConfig,
-    ResponseTextFormat,
     ResponseToolCall,
-    ResponseToolChoice,
     ResponseUsage,
-    Tool,
-    ToolCall,
+    SummaryTextContent,
     ToolChoiceFunction,
-    ToolChoiceFunctionDetail,
-    ToolFunctionDefinition,
-    Usage,
+    ToolChoiceTypes,
 )
 
 __all__ = [
+    "ChatCompletionAssistantContentItem",
+    "ChatCompletionChoice",
+    "ChatCompletionContentItem",
+    "ChatCompletionFunctionTool",
+    "ChatCompletionMessage",
+    "ChatCompletionMessageToolCall",
+    "ChatCompletionNamedToolChoice",
+    "ChatCompletionNamedToolChoiceFunction",
     "ChatCompletionRequest",
+    "ChatCompletionRequestContentItem",
     "ChatCompletionResponse",
-    "Choice",
-    "ContentItem",
+    "CompletionUsage",
     "ConversationInStore",
     "FunctionCall",
+    "FunctionCallOutput",
+    "FunctionDefinition",
+    "FunctionTool",
     "HealthCheckResponse",
-    "Message",
+    "ImageGeneration",
+    "ImageGenerationCall",
     "ModelData",
     "ModelListResponse",
+    "ReasoningTextContent",
     "ResponseCreateRequest",
     "ResponseCreateResponse",
-    "ResponseImageGenerationCall",
-    "ResponseImageTool",
-    "ResponseInputContent",
-    "ResponseInputItem",
+    "ResponseFormatText",
+    "ResponseFormatTextJSONSchemaConfig",
+    "ResponseFunctionToolCall",
+    "ResponseInputFile",
+    "ResponseInputImage",
+    "ResponseInputMessage",
+    "ResponseInputMessageContentList",
+    "ResponseInputText",
     "ResponseOutputContent",
     "ResponseOutputMessage",
-    "ResponseReasoning",
-    "ResponseReasoningContentPart",
-    "ResponseSummaryPart",
+    "ResponseOutputRefusal",
+    "ResponseOutputText",
+    "ResponseReasoningItem",
     "ResponseTextConfig",
-    "ResponseTextFormat",
     "ResponseToolCall",
-    "ResponseToolChoice",
     "ResponseUsage",
-    "Tool",
-    "ToolCall",
+    "SummaryTextContent",
     "ToolChoiceFunction",
-    "ToolChoiceFunctionDetail",
-    "ToolFunctionDefinition",
-    "Usage",
+    "ToolChoiceTypes",
 ]
diff --git a/app/models/models.py b/app/models/models.py
index fccfba1..7284b5f 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -6,92 +6,99 @@
 from pydantic import BaseModel, Field, model_validator
 
 
-class ContentItem(BaseModel):
-    """Individual content item (text, image, or file) within a message."""
+class FunctionCall(BaseModel):
+    """Executed function call payload."""
+
+    name: str
+    arguments: str
+
+
+class FunctionDefinition(BaseModel):
+    """Schema of a callable function exposed to the model."""
+
+    name: str
+    description: str | None = Field(default=None)
+    parameters: dict[str, Any] | None = Field(default=None)
+
+
+class ChatCompletionRequestContentItem(BaseModel):
+    """Content item for user / system / tool messages."""
 
     type: Literal["text", "image_url", "file", "input_audio"]
     text: str | None = Field(default=None)
     image_url: dict[str, Any] | None = Field(default=None)
     input_audio: dict[str, Any] | None = Field(default=None)
     file: dict[str, Any] | None = Field(default=None)
-    annotations: list[dict[str, Any]] = Field(default_factory=list)
 
 
-class Message(BaseModel):
-    """Message model"""
+class ChatCompletionAssistantContentItem(BaseModel):
+    """Content item for assistant messages.
 
-    role: str
-    content: str | list[ContentItem] | None = Field(default=None)
-    name: str | None = Field(default=None)
-    tool_calls: list[ToolCall] | None = Field(default=None)
-    tool_call_id: str | None = Field(default=None)
+    ``refusal`` is an official OpenAI content part.
+    ``reasoning`` is a community extension used to persist chain-of-thought
+    text for reusable-session matching.
+    """
+
+    type: Literal["text", "refusal", "reasoning"]
+    text: str | None = Field(default=None)
     refusal: str | None = Field(default=None)
-    reasoning_content: str | None = Field(default=None)
-    audio: dict[str, Any] | None = Field(default=None)
     annotations: list[dict[str, Any]] = Field(default_factory=list)
 
-    @model_validator(mode="after")
-    def normalize_role(self) -> Message:
-        """Normalize 'developer' role to 'system' for Gemini compatibility."""
-        if self.role == "developer":
-            self.role = "system"
-        return self
 
+ChatCompletionContentItem = ChatCompletionRequestContentItem | ChatCompletionAssistantContentItem
 
-class Choice(BaseModel):
-    """Choice model"""
 
-    index: int
-    message: Message
-    finish_reason: str
-    logprobs: dict[str, Any] | None = Field(default=None)
-
-
-class FunctionCall(BaseModel):
-    """Function call payload"""
-
-    name: str
-    arguments: str
-
-
-class ToolCall(BaseModel):
-    """Tool call item"""
+class ChatCompletionMessageToolCall(BaseModel):
+    """A single tool call emitted by the assistant."""
 
     id: str
     type: Literal["function"]
     function: FunctionCall
 
 
-class ToolFunctionDefinition(BaseModel):
-    """Function definition for tool."""
+class ChatCompletionMessage(BaseModel):
+    """A single message in a Chat Completions conversation."""
 
-    name: str
-    description: str | None = Field(default=None)
-    parameters: dict[str, Any] | None = Field(default=None)
+    role: Literal["developer", "system", "user", "assistant", "tool", "function"]
+    content: (
+        str | list[ChatCompletionRequestContentItem | ChatCompletionAssistantContentItem] | None
+    ) = Field(default=None)
+    name: str | None = Field(default=None)
+    tool_calls: list[ChatCompletionMessageToolCall] | None = Field(default=None)
+    tool_call_id: str | None = Field(default=None)
+    refusal: str | None = Field(default=None)
+    reasoning_content: str | None = Field(default=None)
+    audio: dict[str, Any] | None = Field(default=None)
+    annotations: list[dict[str, Any]] = Field(default_factory=list)
+
+    @model_validator(mode="after")
+    def normalize_role(self) -> ChatCompletionMessage:
+        """Normalize ``developer`` role to ``system`` for Gemini compatibility."""
+        if self.role == "developer":
+            self.role = "system"
+        return self
 
 
-class Tool(BaseModel):
-    """Tool specification."""
+class ChatCompletionFunctionTool(BaseModel):
+    """A function tool for the Chat Completions API."""
 
     type: Literal["function"]
-    function: ToolFunctionDefinition
-
+    function: FunctionDefinition
 
-class ToolChoiceFunctionDetail(BaseModel):
-    """Detail of a tool choice function."""
 
+class ChatCompletionNamedToolChoiceFunction(BaseModel):
     name: str
 
 
-class ToolChoiceFunction(BaseModel):
-    """Tool choice forcing a specific function."""
+class ChatCompletionNamedToolChoice(BaseModel):
+    """Forces the model to call a specific named function."""
 
     type: Literal["function"]
-    function: ToolChoiceFunctionDetail
+    function: ChatCompletionNamedToolChoiceFunction
 
 
-class Usage(BaseModel):
-    """Usage statistics model"""
+class CompletionUsage(BaseModel):
+    """Token-usage statistics for a Chat Completions response."""
 
     prompt_tokens: int
     completion_tokens: int
@@ -100,130 +107,155 @@ class Usage(BaseModel):
     completion_tokens_details: dict[str, int] | None = Field(default=None)
 
 
-class ModelData(BaseModel):
-    """Model data model"""
+class ChatCompletionChoice(BaseModel):
+    """A single completion choice."""
 
-    id: str
-    object: str = "model"
-    created: int
-    owned_by: str = "google"
+    index: int
+    message: ChatCompletionMessage
+    finish_reason: Literal["stop", "length", "tool_calls", "content_filter"]
+    logprobs: dict[str, Any] | None = Field(default=None)
 
 
 class ChatCompletionRequest(BaseModel):
-    """Chat completion request model"""
+    """Request body for POST /v1/chat/completions."""
 
     model: str
-    messages: list[Message]
+    messages: list[ChatCompletionMessage]
     stream: bool | None = Field(default=False)
-    user: str | None = Field(default=None)
-    temperature: float | None = Field(default=0.7)
-    top_p: float | None = Field(default=1.0)
-    max_tokens: int | None = Field(default=None)
-    tools: list[Tool] | None = Field(default=None)
-    tool_choice: (
-        Literal["none"] | Literal["auto"] | Literal["required"] | ToolChoiceFunction | None
-    ) = Field(default=None)
+    stream_options: dict[str, Any] | None = Field(default=None)
+    prompt_cache_key: str | None = Field(default=None)
+    temperature: float | None = Field(default=1, ge=0, le=2)
+    top_p: float | None = Field(default=1, ge=0, le=1)
+    max_completion_tokens: int | None = Field(default=None)
+    tools: list[ChatCompletionFunctionTool] | None = Field(default=None)
+    tool_choice: Literal["none", "auto", "required"] | ChatCompletionNamedToolChoice | None = Field(
+        default=None
+    )
     response_format: dict[str, Any] | None = Field(default=None)
+    parallel_tool_calls: bool | None = Field(default=True)
 
 
 class ChatCompletionResponse(BaseModel):
-    """Chat completion response model"""
+    """Response body for POST /v1/chat/completions."""
 
     id: str
     object: str = "chat.completion"
     created: int
     model: str
-    choices: list[Choice]
-    usage: Usage
+    choices: list[ChatCompletionChoice]
+    usage: CompletionUsage
+    system_fingerprint: str | None = Field(default=None)
 
 
-class ModelListResponse(BaseModel):
-    """Model list model"""
+class ResponseInputText(BaseModel):
+    """Text content item in a Responses API input message."""
 
-    object: str = "list"
-    data: list[ModelData]
+    type: Literal["input_text"]
+    text: str | None = Field(default=None)
 
 
-class HealthCheckResponse(BaseModel):
-    """Health check response model"""
+class ResponseInputImage(BaseModel):
+    """Image content item in a Responses API input message."""
 
-    ok: bool
-    storage: dict[str, Any] | None = Field(default=None)
-    clients: dict[str, bool] | None = Field(default=None)
-    error: str | None = Field(default=None)
+    type: Literal["input_image"]
+    detail: Literal["auto", "low", "high"] | None = Field(default=None)
+    file_id: str | None = Field(default=None)
+    image_url: str | None = Field(default=None)
 
 
-class ConversationInStore(BaseModel):
-    """Conversation model for storing in the database."""
+class ResponseInputFile(BaseModel):
+    """File content item in a Responses API input message."""
 
-    created_at: datetime | None = Field(default=None)
-    updated_at: datetime | None = Field(default=None)
+    type: Literal["input_file"]
+    file_id: str | None = Field(default=None)
+    file_url: str | None = Field(default=None)
+    file_data: str | None = Field(default=None)
+    filename: str | None = Field(default=None)
 
-    # Gemini Web API does not support changing models once a conversation is created.
-    model: str = Field(..., description="Model used for the conversation")
-    client_id: str = Field(..., description="Identifier of the Gemini client")
-    metadata: list[str | None] = Field(
-        ..., description="Metadata for Gemini API to locate the conversation"
-    )
-    messages: list[Message] = Field(..., description="Message contents in the conversation")
 
+class ResponseInputMessageContentList(BaseModel):
+    """Normalised content item stored on ``ResponseInputMessage`` server-side.
 
-class ResponseInputContent(BaseModel):
-    """Content item for Responses API input."""
+    Superset of all input content types (text, image, file, reasoning) so they
+    can be represented in a single model after round-tripping through the server.
+    """
 
     type: Literal["input_text", "output_text", "reasoning_text", "input_image", "input_file"]
     text: str | None = Field(default=None)
     image_url: str | None = Field(default=None)
     detail: Literal["auto", "low", "high"] | None = Field(default=None)
+    file_id: str | None = Field(default=None)
     file_url: str | None = Field(default=None)
     file_data: str | None = Field(default=None)
     filename: str | None = Field(default=None)
-    annotations: list[dict[str, Any]] = Field(default_factory=list)
 
 
-class ResponseInputItem(BaseModel):
-    """Single input item for Responses API."""
+class ResponseInputMessage(BaseModel):
+    """A single conversation turn in a Responses API input list."""
 
     type: Literal["message"] | None = Field(default="message")
-    role: Literal["user", "assistant", "system", "developer"]
-    content: str | list[ResponseInputContent]
+    role: Literal["user", "system", "developer", "assistant"]
+    content: str | list[ResponseInputText | ResponseInputImage | ResponseInputFile]
+    status: Literal["in_progress", "completed", "incomplete"] = Field(default="completed")
+
 
+class ResponseFunctionToolCall(BaseModel):
+    """An assistant function-call item replayed as part of the input history."""
 
-class ResponseToolChoice(BaseModel):
-    """Tool choice enforcing a specific tool in Responses API."""
+    type: Literal["function_call"] | None = Field(default="function_call")
+    id: str | None = Field(default=None)
+    call_id: str
+    name: str
+    arguments: str
+    status: Literal["in_progress", "completed", "incomplete"] = Field(default="completed")
 
-    type: Literal["function", "image_generation"]
-    function: ToolChoiceFunctionDetail | None = Field(default=None)
 
+class FunctionCallOutput(BaseModel):
+    """A tool-result item providing function output back to the model."""
 
-class ResponseImageTool(BaseModel):
-    """Image generation tool specification for Responses API."""
+    type: Literal["function_call_output"] | None = Field(default="function_call_output")
+    id: str | None = Field(default=None)
+    call_id: str
+    output: str | list[ResponseInputText | ResponseInputImage | ResponseInputFile]
+    status: Literal["in_progress", "completed", "incomplete"] = Field(default="completed")
+
+
+class FunctionTool(BaseModel):
+    """A function tool for the Responses API (flat schema)."""
+
+    type: Literal["function"]
+    name: str
+    description: str | None = Field(default=None)
+    parameters: dict[str, Any] | None = Field(default=None)
+    strict: bool | None = Field(default=None)
+
+
+class ImageGeneration(BaseModel):
+    """Image-generation built-in tool for the Responses API."""
 
     type: Literal["image_generation"]
+    action: Literal["generate", "edit", "auto"] = Field(default="auto")
     model: str | None = Field(default=None)
-    output_format: str | None = Field(default=None)
+    output_format: Literal["png", "webp", "jpeg"] = Field(default="png")
+    quality: Literal["low", "medium", "high", "auto"] = Field(default="auto")
+    size: str = Field(default="auto")
 
 
-class ResponseCreateRequest(BaseModel):
-    """Responses API request payload."""
+class ToolChoiceFunction(BaseModel):
+    """Forces the model to call a specific named function (Responses API)."""
 
-    model: str
-    input: str | list[ResponseInputItem]
-    instructions: str | list[ResponseInputItem] | None = Field(default=None)
-    temperature: float | None = Field(default=0.7)
-    top_p: float | None = Field(default=1.0)
-    max_output_tokens: int | None = Field(default=None)
-    stream: bool | None = Field(default=False)
-    tool_choice: str | ResponseToolChoice | None = Field(default=None)
-    tools: list[Tool | ResponseImageTool] | None = Field(default=None)
-    store: bool | None = Field(default=None)
-    user: str | None = Field(default=None)
-    response_format: dict[str, Any] | None = Field(default=None)
-    metadata: dict[str, Any] | None = Field(default=None)
+    type: Literal["function"]
+    name: str
+
+
+class ToolChoiceTypes(BaseModel):
+    """Forces the model to use a specific built-in tool type."""
+
+    type: Literal["image_generation"]
 
 
 class ResponseUsage(BaseModel):
-    """Usage statistics for Responses API."""
+    """Token-usage statistics for a Responses API response."""
 
     input_tokens: int
     output_tokens: int
@@ -232,51 +264,73 @@ class ResponseUsage(BaseModel):
     output_tokens_details: dict[str, Any] = Field(default_factory=lambda: {"reasoning_tokens": 0})
 
 
-class ResponseOutputContent(BaseModel):
-    """Content item for Responses API output."""
+class ResponseOutputText(BaseModel):
+    """Text content part inside a Responses API output message."""
 
     type: Literal["output_text"]
-    text: str | None = Field(default="")
+    text: str | None = Field(default=None)
     annotations: list[dict[str, Any]] = Field(default_factory=list)
     logprobs: list[dict[str, Any]] | None = Field(default=None)
 
 
+class ResponseOutputRefusal(BaseModel):
+    """Refusal content part inside a Responses API output message."""
+
+    type: Literal["refusal"]
+    refusal: str | None = Field(default=None)
+
+
+ResponseOutputContent = ResponseOutputText | ResponseOutputRefusal
+
+
 class ResponseOutputMessage(BaseModel):
-    """Assistant message returned by Responses API."""
+    """Assistant message output item in a Responses API response."""
 
     id: str
     type: Literal["message"]
     status: Literal["in_progress", "completed", "incomplete"] = Field(default="completed")
     role: Literal["assistant"]
-    content: list[ResponseOutputContent]
+    content: list[ResponseOutputText | ResponseOutputRefusal]
 
 
-class ResponseSummaryPart(BaseModel):
-    """Summary part for reasoning."""
+class SummaryTextContent(BaseModel):
+    """Summary text part inside a reasoning item."""
 
     type: Literal["summary_text"] = Field(default="summary_text")
     text: str
 
 
-class ResponseReasoningContentPart(BaseModel):
-    """Content part for reasoning."""
+class ReasoningTextContent(BaseModel):
+    """Full reasoning text part inside a reasoning item."""
 
     type: Literal["reasoning_text"] = Field(default="reasoning_text")
     text: str
 
 
-class ResponseReasoning(BaseModel):
-    """Reasoning item returned by Responses API."""
+class ResponseReasoningItem(BaseModel):
+    """A reasoning output item emitted by a thinking model."""
 
     id: str
     type: Literal["reasoning"] = Field(default="reasoning")
     status: Literal["in_progress", "completed", "incomplete"] | None = Field(default=None)
-    summary: list[ResponseSummaryPart] | None = Field(default=None)
-    content: list[ResponseReasoningContentPart] | None = Field(default=None)
+    summary: list[SummaryTextContent] | None = Field(default=None)
+    content: list[ReasoningTextContent] | None = Field(default=None)
+    encrypted_content: str | None = Field(default=None)
+
+
+class ResponseToolCall(BaseModel):
+    """A function-call output item emitted by the model."""
+
+    id: str
+    type: Literal["function_call"] = Field(default="function_call")
+    call_id: str
+    name: str
+    arguments: str
+    status: Literal["in_progress", "completed", "incomplete"] = Field(default="completed")
 
 
-class ResponseImageGenerationCall(BaseModel):
-    """Image generation call record emitted in Responses API."""
+class ImageGenerationCall(BaseModel):
+    """An image-generation output item emitted by the Responses API."""
 
     id: str
     type: Literal["image_generation_call"] = Field(default="image_generation_call")
@@ -287,31 +341,67 @@ class ResponseImageGenerationCall(BaseModel):
     revised_prompt: str | None = Field(default=None)
 
 
-class ResponseToolCall(BaseModel):
-    """Tool call record emitted in Responses API."""
+class ResponseFormatText(BaseModel):
+    """Plain-text output format."""
+
+    type: Literal["text"] = Field(default="text")
 
-    id: str
-    type: Literal["tool_call"] = Field(default="tool_call")
-    status: Literal["in_progress", "completed", "failed", "requires_action"] = Field(
-        default="completed"
-    )
-    function: FunctionCall
 
+class ResponseFormatTextJSONSchemaConfig(BaseModel):
+    """JSON-schema-constrained output format."""
 
-class ResponseTextFormat(BaseModel):
-    """Text format configuration for Responses API."""
+    model_config = {"protected_namespaces": (), "arbitrary_types_allowed": True}
 
-    type: Literal["text", "json_schema"] = Field(default="text")
+    type: Literal["json_schema"] = Field(default="json_schema")
+    name: str | None = Field(default=None)
+    schema_: dict[str, Any] | None = Field(
+        default=None, alias="schema", serialization_alias="schema"
+    )
+    description: str | None = Field(default=None)
 
 
 class ResponseTextConfig(BaseModel):
-    """Text configuration for Responses API."""
+    """Top-level text configuration block in a Responses API response."""
+
+    format: ResponseFormatText | ResponseFormatTextJSONSchemaConfig = Field(
+        default_factory=ResponseFormatText
+    )
 
-    format: ResponseTextFormat = Field(default_factory=ResponseTextFormat)
+
+class ResponseCreateRequest(BaseModel):
+    """Request body for POST /v1/responses."""
+
+    model: str
+    input: (
+        str
+        | list[
+            ResponseInputMessage
+            | ResponseOutputMessage
+            | ResponseReasoningItem
+            | ResponseFunctionToolCall
+            | FunctionCallOutput
+            | ImageGenerationCall
+        ]
+    )
+    instructions: str | None = Field(default=None)
+    temperature: float | None = Field(default=1, ge=0, le=2)
+    top_p: float | None = Field(default=1, ge=0, le=1)
+    max_output_tokens: int | None = Field(default=None)
+    stream: bool | None = Field(default=False)
+    stream_options: dict[str, Any] | None = Field(default=None)
+    tool_choice: (
+        Literal["none", "auto", "required"] | ToolChoiceFunction | ToolChoiceTypes | None
+    ) = Field(default=None)
+    tools: list[FunctionTool | ImageGeneration] | None = Field(default=None)
+    store: bool | None = Field(default=None)
+    prompt_cache_key: str | None = Field(default=None)
+    response_format: dict[str, Any] | None = Field(default=None)
+    metadata: dict[str, Any] | None = Field(default=None)
+    parallel_tool_calls: bool | None = Field(default=True)
 
 
 class ResponseCreateResponse(BaseModel):
-    """Responses API response payload."""
+    """Response body for POST /v1/responses."""
 
     id: str
     object: Literal["response"] = Field(default="response")
@@ -319,26 +409,64 @@ class ResponseCreateResponse(BaseModel):
     completed_at: int | None = Field(default=None)
     model: str
     output: list[
-        ResponseReasoning | ResponseOutputMessage | ResponseImageGenerationCall | ResponseToolCall
+        ResponseReasoningItem
+        | ResponseOutputMessage
+        | ResponseFunctionToolCall
+        | ImageGenerationCall
     ]
-    status: Literal[
-        "in_progress",
-        "completed",
-        "failed",
-        "incomplete",
-        "cancelled",
-        "requires_action",
-    ] = Field(default="completed")
-    tool_choice: str | ToolChoiceFunction | ResponseToolChoice = Field(default="auto")
-    tools: list[Tool | ResponseImageTool] = Field(default_factory=list)
+    status: Literal["completed", "failed", "in_progress", "cancelled", "queued", "incomplete"] = (
+        Field(default="completed")
+    )
+    tool_choice: (
+        Literal["none", "auto", "required"] | ToolChoiceFunction | ToolChoiceTypes | None
+    ) = Field(default=None)
+    tools: list[FunctionTool | ImageGeneration] = Field(default_factory=list)
     usage: ResponseUsage | None = Field(default=None)
     error: dict[str, Any] | None = Field(default=None)
     metadata: dict[str, Any] = Field(default_factory=dict)
-    input: str | list[ResponseInputItem] | None = Field(default=None)
     text: ResponseTextConfig | None = Field(default_factory=ResponseTextConfig)
 
 
-# Rebuild models with forward references
-Message.model_rebuild()
-ToolCall.model_rebuild()
+class ModelData(BaseModel):
+    """Single model entry in the model list."""
+
+    id: str
+    object: str = "model"
+    created: int
+    owned_by: str = "google"
+
+
+class ModelListResponse(BaseModel):
+    """Response body for GET /v1/models."""
+
+    object: str = "list"
+    data: list[ModelData]
+
+
+class HealthCheckResponse(BaseModel):
+    """Response body for the health check endpoint."""
+
+    ok: bool
+    storage: dict[str, Any] | None = Field(default=None)
+    clients: dict[str, bool] | None = Field(default=None)
+    error: str | None = Field(default=None)
+
+
+class ConversationInStore(BaseModel):
+    """Persisted conversation record stored in LMDB."""
+
+    created_at: datetime | None = Field(default=None)
+    updated_at: datetime | None = Field(default=None)
+    model: str = Field(..., description="Model used for the conversation")
+    client_id: str = Field(..., description="Identifier of the Gemini client")
+    metadata: list[str | None] = Field(
+        ..., description="Metadata for Gemini API to locate the conversation"
+    )
+    messages: list[ChatCompletionMessage] = Field(
+        ..., description="Message contents in the conversation"
+    )
+
+
+ChatCompletionMessage.model_rebuild()
+ChatCompletionMessageToolCall.model_rebuild()
 ChatCompletionRequest.model_rebuild()
diff --git a/app/server/chat.py b/app/server/chat.py
index 8d8be91..0386bd3 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -7,7 +7,7 @@
 from dataclasses import dataclass
 from datetime import UTC, datetime
 from pathlib import Path
-from typing import Any
+from typing import Any, Literal
 
 import orjson
 from fastapi import APIRouter, Depends, HTTPException, Request, status
@@ -19,32 +19,38 @@
 from loguru import logger
 
 from app.models import (
+    ChatCompletionChoice,
+    ChatCompletionContentItem,
+    ChatCompletionFunctionTool,
+    ChatCompletionMessage,
+    ChatCompletionMessageToolCall,
+    ChatCompletionNamedToolChoice,
     ChatCompletionRequest,
+    ChatCompletionRequestContentItem,
     ChatCompletionResponse,
-    Choice,
-    ContentItem,
+    CompletionUsage,
     ConversationInStore,
-    Message,
+    FunctionCall,
+    FunctionCallOutput,
+    FunctionTool,
+    ImageGeneration,
+    ImageGenerationCall,
     ModelData,
     ModelListResponse,
     ResponseCreateRequest,
     ResponseCreateResponse,
-    ResponseImageGenerationCall,
-    ResponseImageTool,
-    ResponseInputContent,
-    ResponseInputItem,
+    ResponseFormatTextJSONSchemaConfig,
+    ResponseFunctionToolCall,
+    ResponseInputMessage,
     ResponseOutputContent,
     ResponseOutputMessage,
-    ResponseReasoning,
-    ResponseSummaryPart,
+    ResponseOutputText,
+    ResponseReasoningItem,
     ResponseTextConfig,
-    ResponseToolCall,
-    ResponseToolChoice,
     ResponseUsage,
-    Tool,
-    ToolCall,
+    SummaryTextContent,
     ToolChoiceFunction,
-    Usage,
+    ToolChoiceTypes,
 )
 from app.server.middleware import (
     get_image_store_dir,
@@ -124,7 +130,7 @@ async def _image_to_base64(
 
 
 def _calculate_usage(
-    messages: list[Message],
+    messages: list[ChatCompletionMessage],
     assistant_text: str | None,
     tool_calls: list[Any] | None,
     thoughts: str | None = None,
@@ -162,11 +168,10 @@ def _create_responses_standard_payload(
     created_time: int,
     model_name: str,
     detected_tool_calls: list[Any] | None,
-    image_call_items: list[ResponseImageGenerationCall],
+    image_call_items: list[ImageGenerationCall],
     response_contents: list[ResponseOutputContent],
     usage: ResponseUsage,
     request: ResponseCreateRequest,
-    normalized_input: Any,
     full_thoughts: str | None = None,
 ) -> ResponseCreateResponse:
     """Unified factory for building ResponseCreateResponse objects."""
@@ -177,11 +182,11 @@ def _create_responses_standard_payload(
     output_items: list[Any] = []
     if full_thoughts:
         output_items.append(
-            ResponseReasoning(
+            ResponseReasoningItem(
                 id=reason_id,
                 type="reasoning",
                 status="completed",
-                summary=[ResponseSummaryPart(type="summary_text", text=full_thoughts)],
+                summary=[SummaryTextContent(type="summary_text", text=full_thoughts)],
             )
         )
 
@@ -198,11 +203,20 @@ def _create_responses_standard_payload(
     if detected_tool_calls:
         output_items.extend(
             [
-                ResponseToolCall(
+                ResponseFunctionToolCall(
                     id=call.id if hasattr(call, "id") else call["id"],
-                    type="tool_call",
+                    call_id=call.id if hasattr(call, "id") else call["id"],
+                    name=(
+                        call.function.name
+                        if hasattr(call, "function")
+                        else call["function"]["name"]
+                    ),
+                    arguments=(
+                        call.function.arguments
+                        if hasattr(call, "function")
+                        else call["function"]["arguments"]
+                    ),
                     status="completed",
-                    function=call.function if hasattr(call, "function") else call["function"],
                 )
                 for call in detected_tool_calls
             ]
@@ -212,7 +226,7 @@ def _create_responses_standard_payload(
 
     text_config = ResponseTextConfig()
     if request.response_format and request.response_format.get("type") == "json_schema":
-        text_config.format.type = "json_schema"
+        text_config.format = ResponseFormatTextJSONSchemaConfig()
 
     return ResponseCreateResponse(
         id=response_id,
@@ -223,10 +237,9 @@ def _create_responses_standard_payload(
         output=output_items,
         status="completed",
         usage=usage,
-        input=normalized_input or None,
         metadata=request.metadata or {},
         tools=request.tools or [],
-        tool_choice=request.tool_choice or "auto",
+        tool_choice=request.tool_choice if request.tool_choice is not None else "auto",
         text=text_config,
     )
 
@@ -237,7 +250,7 @@ def _create_chat_completion_standard_payload(
     model_name: str,
     visible_output: str | None,
     tool_calls_payload: list[dict] | None,
-    finish_reason: str,
+    finish_reason: Literal["stop", "length", "tool_calls", "content_filter"],
     usage: dict,
     reasoning_content: str | None = None,
 ) -> ChatCompletionResponse:
@@ -245,9 +258,9 @@ def _create_chat_completion_standard_payload(
     # Convert tool calls to Model objects if they are dicts
     tool_calls = None
     if tool_calls_payload:
-        tool_calls = [ToolCall.model_validate(tc) for tc in tool_calls_payload]
+        tool_calls = [ChatCompletionMessageToolCall.model_validate(tc) for tc in tool_calls_payload]
 
-    message = Message(
+    message = ChatCompletionMessage(
         role="assistant",
         content=visible_output or None,
         tool_calls=tool_calls,
@@ -260,13 +273,13 @@ def _create_chat_completion_standard_payload(
         created=created_time,
         model=model_name,
         choices=[
-            Choice(
+            ChatCompletionChoice(
                 index=0,
                 message=message,
                 finish_reason=finish_reason,
             )
         ],
-        usage=Usage(**usage),
+        usage=CompletionUsage(**usage),
     )
 
 
@@ -313,14 +326,14 @@ def _persist_conversation(
     model_name: str,
     client_id: str,
     metadata: list[str | None],
-    messages: list[Message],
+    messages: list[ChatCompletionMessage],
     storage_output: str | None,
     tool_calls: list[Any] | None,
     thoughts: str | None = None,
 ) -> str | None:
     """Unified logic to save conversation history to LMDB."""
     try:
-        current_assistant_message = Message(
+        current_assistant_message = ChatCompletionMessage(
             role="assistant",
             content=storage_output or None,
             tool_calls=tool_calls or None,
@@ -397,8 +410,14 @@ def _build_structured_requirement(
 
 
 def _build_tool_prompt(
-    tools: list[Tool],
-    tool_choice: str | ToolChoiceFunction | None,
+    tools: list[ChatCompletionFunctionTool],
+    tool_choice: (
+        Literal["none", "auto", "required"]
+        | ChatCompletionNamedToolChoice
+        | ToolChoiceFunction
+        | ToolChoiceTypes
+        | None
+    ),
 ) -> str:
     """Generate a system prompt describing available tools and the PascalCase protocol."""
     if not tools:
@@ -429,7 +448,7 @@ def _build_tool_prompt(
         lines.append(
             "You must call at least one tool before responding to the user. Do not provide a final user-facing answer until a tool call has been issued."
         )
-    elif isinstance(tool_choice, ToolChoiceFunction):
+    elif isinstance(tool_choice, ChatCompletionNamedToolChoice):
         target = tool_choice.function.name
         lines.append(
             f"You are required to call the tool named `{target}`. Do not call any other tool."
@@ -441,8 +460,8 @@ def _build_tool_prompt(
 
 
 def _build_image_generation_instruction(
-    tools: list[ResponseImageTool] | None,
-    tool_choice: ResponseToolChoice | None,
+    tools: list[ImageGeneration] | None,
+    tool_choice: ToolChoiceFunction | None,
 ) -> str | None:
     """Construct explicit guidance so Gemini emits images when requested."""
     has_forced_choice = tool_choice is not None and tool_choice.type == "image_generation"
@@ -467,7 +486,7 @@ def _build_image_generation_instruction(
     return "\n\n".join(instructions)
 
 
-def _append_tool_hint_to_last_user_message(messages: list[Message]) -> None:
+def _append_tool_hint_to_last_user_message(messages: list[ChatCompletionMessage]) -> None:
     """Ensure the last user message carries the tool wrap hint."""
     for msg in reversed(messages):
         if msg.role != "user" or msg.content is None:
@@ -482,24 +501,28 @@ def _append_tool_hint_to_last_user_message(messages: list[Message]) -> None:
             for part in reversed(msg.content):
                 if getattr(part, "type", None) != "text":
                     continue
-                text_value = part.text or ""
+                text_value = getattr(part, "text", "") or ""
                 if TOOL_HINT_STRIPPED in text_value:
                     return
                 part.text = f"{text_value}\n{TOOL_WRAP_HINT}"
                 return
 
             messages_text = TOOL_WRAP_HINT.strip()
-            msg.content.append(ContentItem(type="text", text=messages_text))
+            msg.content.append(ChatCompletionRequestContentItem(type="text", text=messages_text))
             return
 
 
 def _prepare_messages_for_model(
-    source_messages: list[Message],
-    tools: list[Tool] | None,
-    tool_choice: str | ToolChoiceFunction | None,
+    source_messages: list[ChatCompletionMessage],
+    tools: list[ChatCompletionFunctionTool] | None,
+    tool_choice: Literal["none", "auto", "required"]
+    | ChatCompletionNamedToolChoice
+    | ToolChoiceFunction
+    | ToolChoiceTypes
+    | None,
     extra_instructions: list[str] | None = None,
     inject_system_defaults: bool = True,
-) -> list[Message]:
+) -> list[ChatCompletionMessage]:
     """Return a copy of messages enriched with tool instructions when needed."""
     prepared = [msg.model_copy(deep=True) for msg in source_messages]
 
@@ -541,7 +564,7 @@ def _prepare_messages_for_model(
             separator = "\n\n" if existing else ""
             prepared[0].content = f"{existing}{separator}{combined_instructions}"
     else:
-        prepared.insert(0, Message(role="system", content=combined_instructions))
+        prepared.insert(0, ChatCompletionMessage(role="system", content=combined_instructions))
 
     if tools and tool_choice != "none" and not tool_prompt_injected:
         _append_tool_hint_to_last_user_message(prepared)
@@ -550,92 +573,134 @@ def _prepare_messages_for_model(
 
 
 def _response_items_to_messages(
-    items: str | list[ResponseInputItem],
-) -> tuple[list[Message], str | list[ResponseInputItem]]:
-    """Convert Responses API input items into internal Message objects and normalized input."""
-    messages: list[Message] = []
+    items: Any,
+) -> list[ChatCompletionMessage]:
+    """Convert Responses API input items into internal Message objects."""
+    messages: list[ChatCompletionMessage] = []
 
     if isinstance(items, str):
-        messages.append(Message(role="user", content=items))
+        messages.append(ChatCompletionMessage(role="user", content=items))
         logger.debug("Normalized Responses input: single string message.")
-        return messages, items
+        return messages
 
-    normalized_input: list[ResponseInputItem] = []
     for item in items:
-        role = item.role
-        content = item.content
-        normalized_contents: list[ResponseInputContent] = []
-        if isinstance(content, str):
-            normalized_contents.append(ResponseInputContent(type="input_text", text=content))
-            messages.append(Message(role=role, content=content))
-        else:
-            converted: list[ContentItem] = []
-            reasoning_parts: list[str] = []
-            for part in content:
-                if part.type in ("input_text", "output_text"):
-                    text_value = part.text or ""
-                    normalized_contents.append(
-                        ResponseInputContent(type=part.type, text=text_value)
-                    )
-                    if text_value:
-                        converted.append(ContentItem(type="text", text=text_value))
-                elif part.type == "reasoning_text":
-                    text_value = part.text or ""
-                    normalized_contents.append(
-                        ResponseInputContent(type="reasoning_text", text=text_value)
-                    )
-                    if text_value:
-                        reasoning_parts.append(text_value)
-                elif part.type == "input_image":
-                    image_url = part.image_url
-                    if image_url:
-                        normalized_contents.append(
-                            ResponseInputContent(
-                                type="input_image",
-                                image_url=image_url,
-                                detail=part.detail if part.detail else "auto",
+        if isinstance(item, (ResponseInputMessage, ResponseOutputMessage)):
+            role = item.role
+            content = item.content
+            if isinstance(content, str):
+                messages.append(ChatCompletionMessage(role=role, content=content))
+            else:
+                converted: list[ChatCompletionContentItem] = []
+                reasoning_parts: list[str] = []
+                for part in content:
+                    if part.type in ("input_text", "output_text"):
+                        text_value = getattr(part, "text", "") or ""
+                        if text_value:
+                            converted.append(
+                                ChatCompletionRequestContentItem(type="text", text=text_value)
                             )
-                        )
-                        converted.append(
-                            ContentItem(
-                                type="image_url",
-                                image_url={
-                                    "url": image_url,
-                                    "detail": part.detail if part.detail else "auto",
-                                },
+                    elif part.type == "reasoning_text":
+                        text_value = getattr(part, "text", "") or ""
+                        if text_value:
+                            reasoning_parts.append(text_value)
+                    elif part.type == "input_image":
+                        image_url = getattr(part, "image_url", None)
+                        if image_url:
+                            converted.append(
+                                ChatCompletionRequestContentItem(
+                                    type="image_url",
+                                    image_url={
+                                        "url": image_url,
+                                        "detail": getattr(part, "detail", "auto") or "auto",
+                                    },
+                                )
                             )
+                    elif part.type == "input_file":
+                        file_url = getattr(part, "file_url", None)
+                        file_data = getattr(part, "file_data", None)
+                        if file_url or file_data:
+                            file_info = {}
+                            if file_data:
+                                file_info["file_data"] = file_data
+                                file_info["filename"] = getattr(part, "filename", None)
+                            if file_url:
+                                file_info["url"] = file_url
+                            converted.append(
+                                ChatCompletionRequestContentItem(type="file", file=file_info)
+                            )
+                reasoning_val = "\n\n".join(reasoning_parts) if reasoning_parts else None
+                messages.append(
+                    ChatCompletionMessage(
+                        role=role,
+                        content=converted or None,
+                        reasoning_content=reasoning_val,
+                    )
+                )
+
+        elif isinstance(item, ResponseFunctionToolCall):
+            messages.append(
+                ChatCompletionMessage(
+                    role="assistant",
+                    tool_calls=[
+                        ChatCompletionMessageToolCall(
+                            id=item.call_id,
+                            type="function",
+                            function=FunctionCall(name=item.name, arguments=item.arguments),
                         )
-                elif part.type == "input_file":
-                    if part.file_url or part.file_data:
-                        normalized_contents.append(part)
-                        file_info = {}
-                        if part.file_data:
-                            file_info["file_data"] = part.file_data
-                            file_info["filename"] = part.filename
-                        if part.file_url:
-                            file_info["url"] = part.file_url
-                        converted.append(ContentItem(type="file", file=file_info))
-            messages.append(Message(role=role, content=converted or None))
-
-        normalized_input.append(
-            ResponseInputItem(type="message", role=item.role, content=normalized_contents or [])
-        )
+                    ],
+                )
+            )
+        elif isinstance(item, FunctionCallOutput):
+            output_content = str(item.output) if isinstance(item.output, list) else item.output
+            messages.append(
+                ChatCompletionMessage(
+                    role="tool",
+                    tool_call_id=item.call_id,
+                    content=output_content,
+                )
+            )
+        elif isinstance(item, ResponseReasoningItem):
+            reasoning_val = None
+            if item.content:
+                reasoning_val = "\n\n".join(x.text for x in item.content if x.text)
+            messages.append(
+                ChatCompletionMessage(
+                    role="assistant",
+                    reasoning_content=reasoning_val,
+                )
+            )
+        elif isinstance(item, ImageGenerationCall):
+            messages.append(
+                ChatCompletionMessage(
+                    role="assistant",
+                    content=item.result or None,
+                )
+            )
 
-    logger.debug(f"Normalized Responses input: {len(normalized_input)} message items.")
-    return messages, normalized_input
+        else:
+            if hasattr(item, "role"):
+                messages.append(
+                    ChatCompletionMessage(
+                        role=item.role,
+                        content=str(getattr(item, "content", "")),
+                    )
+                )
+
+    logger.debug(f"Normalized Responses input: {len(messages)} message items.")
+    return messages
 
 
 def _instructions_to_messages(
-    instructions: str | list[ResponseInputItem] | None,
-) -> list[Message]:
+    instructions: str | list[ResponseInputMessage] | None,
+) -> list[ChatCompletionMessage]:
     """Normalize instructions payload into Message objects."""
     if not instructions:
         return []
 
     if isinstance(instructions, str):
-        return [Message(role="system", content=instructions)]
+        return [ChatCompletionMessage(role="system", content=instructions)]
 
-    instruction_messages: list[Message] = []
+    instruction_messages: list[ChatCompletionMessage] = []
     for item in instructions:
         if item.type and item.type != "message":
             continue
@@ -643,42 +708,48 @@ def _instructions_to_messages(
         role = item.role
         content = item.content
         if isinstance(content, str):
-            instruction_messages.append(Message(role=role, content=content))
+            instruction_messages.append(ChatCompletionMessage(role=role, content=content))
         else:
-            converted: list[ContentItem] = []
+            converted: list[ChatCompletionContentItem] = []
             reasoning_parts: list[str] = []
             for part in content:
                 if part.type in ("input_text", "output_text"):
-                    text_value = part.text or ""
+                    text_value = getattr(part, "text", "") or ""
                     if text_value:
-                        converted.append(ContentItem(type="text", text=text_value))
+                        converted.append(
+                            ChatCompletionRequestContentItem(type="text", text=text_value)
+                        )
                 elif part.type == "reasoning_text":
-                    text_value = part.text or ""
+                    text_value = getattr(part, "text", "") or ""
                     if text_value:
                         reasoning_parts.append(text_value)
                 elif part.type == "input_image":
-                    image_url = part.image_url
+                    image_url = getattr(part, "image_url", None)
                     if image_url:
                         converted.append(
-                            ContentItem(
+                            ChatCompletionRequestContentItem(
                                 type="image_url",
                                 image_url={
                                     "url": image_url,
-                                    "detail": part.detail if part.detail else "auto",
+                                    "detail": getattr(part, "detail", "auto") or "auto",
                                 },
                             )
                         )
                 elif part.type == "input_file":
-                    file_info = {}
-                    if part.file_data:
-                        file_info["file_data"] = part.file_data
-                        file_info["filename"] = part.filename
-                    if part.file_url:
-                        file_info["url"] = part.file_url
-                    if file_info:
-                        converted.append(ContentItem(type="file", file=file_info))
+                    file_data = getattr(part, "file_data", None)
+                    file_url = getattr(part, "file_url", None)
+                    if file_data or file_url:
+                        file_info = {}
+                        if file_data:
+                            file_info["file_data"] = file_data
+                            file_info["filename"] = getattr(part, "filename", None)
+                        if file_url:
+                            file_info["url"] = file_url
+                        converted.append(
+                            ChatCompletionRequestContentItem(type="file", file=file_info)
+                        )
             instruction_messages.append(
-                Message(
+                ChatCompletionMessage(
                     role=role,
                     content=converted or None,
                     reasoning_content="\n".join(reasoning_parts) if reasoning_parts else None,
@@ -712,7 +783,7 @@ def _get_available_models() -> list[ModelData]:
     for m in custom_models:
         models_data.append(
             ModelData(
-                id=m.model_name,
+                id=m.model_name or "",
                 created=now,
                 owned_by="custom",
             )
@@ -742,8 +813,8 @@ async def _find_reusable_session(
     db: LMDBConversationStore,
     pool: GeminiClientPool,
     model: Model,
-    messages: list[Message],
-) -> tuple[ChatSession | None, GeminiClientWrapper | None, list[Message]]:
+    messages: list[ChatCompletionMessage],
+) -> tuple[ChatSession | None, GeminiClientWrapper | None, list[ChatCompletionMessage]]:
     """Find an existing chat session matching the longest suitable history prefix."""
     if len(messages) < 2:
         return None, None, messages
@@ -786,7 +857,7 @@ async def _find_reusable_session(
 async def _send_with_split(
     session: ChatSession,
     text: str,
-    files: list[Path | str | io.BytesIO] | None = None,
+    files: list[str | Path | bytes | io.BytesIO] | None = None,
     stream: bool = False,
 ) -> AsyncGenerator[ModelOutput] | ModelOutput:
     """Send text to Gemini, splitting or converting to attachment if too long."""
@@ -805,7 +876,7 @@ async def _send_with_split(
     file_obj = io.BytesIO(text.encode("utf-8"))
     file_obj.name = "message.txt"
     try:
-        final_files = list(files) if files else []
+        final_files: list[str | Path | bytes | io.BytesIO] = list(files) if files else []
         final_files.append(file_obj)
         instruction = (
             "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n"
@@ -874,7 +945,7 @@ def process(self, chunk: str) -> str:
             if self._is_outputting():
                 output.append(pre_text)
 
-            if matched_group.endswith("_START"):
+            if matched_group and matched_group.endswith("_START"):
                 m_type = matched_group.split("_")[0]
                 if m_type == "TAG":
                     self.stack.append("IN_TAG_HEADER")
@@ -916,7 +987,7 @@ def _create_real_streaming_response(
     completion_id: str,
     created_time: int,
     model_name: str,
-    messages: list[Message],
+    messages: list[ChatCompletionMessage],
     db: LMDBConversationStore,
     model: Model,
     client_wrapper: GeminiClientWrapper,
@@ -1070,7 +1141,7 @@ async def generate_stream():
         p_tok, c_tok, t_tok, r_tok = _calculate_usage(
             messages, assistant_text, tool_calls, full_thoughts
         )
-        usage = Usage(
+        usage = CompletionUsage(
             prompt_tokens=p_tok,
             completion_tokens=c_tok,
             total_tokens=t_tok,
@@ -1107,7 +1178,7 @@ def _create_responses_real_streaming_response(
     response_id: str,
     created_time: int,
     model_name: str,
-    messages: list[Message],
+    messages: list[ChatCompletionMessage],
     db: LMDBConversationStore,
     model: Model,
     client_wrapper: GeminiClientWrapper,
@@ -1197,7 +1268,7 @@ def make_event(etype: str, data: dict) -> str:
                                 **base_event,
                                 "type": "response.output_item.added",
                                 "output_index": current_index,
-                                "item": ResponseReasoning(
+                                "item": ResponseReasoningItem(
                                     id=thought_item_id,
                                     type="reasoning",
                                     status="in_progress",
@@ -1214,7 +1285,7 @@ def make_event(etype: str, data: dict) -> str:
                                 "item_id": thought_item_id,
                                 "output_index": current_index,
                                 "summary_index": 0,
-                                "part": ResponseSummaryPart(text="").model_dump(mode="json"),
+                                "part": SummaryTextContent(text="").model_dump(mode="json"),
                             },
                         )
                         thought_open = True
@@ -1253,7 +1324,7 @@ def make_event(etype: str, data: dict) -> str:
                                 "item_id": thought_item_id,
                                 "output_index": current_index,
                                 "summary_index": 0,
-                                "part": ResponseSummaryPart(text=full_thoughts).model_dump(
+                                "part": SummaryTextContent(text=full_thoughts).model_dump(
                                     mode="json"
                                 ),
                             },
@@ -1264,11 +1335,11 @@ def make_event(etype: str, data: dict) -> str:
                                 **base_event,
                                 "type": "response.output_item.done",
                                 "output_index": current_index,
-                                "item": ResponseReasoning(
+                                "item": ResponseReasoningItem(
                                     id=thought_item_id,
                                     type="reasoning",
                                     status="completed",
-                                    summary=[ResponseSummaryPart(text=full_thoughts)],
+                                    summary=[SummaryTextContent(text=full_thoughts)],
                                 ).model_dump(mode="json"),
                             },
                         )
@@ -1300,9 +1371,9 @@ def make_event(etype: str, data: dict) -> str:
                                 "item_id": message_item_id,
                                 "output_index": current_index,
                                 "content_index": 0,
-                                "part": ResponseOutputContent(
-                                    type="output_text", text=""
-                                ).model_dump(mode="json"),
+                                "part": ResponseOutputText(type="output_text", text="").model_dump(
+                                    mode="json"
+                                ),
                             },
                         )
                         message_open = True
@@ -1372,7 +1443,7 @@ def make_event(etype: str, data: dict) -> str:
                     "item_id": thought_item_id,
                     "output_index": current_index,
                     "summary_index": 0,
-                    "part": ResponseSummaryPart(text=full_thoughts).model_dump(mode="json"),
+                    "part": SummaryTextContent(text=full_thoughts).model_dump(mode="json"),
                 },
             )
             yield make_event(
@@ -1381,11 +1452,11 @@ def make_event(etype: str, data: dict) -> str:
                     **base_event,
                     "type": "response.output_item.done",
                     "output_index": current_index,
-                    "item": ResponseReasoning(
+                    "item": ResponseReasoningItem(
                         id=thought_item_id,
                         type="reasoning",
                         status="completed",
-                        summary=[ResponseSummaryPart(text=full_thoughts)],
+                        summary=[SummaryTextContent(text=full_thoughts)],
                     ).model_dump(mode="json"),
                 },
             )
@@ -1414,9 +1485,9 @@ def make_event(etype: str, data: dict) -> str:
                     "item_id": message_item_id,
                     "output_index": current_index,
                     "content_index": 0,
-                    "part": ResponseOutputContent(
-                        type="output_text", text=assistant_text
-                    ).model_dump(mode="json"),
+                    "part": ResponseOutputText(type="output_text", text=assistant_text).model_dump(
+                        mode="json"
+                    ),
                 },
             )
             yield make_event(
@@ -1430,13 +1501,13 @@ def make_event(etype: str, data: dict) -> str:
                         type="message",
                         status="completed",
                         role="assistant",
-                        content=[ResponseOutputContent(type="output_text", text=assistant_text)],
+                        content=[ResponseOutputText(type="output_text", text=assistant_text)],
                     ).model_dump(mode="json"),
                 },
             )
             current_index += 1
 
-        image_items: list[ResponseImageGenerationCall] = []
+        image_items: list[ImageGenerationCall] = []
         final_response_contents: list[ResponseOutputContent] = []
         seen_hashes = set()
 
@@ -1453,7 +1524,7 @@ def make_event(etype: str, data: dict) -> str:
                         img_id = parts[0]
                         fmt = parts[1] if len(parts) > 1 else "png"
 
-                        img_item = ResponseImageGenerationCall(
+                        img_item = ImageGenerationCall(
                             id=img_id,
                             result=b64,
                             output_format=fmt,
@@ -1464,7 +1535,7 @@ def make_event(etype: str, data: dict) -> str:
                             f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
                         )
                         final_response_contents.append(
-                            ResponseOutputContent(type="output_text", text=image_url)
+                            ResponseOutputText(type="output_text", text=image_url)
                         )
 
                         yield make_event(
@@ -1493,7 +1564,13 @@ def make_event(etype: str, data: dict) -> str:
                         logger.warning("Image processing failed in stream")
 
         for call in detected_tool_calls:
-            tc_item = ResponseToolCall(id=call.id, status="completed", function=call.function)
+            tc_item = ResponseFunctionToolCall(
+                id=call.id,
+                call_id=call.id,
+                name=call.function.name,
+                arguments=call.function.arguments,
+                status="completed",
+            )
             yield make_event(
                 "response.output_item.added",
                 {
@@ -1516,7 +1593,7 @@ def make_event(etype: str, data: dict) -> str:
 
         if assistant_text:
             final_response_contents.insert(
-                0, ResponseOutputContent(type="output_text", text=assistant_text)
+                0, ResponseOutputText(type="output_text", text=assistant_text)
             )
 
         p_tok, c_tok, t_tok, r_tok = _calculate_usage(
@@ -1537,7 +1614,6 @@ def make_event(etype: str, data: dict) -> str:
             final_response_contents,
             usage,
             request,
-            None,
             full_thoughts,
         )
         _persist_conversation(
@@ -1643,13 +1719,15 @@ async def create_chat_completion(
             f"Client ID: {client.id}, Input length: {len(m_input)}, files count: {len(files)}"
         )
         resp_or_stream = await _send_with_split(
-            session, m_input, files=files, stream=request.stream
+            session, m_input, files=files, stream=bool(request.stream)
         )
     except Exception as e:
         logger.exception("Gemini API error")
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) from e
 
     if request.stream:
+        # Narrow for type checker
+        assert not isinstance(resp_or_stream, ModelOutput)
         return _create_real_streaming_response(
             resp_or_stream,
             completion_id,
@@ -1664,6 +1742,9 @@ async def create_chat_completion(
             structured_requirement,
         )
 
+    # Narrow for type checker
+    assert isinstance(resp_or_stream, ModelOutput)
+
     try:
         thoughts = resp_or_stream.thoughts
         raw_clean = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False)
@@ -1743,33 +1824,35 @@ async def create_response(
     image_store: Path = Depends(get_image_store_dir),
 ):
     base_url = str(raw_request.base_url)
-    base_messages, norm_input = _response_items_to_messages(request.input)
+    base_messages = _response_items_to_messages(request.input)
     struct_req = _build_structured_requirement(request.response_format)
     extra_instr = [struct_req.instruction] if struct_req else []
 
     standard_tools, image_tools = [], []
     if request.tools:
         for t in request.tools:
-            if isinstance(t, Tool):
+            if isinstance(t, FunctionTool):
                 standard_tools.append(t)
-            elif isinstance(t, ResponseImageTool):
+            elif isinstance(t, ImageGeneration):
                 image_tools.append(t)
             elif isinstance(t, dict):
                 if t.get("type") == "function":
-                    standard_tools.append(Tool.model_validate(t))
+                    standard_tools.append(FunctionTool.model_validate(t))
                 elif t.get("type") == "image_generation":
-                    image_tools.append(ResponseImageTool.model_validate(t))
+                    image_tools.append(ImageGeneration.model_validate(t))
 
     img_instr = _build_image_generation_instruction(
         image_tools,
-        request.tool_choice if isinstance(request.tool_choice, ResponseToolChoice) else None,
+        request.tool_choice if isinstance(request.tool_choice, ToolChoiceFunction) else None,
     )
     if img_instr:
         extra_instr.append(img_instr)
     preface = _instructions_to_messages(request.instructions)
     conv_messages = [*preface, *base_messages] if preface else base_messages
     model_tool_choice = (
-        request.tool_choice if isinstance(request.tool_choice, (str, ToolChoiceFunction)) else None
+        request.tool_choice
+        if isinstance(request.tool_choice, (str, ChatCompletionNamedToolChoice))
+        else None
     )
 
     messages = _prepare_messages_for_model(
@@ -1788,7 +1871,7 @@ async def create_response(
     if session:
         msgs = _prepare_messages_for_model(
             remain,
-            request.tools,
+            request.tools,  # type: ignore
             request.tool_choice,
             None,
             False,
@@ -1819,13 +1902,15 @@ async def create_response(
             f"Client ID: {client.id}, Input length: {len(m_input)}, files count: {len(files)}"
         )
         resp_or_stream = await _send_with_split(
-            session, m_input, files=files, stream=request.stream
+            session, m_input, files=files, stream=bool(request.stream)
         )
     except Exception as e:
         logger.exception("Gemini API error")
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) from e
 
     if request.stream:
+        # Narrow for type checker
+        assert not isinstance(resp_or_stream, ModelOutput)
         return _create_responses_real_streaming_response(
             resp_or_stream,
             response_id,
@@ -1842,6 +1927,9 @@ async def create_response(
             struct_req,
         )
 
+    # Narrow for type checker
+    assert isinstance(resp_or_stream, ModelOutput)
+
     try:
         thoughts = resp_or_stream.thoughts
         raw_clean = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False)
@@ -1856,7 +1944,9 @@ async def create_response(
     )
     images = resp_or_stream.images or []
     if (
-        request.tool_choice is not None and request.tool_choice.type == "image_generation"
+        request.tool_choice is not None
+        and hasattr(request.tool_choice, "type")
+        and request.tool_choice.type == "image_generation"
     ) and not images:
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned.")
 
@@ -1879,13 +1969,13 @@ async def create_response(
             )
 
             contents.append(
-                ResponseOutputContent(
+                ResponseOutputText(
                     type="output_text",
                     text=f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})",
                 )
             )
             img_calls.append(
-                ResponseImageGenerationCall(
+                ImageGenerationCall(
                     id=img_id,
                     result=b64,
                     output_format=img_format,
@@ -1896,9 +1986,9 @@ async def create_response(
             logger.warning(f"Image error: {e}")
 
     if assistant_text:
-        contents.append(ResponseOutputContent(type="output_text", text=assistant_text))
+        contents.append(ResponseOutputText(type="output_text", text=assistant_text))
     if not contents:
-        contents.append(ResponseOutputContent(type="output_text", text=""))
+        contents.append(ResponseOutputText(type="output_text", text=""))
 
     # Aggregate images for storage
     image_markdown = ""
@@ -1926,7 +2016,6 @@ async def create_response(
         contents,
         usage,
         request,
-        norm_input,
         thoughts,
     )
     _persist_conversation(
diff --git a/app/server/middleware.py b/app/server/middleware.py
index 2fa016b..457ac0f 100644
--- a/app/server/middleware.py
+++ b/app/server/middleware.py
@@ -113,7 +113,7 @@ def add_cors_middleware(app: FastAPI):
     if g_config.cors.enabled:
         cors = g_config.cors
         app.add_middleware(
-            CORSMiddleware,
+            CORSMiddleware,  # type: ignore
             allow_origins=cors.allow_origins,
             allow_credentials=cors.allow_credentials,
             allow_methods=cors.allow_methods,
diff --git a/app/services/client.py b/app/services/client.py
index b8f976b..77dfdf6 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -1,3 +1,4 @@
+import io
 from pathlib import Path
 from typing import Any, cast
 
@@ -5,7 +6,7 @@
 from gemini_webapi import GeminiClient, ModelOutput
 from loguru import logger
 
-from app.models import Message
+from app.models import ChatCompletionMessage
 from app.utils import g_config
 from app.utils.helper import (
     add_tag,
@@ -28,7 +29,7 @@ def __init__(self, client_id: str, **kwargs):
         super().__init__(**kwargs)
         self.id = client_id
 
-    async def init(
+    async def init(  # type: ignore
         self,
         timeout: float = cast(float, _UNSET),
         watchdog_timeout: float = cast(float, _UNSET),
@@ -68,7 +69,10 @@ def running(self) -> bool:
 
     @staticmethod
     async def process_message(
-        message: Message, tempdir: Path | None = None, tagged: bool = True, wrap_tool: bool = True
+        message: ChatCompletionMessage,
+        tempdir: Path | None = None,
+        tagged: bool = True,
+        wrap_tool: bool = True,
     ) -> tuple[str, list[Path | str]]:
         """
         Process a Message into Gemini API format using the PascalCase technical protocol.
@@ -83,22 +87,25 @@ async def process_message(
         elif isinstance(message.content, list):
             for item in message.content:
                 if item.type == "text":
-                    if item.text or message.role == "tool":
-                        text_fragments.append(item.text or "")
+                    item_text = getattr(item, "text", "") or ""
+                    if item_text or message.role == "tool":
+                        text_fragments.append(item_text)
                 elif item.type == "image_url":
-                    if not item.image_url:
+                    item_image_url = getattr(item, "image_url", None)
+                    if not item_image_url:
                         raise ValueError("Image URL cannot be empty")
-                    if url := item.image_url.get("url", None):
+                    if url := item_image_url.get("url", None):
                         files.append(await save_url_to_tempfile(url, tempdir))
                     else:
                         raise ValueError("Image URL must contain 'url' key")
                 elif item.type == "file":
-                    if not item.file:
+                    item_file = getattr(item, "file", None)
+                    if not item_file:
                         raise ValueError("File cannot be empty")
-                    if file_data := item.file.get("file_data", None):
-                        filename = item.file.get("filename", "")
+                    if file_data := item_file.get("file_data", None):
+                        filename = item_file.get("filename", "")
                         files.append(await save_file_to_tempfile(file_data, filename, tempdir))
-                    elif url := item.file.get("url", None):
+                    elif url := item_file.get("url", None):
                         files.append(await save_url_to_tempfile(url, tempdir))
                     else:
                         raise ValueError("File must contain 'file_data' or 'url' key")
@@ -154,10 +161,10 @@ async def process_message(
 
     @staticmethod
     async def process_conversation(
-        messages: list[Message], tempdir: Path | None = None
-    ) -> tuple[str, list[Path | str]]:
+        messages: list[ChatCompletionMessage], tempdir: Path | None = None
+    ) -> tuple[str, list[str | Path | bytes | io.BytesIO]]:
         conversation: list[str] = []
-        files: list[Path | str] = []
+        files: list[str | Path | bytes | io.BytesIO] = []
 
         i = 0
         while i < len(messages):
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 07f0a23..7a915d4 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -1,5 +1,6 @@
 import hashlib
 import string
+from collections.abc import Generator
 from contextlib import contextmanager
 from datetime import datetime, timedelta
 from pathlib import Path
@@ -7,9 +8,15 @@
 
 import lmdb
 import orjson
+from lmdb import Environment, Error, Transaction
 from loguru import logger
 
-from app.models import ContentItem, ConversationInStore, Message
+from app.models import (
+    ChatCompletionAssistantContentItem,
+    ChatCompletionMessage,
+    ChatCompletionRequestContentItem,
+    ConversationInStore,
+)
 from app.utils import g_config
 from app.utils.helper import (
     extract_tool_calls,
@@ -50,7 +57,7 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None:
     return text.strip() if text.strip() else None
 
 
-def _hash_message(message: Message, fuzzy: bool = False) -> str:
+def _hash_message(message: ChatCompletionMessage, fuzzy: bool = False) -> str:
     """
     Generate a stable, canonical hash for a single message.
     """
@@ -69,33 +76,36 @@ def _hash_message(message: Message, fuzzy: bool = False) -> str:
     elif isinstance(content, str):
         core_data["content"] = _normalize_text(content, fuzzy=fuzzy)
     elif isinstance(content, list):
+        _ContentItem = (ChatCompletionRequestContentItem, ChatCompletionAssistantContentItem)
         text_parts = []
         for item in content:
             text_val = ""
-            if isinstance(item, ContentItem) and item.type == "text":
-                text_val = item.text
+            if isinstance(item, _ContentItem) and item.type == "text":
+                text_val = item.text or ""
             elif isinstance(item, dict) and item.get("type") == "text":
-                text_val = item.get("text")
+                text_val = item.get("text") or ""
 
             if text_val:
                 normalized_part = _normalize_text(text_val, fuzzy=fuzzy)
                 if normalized_part:
                     text_parts.append(normalized_part)
-            elif isinstance(item, (ContentItem, dict)):
-                item_type = item.type if isinstance(item, ContentItem) else item.get("type")
+            elif isinstance(item, _ContentItem):
+                item_type = item.type
                 if item_type == "image_url":
-                    url = (
-                        item.image_url.get("url")
-                        if isinstance(item, ContentItem) and item.image_url
-                        else item.get("image_url", {}).get("url")
-                    )
+                    item_image_url = getattr(item, "image_url", None)
+                    url = item_image_url.get("url") if item_image_url else None
                     text_parts.append(f"[image_url:{url}]")
                 elif item_type == "file":
-                    url = (
-                        item.file.get("url") or item.file.get("filename")
-                        if isinstance(item, ContentItem) and item.file
-                        else item.get("file", {}).get("url") or item.get("file", {}).get("filename")
-                    )
+                    item_file = getattr(item, "file", None)
+                    url = (item_file.get("url") or item_file.get("filename")) if item_file else None
+                    text_parts.append(f"[file:{url}]")
+            elif isinstance(item, dict):
+                item_type = item.get("type")
+                if item_type == "image_url":
+                    url = item.get("image_url", {}).get("url")
+                    text_parts.append(f"[image_url:{url}]")
+                elif item_type == "file":
+                    url = item.get("file", {}).get("url") or item.get("file", {}).get("filename")
                     text_parts.append(f"[file:{url}]")
 
         core_data["content"] = "\n".join(text_parts) if text_parts else None
@@ -126,7 +136,7 @@ def _hash_message(message: Message, fuzzy: bool = False) -> str:
 
 
 def _hash_conversation(
-    client_id: str, model: str, messages: list[Message], fuzzy: bool = False
+    client_id: str, model: str, messages: list[ChatCompletionMessage], fuzzy: bool = False
 ) -> str:
     """Generate a hash for a list of messages and model name, tied to a specific client_id."""
     combined_hash = hashlib.sha256()
@@ -168,7 +178,7 @@ def __init__(
         self.db_path: Path = Path(db_path)
         self.max_db_size: int = max_db_size
         self.retention_days: int = max(0, int(retention_days))
-        self._env: lmdb.Environment | None = None
+        self._env: Environment | None = None
 
         self._ensure_db_path()
         self._init_environment()
@@ -189,12 +199,12 @@ def _init_environment(self) -> None:
                 meminit=False,
             )
             logger.info(f"LMDB environment initialized at {self.db_path}")
-        except lmdb.Error as e:
+        except Error as e:
             logger.error(f"Failed to initialize LMDB environment: {e}")
             raise
 
     @contextmanager
-    def _get_transaction(self, write: bool = False):
+    def _get_transaction(self, write: bool = False) -> Generator[Transaction]:
         """
         Context manager for LMDB transactions.
 
@@ -204,12 +214,12 @@ def _get_transaction(self, write: bool = False):
         if not self._env:
             raise RuntimeError("LMDB environment not initialized")
 
-        txn: lmdb.Transaction = self._env.begin(write=write)
+        txn: Transaction = self._env.begin(write=write)
         try:
             yield txn
             if write:
                 txn.commit()
-        except lmdb.Error:
+        except Error:
             if write:
                 txn.abort()
             raise
@@ -236,24 +246,22 @@ def _decode_index_value(data: bytes) -> list[str]:
         except UnicodeDecodeError:
             return []
 
-    @staticmethod
-    def _update_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str):
+    def _update_index(self, txn: Transaction, prefix: str, hash_val: str, storage_key: str):
         """Add a storage key to the index for a given hash, avoiding duplicates."""
         idx_key = f"{prefix}{hash_val}".encode()
         existing = txn.get(idx_key)
-        keys = LMDBConversationStore._decode_index_value(existing) if existing else []
+        keys = self._decode_index_value(existing) if existing else []
         if storage_key not in keys:
             keys.append(storage_key)
             txn.put(idx_key, orjson.dumps(keys))
 
-    @staticmethod
-    def _remove_from_index(txn: lmdb.Transaction, prefix: str, hash_val: str, storage_key: str):
+    def _remove_from_index(self, txn: Transaction, prefix: str, hash_val: str, storage_key: str):
         """Remove a specific storage key from the index for a given hash."""
         idx_key = f"{prefix}{hash_val}".encode()
         existing = txn.get(idx_key)
         if not existing:
             return
-        keys = LMDBConversationStore._decode_index_value(existing)
+        keys = self._decode_index_value(existing)
         if storage_key in keys:
             keys.remove(storage_key)
             if keys:
@@ -304,7 +312,7 @@ def store(
                 logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key[:12]}")
                 return storage_key
 
-        except lmdb.Error as e:
+        except Error as e:
             logger.error(f"LMDB error while storing messages with key {storage_key[:12]}: {e}")
             raise
         except Exception as e:
@@ -334,14 +342,14 @@ def get(self, key: str) -> ConversationInStore | None:
 
                 logger.debug(f"Retrieved {len(conv.messages)} messages with key: {key[:12]}")
                 return conv
-        except (lmdb.Error, orjson.JSONDecodeError) as e:
+        except (Error, orjson.JSONDecodeError) as e:
             logger.error(f"Failed to retrieve/parse messages with key {key[:12]}: {e}")
             return None
         except Exception as e:
             logger.error(f"Unexpected error retrieving messages with key {key[:12]}: {e}")
             return None
 
-    def find(self, model: str, messages: list[Message]) -> ConversationInStore | None:
+    def find(self, model: str, messages: list[ChatCompletionMessage]) -> ConversationInStore | None:
         """
         Search conversation data by message list.
         Tries raw matching, then sanitized matching, and finally fuzzy matching.
@@ -381,7 +389,7 @@ def find(self, model: str, messages: list[Message]) -> ConversationInStore | Non
     def _find_by_message_list(
         self,
         model: str,
-        messages: list[Message],
+        messages: list[ChatCompletionMessage],
         fuzzy: bool = False,
     ) -> ConversationInStore | None:
         """
@@ -423,7 +431,7 @@ def _find_by_message_list(
 
                                 if match_found:
                                     return conv
-            except lmdb.Error as e:
+            except Error as e:
                 logger.error(
                     f"LMDB error while searching for hash {message_hash} and client {c.id}: {e}"
                 )
@@ -438,7 +446,7 @@ def exists(self, key: str) -> bool:
         try:
             with self._get_transaction(write=False) as txn:
                 return txn.get(key.encode("utf-8")) is not None
-        except lmdb.Error as e:
+        except Error as e:
             logger.error(f"Failed to check existence of key {key}: {e}")
             return False
 
@@ -464,7 +472,7 @@ def delete(self, key: str) -> ConversationInStore | None:
 
                 logger.debug(f"Deleted messages with key: {key[:12]}")
                 return conv
-        except (lmdb.Error, orjson.JSONDecodeError) as e:
+        except (Error, orjson.JSONDecodeError) as e:
             logger.error(f"Failed to delete messages with key {key[:12]}: {e}")
             return None
 
@@ -490,7 +498,7 @@ def keys(self, prefix: str = "", limit: int | None = None) -> list[str]:
                         count += 1
                         if limit and count >= limit:
                             break
-        except lmdb.Error as e:
+        except Error as e:
             logger.error(f"Failed to list keys: {e}")
         return keys
 
@@ -529,7 +537,7 @@ def cleanup_expired(self, retention_days: int | None = None) -> int:
 
                     if timestamp < cutoff:
                         expired_entries.append((key_str, conv))
-        except lmdb.Error as exc:
+        except Error as exc:
             logger.error(f"Failed to scan LMDB for retention cleanup: {exc}")
             raise
 
@@ -552,7 +560,7 @@ def cleanup_expired(self, retention_days: int | None = None) -> int:
                         )
                         self._remove_from_index(txn, self.FUZZY_LOOKUP_PREFIX, fuzzy_hash, key_str)
                     removed += 1
-        except lmdb.Error as exc:
+        except Error as exc:
             logger.error(f"Failed to delete expired conversations: {exc}")
             raise
 
@@ -570,7 +578,7 @@ def stats(self) -> dict[str, Any]:
             return {}
         try:
             return self._env.stat()
-        except lmdb.Error as e:
+        except Error as e:
             logger.error(f"Failed to get database stats: {e}")
             return {}
 
@@ -586,7 +594,7 @@ def __del__(self):
         self.close()
 
     @staticmethod
-    def sanitize_messages(messages: list[Message]) -> list[Message]:
+    def sanitize_messages(messages: list[ChatCompletionMessage]) -> list[ChatCompletionMessage]:
         """Clean all messages of internal markers, hints and normalize tool calls."""
         cleaned_messages = []
         for msg in messages:
@@ -622,9 +630,30 @@ def sanitize_messages(messages: list[Message]) -> list[Message]:
                 new_content = []
                 all_extracted_calls = list(msg.tool_calls or [])
                 list_changed = False
+                reasoning_parts = []
 
                 for item in msg.content:
-                    if isinstance(item, ContentItem) and item.type == "text" and item.text:
+                    # Extract reasoning items and move them to reasoning_content
+                    if (
+                        isinstance(item, ChatCompletionAssistantContentItem)
+                        and item.type == "reasoning"
+                    ):
+                        val = item.text
+                        if val:
+                            norm_val = _normalize_text(val)
+                            if norm_val:
+                                reasoning_parts.append(norm_val)
+                        list_changed = True
+                        continue
+
+                    if (
+                        isinstance(
+                            item,
+                            (ChatCompletionRequestContentItem, ChatCompletionAssistantContentItem),
+                        )
+                        and item.type == "text"
+                        and item.text
+                    ):
                         text = item.text
                         if msg.role == "assistant" and not msg.tool_calls:
                             text, extracted = extract_tool_calls(text)
@@ -639,8 +668,17 @@ def sanitize_messages(messages: list[Message]) -> list[Message]:
                             item = item.model_copy(update={"text": text.strip() or None})
                     new_content.append(item)
 
+                if reasoning_parts:
+                    existing_reason = update_data.get("reasoning_content") or msg.reasoning_content
+                    all_reasoning = "\n\n".join(
+                        r for r in ([existing_reason, *reasoning_parts]) if r
+                    )
+                    if all_reasoning:
+                        update_data["reasoning_content"] = all_reasoning
+                        content_changed = True
+
                 if list_changed:
-                    update_data["content"] = new_content
+                    update_data["content"] = new_content if new_content else None
                     update_data["tool_calls"] = all_extracted_calls or None
                     content_changed = True
 
diff --git a/app/utils/config.py b/app/utils/config.py
index fec8eee..9b4f1a3 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -352,7 +352,7 @@ def initialize_config() -> Config:
         env_models_overrides = extract_gemini_models_env()
 
         # Then, initialize Config with pydantic_settings
-        config = Config()  # type: ignore
+        config = Config()
 
         # Synthesize clients
         config.gemini.clients = _merge_clients_with_env(
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 187f310..8bc4940 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -14,7 +14,7 @@
 from curl_cffi.requests import AsyncSession
 from loguru import logger
 
-from app.models import FunctionCall, Message, ToolCall
+from app.models import ChatCompletionMessage, ChatCompletionMessageToolCall, FunctionCall
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
@@ -278,7 +278,9 @@ def strip_system_hints(text: str) -> str:
     return cleaned
 
 
-def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[ToolCall]]:
+def _process_tools_internal(
+    text: str, extract: bool = True
+) -> tuple[str, list[ChatCompletionMessageToolCall]]:
     """
     Extract tool metadata and return text stripped of technical markers.
     Arguments are parsed into JSON and assigned deterministic call IDs.
@@ -286,7 +288,7 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[
     if not text:
         return text, []
 
-    tool_calls: list[ToolCall] = []
+    tool_calls: list[ChatCompletionMessageToolCall] = []
 
     def _create_tool_call(name: str, raw_args: str) -> None:
         if not extract:
@@ -321,7 +323,7 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}"
 
         tool_calls.append(
-            ToolCall(
+            ChatCompletionMessageToolCall(
                 id=call_id,
                 type="function",
                 function=FunctionCall(name=name, arguments=arguments),
@@ -341,12 +343,12 @@ def remove_tool_call_blocks(text: str) -> str:
     return cleaned
 
 
-def extract_tool_calls(text: str) -> tuple[str, list[ToolCall]]:
+def extract_tool_calls(text: str) -> tuple[str, list[ChatCompletionMessageToolCall]]:
     """Extract tool calls and return cleaned text."""
     return _process_tools_internal(text, extract=True)
 
 
-def text_from_message(message: Message) -> str:
+def text_from_message(message: ChatCompletionMessage) -> str:
     """Concatenate text and tool arguments from a message for token estimation."""
     base_text = ""
     if isinstance(message.content, str):
diff --git a/scripts/dump_lmdb.py b/scripts/dump_lmdb.py
index 889af4f..15ce5df 100644
--- a/scripts/dump_lmdb.py
+++ b/scripts/dump_lmdb.py
@@ -5,6 +5,7 @@
 
 import lmdb
 import orjson
+from lmdb import Transaction
 
 
 def _decode_value(value: bytes) -> Any:
@@ -15,7 +16,7 @@ def _decode_value(value: bytes) -> Any:
         return value.decode("utf-8", errors="replace")
 
 
-def _dump_all(txn: lmdb.Transaction) -> list[dict[str, Any]]:
+def _dump_all(txn: Transaction) -> list[dict[str, Any]]:
     """Return all records from the database."""
     result: list[dict[str, Any]] = []
     for key, value in txn.cursor():
@@ -23,7 +24,7 @@ def _dump_all(txn: lmdb.Transaction) -> list[dict[str, Any]]:
     return result
 
 
-def _dump_selected(txn: lmdb.Transaction, keys: Iterable[str]) -> list[dict[str, Any]]:
+def _dump_selected(txn: Transaction, keys: Iterable[str]) -> list[dict[str, Any]]:
     """Return records for the provided keys."""
     result: list[dict[str, Any]] = []
     for key in keys:

From 1a9f7de4e61db7a028f63e4356365de572e03a69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 3 Mar 2026 22:03:22 +0700
Subject: [PATCH 171/236] Add type check using `ty`

---
 .github/workflows/docker.yaml | 13 +++++++-----
 .github/workflows/lint.yaml   | 37 +++++++++++++++++++++++++++++++++++
 .github/workflows/ruff.yaml   | 30 ----------------------------
 .github/workflows/track.yml   |  6 ++----
 pyproject.toml                |  1 +
 uv.lock                       | 26 ++++++++++++++++++++++++
 6 files changed, 74 insertions(+), 39 deletions(-)
 create mode 100644 .github/workflows/lint.yaml
 delete mode 100644 .github/workflows/ruff.yaml

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 1c5a2ee..6fb71e8 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -6,11 +6,14 @@ on:
       - main
     tags:
       - "v*"
-    paths-ignore:
-      - "**/*.md"
-      - ".github/*"
-      - "LICENSE"
-      - ".gitignore"
+    paths:
+      - "app/**"
+      - "config/**"
+      - "pyproject.toml"
+      - "uv.lock"
+      - "Dockerfile"
+      - "run.py"
+      - ".github/workflows/docker.yaml"
 
 env:
   REGISTRY: ghcr.io
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
new file mode 100644
index 0000000..f513a31
--- /dev/null
+++ b/.github/workflows/lint.yaml
@@ -0,0 +1,37 @@
+name: Lint and Type Check
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - "**.py"
+      - "pyproject.toml"
+      - "uv.lock"
+      - ".github/workflows/lint.yaml"
+  pull_request:
+    paths:
+      - "**.py"
+      - "pyproject.toml"
+      - "uv.lock"
+      - ".github/workflows/lint.yaml"
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+
+      - name: Install dependencies
+        run: uv sync --all-groups
+
+      - name: Run Ruff
+        run: uv run ruff check .
+
+      - name: Run Ty Check
+        run: uv run ty check
diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml
deleted file mode 100644
index 5e13127..0000000
--- a/.github/workflows/ruff.yaml
+++ /dev/null
@@ -1,30 +0,0 @@
-name: Ruff Lint
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-    types:
-      - opened
-
-jobs:
-  lint:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-
-      - name: Set up Python
-        uses: actions/setup-python@v6
-        with:
-          python-version: "3.13"
-
-      - name: Install Ruff
-        run: |
-          python -m pip install --upgrade pip
-          pip install ruff
-
-      - name: Run Ruff
-        run: ruff check .
diff --git a/.github/workflows/track.yml b/.github/workflows/track.yml
index 85b087f..778ef03 100644
--- a/.github/workflows/track.yml
+++ b/.github/workflows/track.yml
@@ -16,8 +16,6 @@ jobs:
 
       - name: Install uv
         uses: astral-sh/setup-uv@v7
-        with:
-          version: "latest"
 
       - name: Update gemini-webapi
         id: update
@@ -33,8 +31,8 @@ jobs:
           fi
           echo "Current gemini-webapi version: $OLD_VERSION"
 
-          # Update the package using uv, which handles pyproject.toml and uv.lock
-          uv add --upgrade gemini-webapi
+          # Update gemini-webapi to the latest version
+          uv lock --upgrade-package gemini-webapi
 
           # Get new version of gemini-webapi after upgrade
           NEW_VERSION=$(uv pip show gemini-webapi | grep ^Version: | awk '{print $2}')
diff --git a/pyproject.toml b/pyproject.toml
index 83a1b87..f961758 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ Repository = "https://github.com/Nativu5/Gemini-FastAPI"
 dev = [
     "pytest>=9.0.2",
     "ruff>=0.15.4",
+    "ty>=0.0.20",
 ]
 
 [dependency-groups]
diff --git a/uv.lock b/uv.lock
index fd1a7df..f6415bd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -145,6 +145,7 @@ dependencies = [
 dev = [
     { name = "pytest" },
     { name = "ruff" },
+    { name = "ty" },
 ]
 
 [package.dev-dependencies]
@@ -164,6 +165,7 @@ requires-dist = [
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.1" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.4" },
+    { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.20" },
     { name = "uvicorn", specifier = ">=0.41.0" },
     { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" },
 ]
@@ -519,6 +521,30 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" },
 ]
 
+[[package]]
+name = "ty"
+version = "0.0.20"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/56/95/8de69bb98417227b01f1b1d743c819d6456c9fd140255b6124b05b17dfd6/ty-0.0.20.tar.gz", hash = "sha256:ebba6be7974c14efbb2a9adda6ac59848f880d7259f089dfa72a093039f1dcc6", size = 5262529, upload-time = "2026-03-02T15:51:36.587Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0b/2c/718abe48393e521bf852cd6b0f984766869b09c258d6e38a118768a91731/ty-0.0.20-py3-none-linux_armv6l.whl", hash = "sha256:7cc12769c169c9709a829c2248ee2826b7aae82e92caeac813d856f07c021eae", size = 10333656, upload-time = "2026-03-02T15:51:56.461Z" },
+    { url = "https://files.pythonhosted.org/packages/41/0e/eb1c4cc4a12862e2327b72657bcebb10b7d9f17046f1bdcd6457a0211615/ty-0.0.20-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:3b777c1bf13bc0a95985ebb8a324b8668a4a9b2e514dde5ccf09e4d55d2ff232", size = 10168505, upload-time = "2026-03-02T15:51:51.895Z" },
+    { url = "https://files.pythonhosted.org/packages/89/7f/10230798e673f0dd3094dfd16e43bfd90e9494e7af6e8e7db516fb431ddf/ty-0.0.20-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b2a4a7db48bf8cba30365001bc2cad7fd13c1a5aacdd704cc4b7925de8ca5eb3", size = 9678510, upload-time = "2026-03-02T15:51:48.451Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/3d/59d9159577494edd1728f7db77b51bb07884bd21384f517963114e3ab5f6/ty-0.0.20-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6846427b8b353a43483e9c19936dc6a25612573b44c8f7d983dfa317e7f00d4c", size = 10162926, upload-time = "2026-03-02T15:51:40.558Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/a8/b7273eec3e802f78eb913fbe0ce0c16ef263723173e06a5776a8359b2c66/ty-0.0.20-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:245ceef5bd88df366869385cf96411cb14696334f8daa75597cf7e41c3012eb8", size = 10171702, upload-time = "2026-03-02T15:51:44.069Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/32/5f1144f2f04a275109db06e3498450c4721554215b80ae73652ef412eeab/ty-0.0.20-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4d21d1cdf67a444d3c37583c17291ddba9382a9871021f3f5d5735e09e85efe", size = 10682552, upload-time = "2026-03-02T15:51:33.102Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/db/9f1f637310792f12bd6ed37d5fc8ab39ba1a9b0c6c55a33865e9f1cad840/ty-0.0.20-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bd4ffd907d1bd70e46af9e9a2f88622f215e1bf44658ea43b32c2c0b357299e4", size = 11242605, upload-time = "2026-03-02T15:51:34.895Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/68/cc9cae2e732fcfd20ccdffc508407905a023fc8493b8771c392d915528dc/ty-0.0.20-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b6594b58d8b0e9d16a22b3045fc1305db4b132c8d70c17784ab8c7a7cc986807", size = 10974655, upload-time = "2026-03-02T15:51:46.011Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/c1/b9e3e3f28fe63486331e653f6aeb4184af8b1fe80542fcf74d2dda40a93d/ty-0.0.20-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3662f890518ce6cf4d7568f57d03906912d2afbf948a01089a28e325b1ef198c", size = 10761325, upload-time = "2026-03-02T15:51:26.818Z" },
+    { url = "https://files.pythonhosted.org/packages/39/9e/67db935bdedf219a00fb69ec5437ba24dab66e0f2e706dd54a4eca234b84/ty-0.0.20-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0e3ffbae58f9f0d17cdc4ac6d175ceae560b7ed7d54f9ddfb1c9f31054bcdc2c", size = 10145793, upload-time = "2026-03-02T15:51:38.562Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/de/b0eb815d4dc5a819c7e4faddc2a79058611169f7eef07ccc006531ce228c/ty-0.0.20-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:176e52bc8bb00b0e84efd34583962878a447a3a0e34ecc45fd7097a37554261b", size = 10189640, upload-time = "2026-03-02T15:51:50.202Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/71/63734923965cbb70df1da3e93e4b8875434e326b89e9f850611122f279bf/ty-0.0.20-py3-none-musllinux_1_2_i686.whl", hash = "sha256:b2bc73025418e976ca4143dde71fb9025a90754a08ac03e6aa9b80d4bed1294b", size = 10370568, upload-time = "2026-03-02T15:51:42.295Z" },
+    { url = "https://files.pythonhosted.org/packages/32/a0/a532c2048533347dff48e9ca98bd86d2c224356e101688a8edaf8d6973fb/ty-0.0.20-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d52f7c9ec6e363e094b3c389c344d5a140401f14a77f0625e3f28c21918552f5", size = 10853999, upload-time = "2026-03-02T15:51:58.963Z" },
+    { url = "https://files.pythonhosted.org/packages/48/88/36c652c658fe96658043e4abc8ea97801de6fb6e63ab50aaa82807bff1d8/ty-0.0.20-py3-none-win32.whl", hash = "sha256:c7d32bfe93f8fcaa52b6eef3f1b930fd7da410c2c94e96f7412c30cfbabf1d17", size = 9744206, upload-time = "2026-03-02T15:51:54.183Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/a7/a4a13bed1d7fd9d97aaa3c5bb5e6d3e9a689e6984806cbca2ab4c9233cac/ty-0.0.20-py3-none-win_amd64.whl", hash = "sha256:a5e10f40fc4a0a1cbcb740a4aad5c7ce35d79f030836ea3183b7a28f43170248", size = 10711999, upload-time = "2026-03-02T15:51:29.212Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/7e/6bfd748a9f4ff9267ed3329b86a0f02cdf6ab49f87bc36c8a164852f99fc/ty-0.0.20-py3-none-win_arm64.whl", hash = "sha256:53f7a5c12c960e71f160b734f328eff9a35d578af4b67a36b0bb5990ac5cdc27", size = 10150143, upload-time = "2026-03-02T15:51:31.283Z" },
+]
+
 [[package]]
 name = "typing-extensions"
 version = "4.15.0"

From da54b7389720a7c6b01de7a0381e07e84e2a7083 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 3 Mar 2026 22:10:33 +0700
Subject: [PATCH 172/236] Temporarily switch to the `move-httpx-to-curl_cffi`
 branch for testing purposes.

---
 pyproject.toml |  3 ++
 uv.lock        | 76 +++-----------------------------------------------
 2 files changed, 7 insertions(+), 72 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f961758..03d5661 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -63,3 +63,6 @@ extend-immutable-calls = [
 [tool.ruff.format]
 quote-style = "double"
 indent-style = "space"
+
+[tool.uv.sources]
+gemini-webapi = { git = "https://github.com/luuquangvu/Gemini-API.git", rev = "move-httpx-to-curl_cffi" }
diff --git a/uv.lock b/uv.lock
index f6415bd..f0019d4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -157,7 +157,7 @@ dev = [
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
     { name = "fastapi", specifier = ">=0.135.1" },
-    { name = "gemini-webapi", specifier = ">=1.19.2" },
+    { name = "gemini-webapi", git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
@@ -176,18 +176,14 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "1.19.2"
-source = { registry = "https://pypi.org/simple" }
+version = "0.0.post232"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#645a17d962288e9f99e805db0c1a7385718afcb6" }
 dependencies = [
-    { name = "httpx", extra = ["http2"] },
+    { name = "curl-cffi" },
     { name = "loguru" },
     { name = "orjson" },
     { name = "pydantic" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/d4/d3/b4ff659bfb0fff378b16f934429d53b7f78451ac184406ab2f9ddda9357e/gemini_webapi-1.19.2.tar.gz", hash = "sha256:f6e96e28f3f1e78be6176fbb8b2eca25ad509aec6cfacf99c415559f27691b71", size = 266805, upload-time = "2026-02-14T05:26:04.103Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/2d/e7/a676f721980e3daa43e05abe94884a84648efdc6203889e7a0f5c8ca2e98/gemini_webapi-1.19.2-py3-none-any.whl", hash = "sha256:fdc088ca35361301f40ea807a58c4bec18886b17a54164a1a8f3d639eadc6a66", size = 63524, upload-time = "2026-02-14T05:26:02.173Z" },
-]
 
 [[package]]
 name = "h11"
@@ -198,41 +194,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
 ]
 
-[[package]]
-name = "h2"
-version = "4.3.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "hpack" },
-    { name = "hyperframe" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" },
-]
-
-[[package]]
-name = "hpack"
-version = "4.1.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" },
-]
-
-[[package]]
-name = "httpcore"
-version = "1.0.9"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "certifi" },
-    { name = "h11" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
-]
-
 [[package]]
 name = "httptools"
 version = "0.7.1"
@@ -248,35 +209,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/fd/82/88e8d6d2c51edc1cc391b6e044c6c435b6aebe97b1abc33db1b0b24cd582/httptools-0.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:322d00c2068d125bd570f7bf78b2d367dad02b919d8581d7476d8b75b294e3e6", size = 85743, upload-time = "2025-10-10T03:54:53.448Z" },
 ]
 
-[[package]]
-name = "httpx"
-version = "0.28.1"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "anyio" },
-    { name = "certifi" },
-    { name = "httpcore" },
-    { name = "idna" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
-]
-
-[package.optional-dependencies]
-http2 = [
-    { name = "h2" },
-]
-
-[[package]]
-name = "hyperframe"
-version = "6.1.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" },
-]
-
 [[package]]
 name = "idna"
 version = "3.11"

From 22edc6b8203f92e0941ae023f92a16cbe066e795 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 3 Mar 2026 22:13:33 +0700
Subject: [PATCH 173/236] Add workflow_dispatch trigger to Docker workflow

---
 .github/workflows/docker.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 6fb71e8..4b15e84 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -14,6 +14,7 @@ on:
       - "Dockerfile"
       - "run.py"
       - ".github/workflows/docker.yaml"
+  workflow_dispatch:
 
 env:
   REGISTRY: ghcr.io

From d2937d5caa1fc73aaa066153627700b476129c37 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 3 Mar 2026 22:25:57 +0700
Subject: [PATCH 174/236] Revert "Temporarily switch to the
 `move-httpx-to-curl_cffi` branch for testing purposes."

This reverts commit da54b7389720a7c6b01de7a0381e07e84e2a7083.
---
 pyproject.toml |  3 --
 uv.lock        | 76 +++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 03d5661..f961758 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -63,6 +63,3 @@ extend-immutable-calls = [
 [tool.ruff.format]
 quote-style = "double"
 indent-style = "space"
-
-[tool.uv.sources]
-gemini-webapi = { git = "https://github.com/luuquangvu/Gemini-API.git", rev = "move-httpx-to-curl_cffi" }
diff --git a/uv.lock b/uv.lock
index f0019d4..f6415bd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -157,7 +157,7 @@ dev = [
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
     { name = "fastapi", specifier = ">=0.135.1" },
-    { name = "gemini-webapi", git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi" },
+    { name = "gemini-webapi", specifier = ">=1.19.2" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
@@ -176,14 +176,18 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post232"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#645a17d962288e9f99e805db0c1a7385718afcb6" }
+version = "1.19.2"
+source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "curl-cffi" },
+    { name = "httpx", extra = ["http2"] },
     { name = "loguru" },
     { name = "orjson" },
     { name = "pydantic" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/d4/d3/b4ff659bfb0fff378b16f934429d53b7f78451ac184406ab2f9ddda9357e/gemini_webapi-1.19.2.tar.gz", hash = "sha256:f6e96e28f3f1e78be6176fbb8b2eca25ad509aec6cfacf99c415559f27691b71", size = 266805, upload-time = "2026-02-14T05:26:04.103Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2d/e7/a676f721980e3daa43e05abe94884a84648efdc6203889e7a0f5c8ca2e98/gemini_webapi-1.19.2-py3-none-any.whl", hash = "sha256:fdc088ca35361301f40ea807a58c4bec18886b17a54164a1a8f3d639eadc6a66", size = 63524, upload-time = "2026-02-14T05:26:02.173Z" },
+]
 
 [[package]]
 name = "h11"
@@ -194,6 +198,41 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
 ]
 
+[[package]]
+name = "h2"
+version = "4.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "hpack" },
+    { name = "hyperframe" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" },
+]
+
+[[package]]
+name = "hpack"
+version = "4.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" },
+]
+
+[[package]]
+name = "httpcore"
+version = "1.0.9"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "h11" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
+]
+
 [[package]]
 name = "httptools"
 version = "0.7.1"
@@ -209,6 +248,35 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/fd/82/88e8d6d2c51edc1cc391b6e044c6c435b6aebe97b1abc33db1b0b24cd582/httptools-0.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:322d00c2068d125bd570f7bf78b2d367dad02b919d8581d7476d8b75b294e3e6", size = 85743, upload-time = "2025-10-10T03:54:53.448Z" },
 ]
 
+[[package]]
+name = "httpx"
+version = "0.28.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "certifi" },
+    { name = "httpcore" },
+    { name = "idna" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
+]
+
+[package.optional-dependencies]
+http2 = [
+    { name = "h2" },
+]
+
+[[package]]
+name = "hyperframe"
+version = "6.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" },
+]
+
 [[package]]
 name = "idna"
 version = "3.11"

From ca4475a427159bc6413ab21aa373c092b4754b42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 3 Mar 2026 22:30:34 +0700
Subject: [PATCH 175/236] Fix type checking

---
 app/server/chat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 0386bd3..109abb2 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -857,7 +857,7 @@ async def _find_reusable_session(
 async def _send_with_split(
     session: ChatSession,
     text: str,
-    files: list[str | Path | bytes | io.BytesIO] | None = None,
+    files: list[Any] | None = None,
     stream: bool = False,
 ) -> AsyncGenerator[ModelOutput] | ModelOutput:
     """Send text to Gemini, splitting or converting to attachment if too long."""
@@ -876,7 +876,7 @@ async def _send_with_split(
     file_obj = io.BytesIO(text.encode("utf-8"))
     file_obj.name = "message.txt"
     try:
-        final_files: list[str | Path | bytes | io.BytesIO] = list(files) if files else []
+        final_files: list[Any] = list(files) if files else []
         final_files.append(file_obj)
         instruction = (
             "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n"

From 91105e86b005c5e74803d0ddf2ca43739cbf2217 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 4 Mar 2026 10:01:52 +0700
Subject: [PATCH 176/236] Update dependencies to latest versions

---
 pyproject.toml | 2 +-
 uv.lock        | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f961758..ad31996 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ requires-python = "==3.13.*"
 dependencies = [
     "curl-cffi>=0.14.0",
     "fastapi>=0.135.1",
-    "gemini-webapi>=1.19.2",
+    "gemini-webapi>=1.20.0",
     "httptools>=0.7.1",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
diff --git a/uv.lock b/uv.lock
index f6415bd..7fbcb24 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,7 +176,7 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "1.19.2"
+version = "1.20.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "httpx", extra = ["http2"] },
@@ -184,9 +184,9 @@ dependencies = [
     { name = "orjson" },
     { name = "pydantic" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/d4/d3/b4ff659bfb0fff378b16f934429d53b7f78451ac184406ab2f9ddda9357e/gemini_webapi-1.19.2.tar.gz", hash = "sha256:f6e96e28f3f1e78be6176fbb8b2eca25ad509aec6cfacf99c415559f27691b71", size = 266805, upload-time = "2026-02-14T05:26:04.103Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ad/65/4c283c0c7a1004a92ef575fa3deb5d2cab8d66fa3a8de6500031491d2d6b/gemini_webapi-1.20.0.tar.gz", hash = "sha256:540e77aee4c28f57be4a9a3cb7845f509411763fc6958dc0af984822e2dd3bc1", size = 267233, upload-time = "2026-03-03T22:48:14.046Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/2d/e7/a676f721980e3daa43e05abe94884a84648efdc6203889e7a0f5c8ca2e98/gemini_webapi-1.19.2-py3-none-any.whl", hash = "sha256:fdc088ca35361301f40ea807a58c4bec18886b17a54164a1a8f3d639eadc6a66", size = 63524, upload-time = "2026-02-14T05:26:02.173Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/93/bd3f3bc00da483fbc8d4c329a602b037dcf410cd63d42d1d5c3ee81ec9e4/gemini_webapi-1.20.0-py3-none-any.whl", hash = "sha256:49fe29d7ce80c686c20f45a3218d1832bdd0673978e18ea86d8fb8c3ebff6c18", size = 63759, upload-time = "2026-03-03T22:48:12.769Z" },
 ]
 
 [[package]]

From d858c2b31aab51424db97d5f2791b03acec394be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 4 Mar 2026 10:03:19 +0700
Subject: [PATCH 177/236] Temporarily switch to the `move-httpx-to-curl_cffi`
 branch for testing purposes.

---
 Dockerfile     |  2 +-
 pyproject.toml |  3 ++
 uv.lock        | 76 +++-----------------------------------------------
 3 files changed, 8 insertions(+), 73 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 62ce9d1..5c669c2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,7 +6,7 @@ LABEL org.opencontainers.image.title="Gemini-FastAPI" \
 WORKDIR /app
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    tini \
+    tini git \
     && rm -rf /var/lib/apt/lists/*
 
 ENV UV_COMPILE_BYTECODE=1 \
diff --git a/pyproject.toml b/pyproject.toml
index ad31996..9a8f666 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -63,3 +63,6 @@ extend-immutable-calls = [
 [tool.ruff.format]
 quote-style = "double"
 indent-style = "space"
+
+[tool.uv.sources]
+gemini-webapi = { git = "https://github.com/luuquangvu/Gemini-API.git", rev = "move-httpx-to-curl_cffi" }
diff --git a/uv.lock b/uv.lock
index 7fbcb24..b262688 100644
--- a/uv.lock
+++ b/uv.lock
@@ -157,7 +157,7 @@ dev = [
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
     { name = "fastapi", specifier = ">=0.135.1" },
-    { name = "gemini-webapi", specifier = ">=1.19.2" },
+    { name = "gemini-webapi", git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=1.7.5" },
     { name = "loguru", specifier = ">=0.7.3" },
@@ -176,18 +176,14 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "1.20.0"
-source = { registry = "https://pypi.org/simple" }
+version = "0.0.post243"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#79d86aff4785dd45bd951910b8be75cf8122e9cd" }
 dependencies = [
-    { name = "httpx", extra = ["http2"] },
+    { name = "curl-cffi" },
     { name = "loguru" },
     { name = "orjson" },
     { name = "pydantic" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ad/65/4c283c0c7a1004a92ef575fa3deb5d2cab8d66fa3a8de6500031491d2d6b/gemini_webapi-1.20.0.tar.gz", hash = "sha256:540e77aee4c28f57be4a9a3cb7845f509411763fc6958dc0af984822e2dd3bc1", size = 267233, upload-time = "2026-03-03T22:48:14.046Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/4c/93/bd3f3bc00da483fbc8d4c329a602b037dcf410cd63d42d1d5c3ee81ec9e4/gemini_webapi-1.20.0-py3-none-any.whl", hash = "sha256:49fe29d7ce80c686c20f45a3218d1832bdd0673978e18ea86d8fb8c3ebff6c18", size = 63759, upload-time = "2026-03-03T22:48:12.769Z" },
-]
 
 [[package]]
 name = "h11"
@@ -198,41 +194,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
 ]
 
-[[package]]
-name = "h2"
-version = "4.3.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "hpack" },
-    { name = "hyperframe" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" },
-]
-
-[[package]]
-name = "hpack"
-version = "4.1.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" },
-]
-
-[[package]]
-name = "httpcore"
-version = "1.0.9"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "certifi" },
-    { name = "h11" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
-]
-
 [[package]]
 name = "httptools"
 version = "0.7.1"
@@ -248,35 +209,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/fd/82/88e8d6d2c51edc1cc391b6e044c6c435b6aebe97b1abc33db1b0b24cd582/httptools-0.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:322d00c2068d125bd570f7bf78b2d367dad02b919d8581d7476d8b75b294e3e6", size = 85743, upload-time = "2025-10-10T03:54:53.448Z" },
 ]
 
-[[package]]
-name = "httpx"
-version = "0.28.1"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "anyio" },
-    { name = "certifi" },
-    { name = "httpcore" },
-    { name = "idna" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
-]
-
-[package.optional-dependencies]
-http2 = [
-    { name = "h2" },
-]
-
-[[package]]
-name = "hyperframe"
-version = "6.1.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" },
-]
-
 [[package]]
 name = "idna"
 version = "3.11"

From 592132fefa1bffec95bebb01f696cc927afc3e96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 4 Mar 2026 11:15:24 +0700
Subject: [PATCH 178/236] Resolve HTTP 422 "Input should be a valid string"

---
 app/models/models.py | 58 +++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 28 deletions(-)

diff --git a/app/models/models.py b/app/models/models.py
index 7284b5f..9b3c0ff 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -150,14 +150,14 @@ class ChatCompletionResponse(BaseModel):
 class ResponseInputText(BaseModel):
     """Text content item in a Responses API input message."""
 
-    type: Literal["input_text"]
+    type: Literal["input_text"] | None = Field(default="input_text")
     text: str | None = Field(default=None)
 
 
 class ResponseInputImage(BaseModel):
     """Image content item in a Responses API input message."""
 
-    type: Literal["input_image"]
+    type: Literal["input_image"] | None = Field(default="input_image")
     detail: Literal["auto", "low", "high"] | None = Field(default=None)
     file_id: str | None = Field(default=None)
     image_url: str | None = Field(default=None)
@@ -166,7 +166,7 @@ class ResponseInputImage(BaseModel):
 class ResponseInputFile(BaseModel):
     """File content item in a Responses API input message."""
 
-    type: Literal["input_file"]
+    type: Literal["input_file"] | None = Field(default="input_file")
     file_id: str | None = Field(default=None)
     file_url: str | None = Field(default=None)
     file_data: str | None = Field(default=None)
@@ -204,10 +204,10 @@ class ResponseFunctionToolCall(BaseModel):
 
     type: Literal["function_call"] | None = Field(default="function_call")
     id: str | None = Field(default=None)
-    call_id: str
-    name: str
-    arguments: str
-    status: Literal["in_progress", "completed", "incomplete"] = Field(default="completed")
+    call_id: str | None = Field(default=None)
+    name: str | None = Field(default=None)
+    arguments: str | None = Field(default=None)
+    status: Literal["in_progress", "completed", "incomplete"] | None = Field(default="completed")
 
 
 class FunctionCallOutput(BaseModel):
@@ -215,9 +215,11 @@ class FunctionCallOutput(BaseModel):
 
     type: Literal["function_call_output"] | None = Field(default="function_call_output")
     id: str | None = Field(default=None)
-    call_id: str
-    output: str | list[ResponseInputText | ResponseInputImage | ResponseInputFile]
-    status: Literal["in_progress", "completed", "incomplete"] = Field(default="completed")
+    call_id: str | None = Field(default=None)
+    output: str | list[ResponseInputText | ResponseInputImage | ResponseInputFile] | None = Field(
+        default=None
+    )
+    status: Literal["in_progress", "completed", "incomplete"] | None = Field(default="completed")
 
 
 class FunctionTool(BaseModel):
@@ -267,7 +269,7 @@ class ResponseUsage(BaseModel):
 class ResponseOutputText(BaseModel):
     """Text content part inside a Responses API output message."""
 
-    type: Literal["output_text"]
+    type: Literal["output_text"] | None = Field(default="output_text")
     text: str | None = Field(default=None)
     annotations: list[dict[str, Any]] = Field(default_factory=list)
     logprobs: list[dict[str, Any]] | None = Field(default=None)
@@ -276,7 +278,7 @@ class ResponseOutputText(BaseModel):
 class ResponseOutputRefusal(BaseModel):
     """Refusal content part inside a Responses API output message."""
 
-    type: Literal["refusal"]
+    type: Literal["refusal"] | None = Field(default="refusal")
     refusal: str | None = Field(default=None)
 
 
@@ -286,8 +288,8 @@ class ResponseOutputRefusal(BaseModel):
 class ResponseOutputMessage(BaseModel):
     """Assistant message output item in a Responses API response."""
 
-    id: str
-    type: Literal["message"]
+    id: str | None = Field(default=None)
+    type: Literal["message"] | None = Field(default="message")
     status: Literal["in_progress", "completed", "incomplete"] = Field(default="completed")
     role: Literal["assistant"]
     content: list[ResponseOutputText | ResponseOutputRefusal]
@@ -296,22 +298,22 @@ class ResponseOutputMessage(BaseModel):
 class SummaryTextContent(BaseModel):
     """Summary text part inside a reasoning item."""
 
-    type: Literal["summary_text"] = Field(default="summary_text")
-    text: str
+    type: Literal["summary_text"] | None = Field(default="summary_text")
+    text: str | None = Field(default=None)
 
 
 class ReasoningTextContent(BaseModel):
     """Full reasoning text part inside a reasoning item."""
 
-    type: Literal["reasoning_text"] = Field(default="reasoning_text")
-    text: str
+    type: Literal["reasoning_text"] | None = Field(default="reasoning_text")
+    text: str | None = Field(default=None)
 
 
 class ResponseReasoningItem(BaseModel):
     """A reasoning output item emitted by a thinking model."""
 
-    id: str
-    type: Literal["reasoning"] = Field(default="reasoning")
+    id: str | None = Field(default=None)
+    type: Literal["reasoning"] | None = Field(default="reasoning")
     status: Literal["in_progress", "completed", "incomplete"] | None = Field(default=None)
     summary: list[SummaryTextContent] | None = Field(default=None)
     content: list[ReasoningTextContent] | None = Field(default=None)
@@ -321,19 +323,19 @@ class ResponseReasoningItem(BaseModel):
 class ResponseToolCall(BaseModel):
     """A function-call output item emitted by the model."""
 
-    id: str
-    type: Literal["function_call"] = Field(default="function_call")
-    call_id: str
-    name: str
-    arguments: str
-    status: Literal["in_progress", "completed", "incomplete"] = Field(default="completed")
+    id: str | None = Field(default=None)
+    type: Literal["function_call"] | None = Field(default="function_call")
+    call_id: str | None = Field(default=None)
+    name: str | None = Field(default=None)
+    arguments: str | None = Field(default=None)
+    status: Literal["in_progress", "completed", "incomplete"] | None = Field(default="completed")
 
 
 class ImageGenerationCall(BaseModel):
     """An image-generation output item emitted by the Responses API."""
 
-    id: str
-    type: Literal["image_generation_call"] = Field(default="image_generation_call")
+    id: str | None = Field(default=None)
+    type: Literal["image_generation_call"] | None = Field(default="image_generation_call")
     status: Literal["completed", "in_progress", "generating", "failed"] = Field(default="completed")
     result: str | None = Field(default=None)
     output_format: str | None = Field(default=None)

From 80750ad2c79bb409416663050e5d6cca18e6ef56 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 4 Mar 2026 15:25:12 +0700
Subject: [PATCH 179/236] Move the content storage in LMDB to a separate
 `AppMessage`

The stored content is now independent of the `ChatCompletionMessage` format, which simplifies maintenance and improves scalability.
---
 app/models/__init__.py |  95 +-------
 app/models/core.py     |  48 ++++
 app/models/models.py   |  16 --
 app/server/chat.py     | 495 +++++++++++++++++++++++------------------
 app/services/client.py |  37 ++-
 app/services/lmdb.py   | 200 +++--------------
 app/utils/helper.py    |  14 +-
 7 files changed, 395 insertions(+), 510 deletions(-)
 create mode 100644 app/models/core.py

diff --git a/app/models/__init__.py b/app/models/__init__.py
index 1e02dcb..6fa671f 100644
--- a/app/models/__init__.py
+++ b/app/models/__init__.py
@@ -1,93 +1,4 @@
-from .models import (
-    ChatCompletionAssistantContentItem,
-    ChatCompletionChoice,
-    ChatCompletionContentItem,
-    ChatCompletionFunctionTool,
-    ChatCompletionMessage,
-    ChatCompletionMessageToolCall,
-    ChatCompletionNamedToolChoice,
-    ChatCompletionNamedToolChoiceFunction,
-    ChatCompletionRequest,
-    ChatCompletionRequestContentItem,
-    ChatCompletionResponse,
-    CompletionUsage,
-    ConversationInStore,
-    FunctionCall,
-    FunctionCallOutput,
-    FunctionDefinition,
-    FunctionTool,
-    HealthCheckResponse,
-    ImageGeneration,
-    ImageGenerationCall,
-    ModelData,
-    ModelListResponse,
-    ReasoningTextContent,
-    ResponseCreateRequest,
-    ResponseCreateResponse,
-    ResponseFormatText,
-    ResponseFormatTextJSONSchemaConfig,
-    ResponseFunctionToolCall,
-    ResponseInputFile,
-    ResponseInputImage,
-    ResponseInputMessage,
-    ResponseInputMessageContentList,
-    ResponseInputText,
-    ResponseOutputContent,
-    ResponseOutputMessage,
-    ResponseOutputRefusal,
-    ResponseOutputText,
-    ResponseReasoningItem,
-    ResponseTextConfig,
-    ResponseToolCall,
-    ResponseUsage,
-    SummaryTextContent,
-    ToolChoiceFunction,
-    ToolChoiceTypes,
-)
+# ruff: noqa: F403
 
-__all__ = [
-    "ChatCompletionAssistantContentItem",
-    "ChatCompletionChoice",
-    "ChatCompletionContentItem",
-    "ChatCompletionFunctionTool",
-    "ChatCompletionMessage",
-    "ChatCompletionMessageToolCall",
-    "ChatCompletionNamedToolChoice",
-    "ChatCompletionNamedToolChoiceFunction",
-    "ChatCompletionRequest",
-    "ChatCompletionRequestContentItem",
-    "ChatCompletionResponse",
-    "CompletionUsage",
-    "ConversationInStore",
-    "FunctionCall",
-    "FunctionCallOutput",
-    "FunctionDefinition",
-    "FunctionTool",
-    "HealthCheckResponse",
-    "ImageGeneration",
-    "ImageGenerationCall",
-    "ModelData",
-    "ModelListResponse",
-    "ReasoningTextContent",
-    "ResponseCreateRequest",
-    "ResponseCreateResponse",
-    "ResponseFormatText",
-    "ResponseFormatTextJSONSchemaConfig",
-    "ResponseFunctionToolCall",
-    "ResponseInputFile",
-    "ResponseInputImage",
-    "ResponseInputMessage",
-    "ResponseInputMessageContentList",
-    "ResponseInputText",
-    "ResponseOutputContent",
-    "ResponseOutputMessage",
-    "ResponseOutputRefusal",
-    "ResponseOutputText",
-    "ResponseReasoningItem",
-    "ResponseTextConfig",
-    "ResponseToolCall",
-    "ResponseUsage",
-    "SummaryTextContent",
-    "ToolChoiceFunction",
-    "ToolChoiceTypes",
-]
+from .core import *
+from .models import *
diff --git a/app/models/core.py b/app/models/core.py
new file mode 100644
index 0000000..d92dcc6
--- /dev/null
+++ b/app/models/core.py
@@ -0,0 +1,48 @@
+from datetime import datetime
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field
+
+
+class AppToolCallFunction(BaseModel):
+    name: str
+    arguments: str
+
+
+class AppToolCall(BaseModel):
+    id: str
+    type: Literal["function"] = "function"
+    function: AppToolCallFunction
+
+
+class AppContentItem(BaseModel):
+    type: str
+    text: str | None = None
+    url: str | None = None
+    file_data: str | bytes | None = Field(default=None, exclude=True)
+    filename: str | None = None
+    raw_data: dict[str, Any] | None = None
+
+
+class AppMessage(BaseModel):
+    role: Literal["system", "user", "assistant", "tool"]
+    name: str | None = None
+    content: str | list[AppContentItem] | None = None
+    tool_calls: list[AppToolCall] | None = None
+    tool_call_id: str | None = None
+    reasoning_content: str | None = None
+
+
+class ConversationInStore(BaseModel):
+    """Persisted conversation record stored in LMDB."""
+
+    created_at: datetime | None = Field(default=None)
+    updated_at: datetime | None = Field(default=None)
+    model: str = Field(..., description="Model used for the conversation")
+    client_id: str = Field(..., description="Identifier of the Gemini client")
+    metadata: list[str | None] = Field(
+        ..., description="Metadata for Gemini API to locate the conversation"
+    )
+    messages: list[AppMessage] = Field(
+        ..., description="Canonical message contents in the conversation"
+    )
diff --git a/app/models/models.py b/app/models/models.py
index 9b3c0ff..71462b2 100644
--- a/app/models/models.py
+++ b/app/models/models.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-from datetime import datetime
 from typing import Any, Literal
 
 from pydantic import BaseModel, Field, model_validator
@@ -454,21 +453,6 @@ class HealthCheckResponse(BaseModel):
     error: str | None = Field(default=None)
 
 
-class ConversationInStore(BaseModel):
-    """Persisted conversation record stored in LMDB."""
-
-    created_at: datetime | None = Field(default=None)
-    updated_at: datetime | None = Field(default=None)
-    model: str = Field(..., description="Model used for the conversation")
-    client_id: str = Field(..., description="Identifier of the Gemini client")
-    metadata: list[str | None] = Field(
-        ..., description="Metadata for Gemini API to locate the conversation"
-    )
-    messages: list[ChatCompletionMessage] = Field(
-        ..., description="Message contents in the conversation"
-    )
-
-
 ChatCompletionMessage.model_rebuild()
 ChatCompletionMessageToolCall.model_rebuild()
 ChatCompletionRequest.model_rebuild()
diff --git a/app/server/chat.py b/app/server/chat.py
index 109abb2..86d25ae 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -7,7 +7,7 @@
 from dataclasses import dataclass
 from datetime import UTC, datetime
 from pathlib import Path
-from typing import Any, Literal
+from typing import Any, Literal, cast
 
 import orjson
 from fastapi import APIRouter, Depends, HTTPException, Request, status
@@ -19,18 +19,18 @@
 from loguru import logger
 
 from app.models import (
+    AppContentItem,
+    AppMessage,
+    AppToolCall,
+    AppToolCallFunction,
     ChatCompletionChoice,
-    ChatCompletionContentItem,
     ChatCompletionFunctionTool,
     ChatCompletionMessage,
     ChatCompletionMessageToolCall,
     ChatCompletionNamedToolChoice,
     ChatCompletionRequest,
-    ChatCompletionRequestContentItem,
     ChatCompletionResponse,
     CompletionUsage,
-    ConversationInStore,
-    FunctionCall,
     FunctionCallOutput,
     FunctionTool,
     ImageGeneration,
@@ -130,7 +130,7 @@ async def _image_to_base64(
 
 
 def _calculate_usage(
-    messages: list[ChatCompletionMessage],
+    messages: list[AppMessage],
     assistant_text: str | None,
     tool_calls: list[Any] | None,
     thoughts: str | None = None,
@@ -321,36 +321,109 @@ def _process_llm_output(
     return thoughts, visible_output, storage_output, tool_calls
 
 
+def _convert_to_app_messages(messages: list[ChatCompletionMessage]) -> list[AppMessage]:
+    """Convert ChatCompletionMessage (OpenAI format) to generic internal AppMessage."""
+    app_messages = []
+    for msg in messages:
+        app_content = None
+        if isinstance(msg.content, str):
+            app_content = msg.content
+        elif isinstance(msg.content, list):
+            app_content = []
+            for item in msg.content:
+                if item.type == "text":
+                    app_content.append(AppContentItem(type="text", text=item.text))
+                elif item.type == "image_url":
+                    media_dict = getattr(item, "image_url", None)
+                    url = media_dict.get("url") if media_dict else None
+                    if url and url.startswith("data:"):
+                        # image_url can be either a regular url or base64 data url
+                        app_content.append(AppContentItem(type="image_url", url=url))
+                    else:
+                        app_content.append(AppContentItem(type="image_url", url=url))
+                elif item.type == "file":
+                    file_dict = getattr(item, "file", None)
+                    filename = file_dict.get("filename") if file_dict else None
+                    file_data = file_dict.get("file_data") if file_dict else None
+                    app_content.append(
+                        AppContentItem(type="file", filename=filename, file_data=file_data)
+                    )
+                elif item.type == "input_audio":
+                    audio_dict = getattr(item, "input_audio", None)
+                    audio_data = audio_dict.get("data") if audio_dict else None
+                    app_content.append(
+                        AppContentItem(
+                            type="input_audio",
+                            file_data=audio_data,
+                            raw_data=audio_dict,
+                        )
+                    )
+                elif item.type in ("refusal", "reasoning"):
+                    text_val = getattr(item, "text", None) or getattr(item, item.type, None)
+                    app_content.append(AppContentItem(type=item.type, text=text_val))
+
+        tool_calls = None
+        if msg.tool_calls:
+            tool_calls = [
+                AppToolCall(
+                    id=tc.id if hasattr(tc, "id") else tc.get("id", ""),
+                    type="function",
+                    function=AppToolCallFunction(
+                        name=tc.function.name
+                        if hasattr(tc, "function")
+                        else tc.get("function", {}).get("name", ""),
+                        arguments=tc.function.arguments
+                        if hasattr(tc, "function")
+                        else tc.get("function", {}).get("arguments", ""),
+                    ),
+                )
+                for tc in msg.tool_calls
+            ]
+
+        role = {"developer": "system", "function": "tool"}.get(msg.role, msg.role)
+        if role not in ("system", "user", "assistant", "tool"):
+            role = "system"
+
+        app_messages.append(
+            AppMessage(
+                role=role,  # type: ignore
+                content=app_content,
+                tool_calls=tool_calls,
+                tool_call_id=msg.tool_call_id,
+                name=msg.name,
+                reasoning_content=getattr(msg, "reasoning_content", None),
+            )
+        )
+    return app_messages
+
+
 def _persist_conversation(
     db: LMDBConversationStore,
     model_name: str,
     client_id: str,
     metadata: list[str | None],
-    messages: list[ChatCompletionMessage],
+    messages: list[AppMessage],
     storage_output: str | None,
     tool_calls: list[Any] | None,
-    thoughts: str | None = None,
 ) -> str | None:
     """Unified logic to save conversation history to LMDB."""
     try:
-        current_assistant_message = ChatCompletionMessage(
+        current_assistant_message = AppMessage(
             role="assistant",
             content=storage_output or None,
             tool_calls=tool_calls or None,
-            reasoning_content=thoughts or None,
+            reasoning_content=None,
         )
         full_history = [*messages, current_assistant_message]
-        cleaned_history = db.sanitize_messages(full_history)
 
-        conv = ConversationInStore(
-            model=model_name,
+        db.store(
             client_id=client_id,
+            model=model_name,
+            messages=full_history,
             metadata=metadata,
-            messages=cleaned_history,
         )
-        key = db.store(conv)
-        logger.debug(f"Conversation saved to LMDB with key: {key[:12]}")
-        return key
+        logger.debug("Conversation saved to LMDB.")
+        return "success"
     except Exception as e:
         logger.warning(f"Failed to save {len(messages) + 1} messages to LMDB: {e}")
         return None
@@ -486,7 +559,7 @@ def _build_image_generation_instruction(
     return "\n\n".join(instructions)
 
 
-def _append_tool_hint_to_last_user_message(messages: list[ChatCompletionMessage]) -> None:
+def _append_tool_hint_to_last_user_message(messages: list[AppMessage]) -> None:
     """Ensure the last user message carries the tool wrap hint."""
     for msg in reversed(messages):
         if msg.role != "user" or msg.content is None:
@@ -508,12 +581,12 @@ def _append_tool_hint_to_last_user_message(messages: list[ChatCompletionMessage]
                 return
 
             messages_text = TOOL_WRAP_HINT.strip()
-            msg.content.append(ChatCompletionRequestContentItem(type="text", text=messages_text))
+            msg.content.append(AppContentItem(type="text", text=messages_text))
             return
 
 
 def _prepare_messages_for_model(
-    source_messages: list[ChatCompletionMessage],
+    source_messages: list[AppMessage],
     tools: list[ChatCompletionFunctionTool] | None,
     tool_choice: Literal["none", "auto", "required"]
     | ChatCompletionNamedToolChoice
@@ -522,7 +595,7 @@ def _prepare_messages_for_model(
     | None,
     extra_instructions: list[str] | None = None,
     inject_system_defaults: bool = True,
-) -> list[ChatCompletionMessage]:
+) -> list[AppMessage]:
     """Return a copy of messages enriched with tool instructions when needed."""
     prepared = [msg.model_copy(deep=True) for msg in source_messages]
 
@@ -564,7 +637,7 @@ def _prepare_messages_for_model(
             separator = "\n\n" if existing else ""
             prepared[0].content = f"{existing}{separator}{combined_instructions}"
     else:
-        prepared.insert(0, ChatCompletionMessage(role="system", content=combined_instructions))
+        prepared.insert(0, AppMessage(role="system", content=combined_instructions))
 
     if tools and tool_choice != "none" and not tool_prompt_injected:
         _append_tool_hint_to_last_user_message(prepared)
@@ -572,33 +645,36 @@ def _prepare_messages_for_model(
     return prepared
 
 
-def _response_items_to_messages(
+def _convert_responses_to_app_messages(
     items: Any,
-) -> list[ChatCompletionMessage]:
-    """Convert Responses API input items into internal Message objects."""
-    messages: list[ChatCompletionMessage] = []
+) -> list[AppMessage]:
+    """Convert Responses API input items into internal AppMessage objects."""
+    messages: list[AppMessage] = []
 
     if isinstance(items, str):
-        messages.append(ChatCompletionMessage(role="user", content=items))
+        messages.append(AppMessage(role="user", content=items))
         logger.debug("Normalized Responses input: single string message.")
         return messages
 
     for item in items:
         if isinstance(item, (ResponseInputMessage, ResponseOutputMessage)):
-            role = item.role
+            raw_role = getattr(item, "role", "user")
+            normalized_role = {"developer": "system", "function": "tool"}.get(raw_role, raw_role)
+            if normalized_role not in ("system", "user", "assistant", "tool"):
+                normalized_role = "system"
+            role = cast(Literal["system", "user", "assistant", "tool"], normalized_role)
+
             content = item.content
             if isinstance(content, str):
-                messages.append(ChatCompletionMessage(role=role, content=content))
+                messages.append(AppMessage(role=role, content=content))
             else:
-                converted: list[ChatCompletionContentItem] = []
+                converted: list[AppContentItem] = []
                 reasoning_parts: list[str] = []
                 for part in content:
                     if part.type in ("input_text", "output_text"):
                         text_value = getattr(part, "text", "") or ""
                         if text_value:
-                            converted.append(
-                                ChatCompletionRequestContentItem(type="text", text=text_value)
-                            )
+                            converted.append(AppContentItem(type="text", text=text_value))
                     elif part.type == "reasoning_text":
                         text_value = getattr(part, "text", "") or ""
                         if text_value:
@@ -606,31 +682,22 @@ def _response_items_to_messages(
                     elif part.type == "input_image":
                         image_url = getattr(part, "image_url", None)
                         if image_url:
-                            converted.append(
-                                ChatCompletionRequestContentItem(
-                                    type="image_url",
-                                    image_url={
-                                        "url": image_url,
-                                        "detail": getattr(part, "detail", "auto") or "auto",
-                                    },
-                                )
-                            )
+                            converted.append(AppContentItem(type="image_url", url=image_url))
                     elif part.type == "input_file":
                         file_url = getattr(part, "file_url", None)
                         file_data = getattr(part, "file_data", None)
                         if file_url or file_data:
-                            file_info = {}
-                            if file_data:
-                                file_info["file_data"] = file_data
-                                file_info["filename"] = getattr(part, "filename", None)
-                            if file_url:
-                                file_info["url"] = file_url
                             converted.append(
-                                ChatCompletionRequestContentItem(type="file", file=file_info)
+                                AppContentItem(
+                                    type="file",
+                                    url=file_url,
+                                    file_data=file_data,
+                                    filename=getattr(part, "filename", None),
+                                )
                             )
                 reasoning_val = "\n\n".join(reasoning_parts) if reasoning_parts else None
                 messages.append(
-                    ChatCompletionMessage(
+                    AppMessage(
                         role=role,
                         content=converted or None,
                         reasoning_content=reasoning_val,
@@ -639,13 +706,13 @@ def _response_items_to_messages(
 
         elif isinstance(item, ResponseFunctionToolCall):
             messages.append(
-                ChatCompletionMessage(
+                AppMessage(
                     role="assistant",
                     tool_calls=[
-                        ChatCompletionMessageToolCall(
+                        AppToolCall(
                             id=item.call_id,
                             type="function",
-                            function=FunctionCall(name=item.name, arguments=item.arguments),
+                            function=AppToolCallFunction(name=item.name, arguments=item.arguments),
                         )
                     ],
                 )
@@ -653,7 +720,7 @@ def _response_items_to_messages(
         elif isinstance(item, FunctionCallOutput):
             output_content = str(item.output) if isinstance(item.output, list) else item.output
             messages.append(
-                ChatCompletionMessage(
+                AppMessage(
                     role="tool",
                     tool_call_id=item.call_id,
                     content=output_content,
@@ -664,14 +731,14 @@ def _response_items_to_messages(
             if item.content:
                 reasoning_val = "\n\n".join(x.text for x in item.content if x.text)
             messages.append(
-                ChatCompletionMessage(
+                AppMessage(
                     role="assistant",
                     reasoning_content=reasoning_val,
                 )
             )
         elif isinstance(item, ImageGenerationCall):
             messages.append(
-                ChatCompletionMessage(
+                AppMessage(
                     role="assistant",
                     content=item.result or None,
                 )
@@ -679,82 +746,109 @@ def _response_items_to_messages(
 
         else:
             if hasattr(item, "role"):
+                raw_role = getattr(item, "role", "user")
+                normalized_role = {"developer": "system", "function": "tool"}.get(
+                    raw_role, raw_role
+                )
+                if normalized_role not in ("system", "user", "assistant", "tool"):
+                    normalized_role = "system"
+                role = cast(Literal["system", "user", "assistant", "tool"], normalized_role)
                 messages.append(
-                    ChatCompletionMessage(
-                        role=item.role,
+                    AppMessage(
+                        role=role,
                         content=str(getattr(item, "content", "")),
                     )
                 )
 
-    logger.debug(f"Normalized Responses input: {len(messages)} message items.")
-    return messages
+    compacted_messages: list[AppMessage] = []
+    for msg in messages:
+        if not compacted_messages:
+            compacted_messages.append(msg)
+            continue
+
+        last_msg = compacted_messages[-1]
+        if last_msg.role == "assistant" and msg.role == "assistant":
+            reasoning_parts = []
+            if last_msg.reasoning_content:
+                reasoning_parts.append(last_msg.reasoning_content)
+            if msg.reasoning_content:
+                reasoning_parts.append(msg.reasoning_content)
+
+            merged_content = []
+            if isinstance(last_msg.content, str):
+                merged_content.append(AppContentItem(type="text", text=last_msg.content))
+            elif isinstance(last_msg.content, list):
+                merged_content.extend(last_msg.content)
+
+            if isinstance(msg.content, str):
+                merged_content.append(AppContentItem(type="text", text=msg.content))
+            elif isinstance(msg.content, list):
+                merged_content.extend(msg.content)
+
+            merged_tools = []
+            if last_msg.tool_calls:
+                merged_tools.extend(last_msg.tool_calls)
+            if msg.tool_calls:
+                merged_tools.extend(msg.tool_calls)
+
+            last_msg.reasoning_content = "\n\n".join(reasoning_parts) if reasoning_parts else None
+            last_msg.content = merged_content if merged_content else None
+            last_msg.tool_calls = merged_tools if merged_tools else None
+        else:
+            compacted_messages.append(msg)
 
+    logger.debug(f"Normalized Responses input: {len(compacted_messages)} message items.")
+    return compacted_messages
 
-def _instructions_to_messages(
+
+def _convert_instructions_to_app_messages(
     instructions: str | list[ResponseInputMessage] | None,
-) -> list[ChatCompletionMessage]:
-    """Normalize instructions payload into Message objects."""
+) -> list[AppMessage]:
+    """Normalize instructions payload into AppMessage objects."""
     if not instructions:
         return []
 
     if isinstance(instructions, str):
-        return [ChatCompletionMessage(role="system", content=instructions)]
+        return [AppMessage(role="system", content=instructions)]
 
-    instruction_messages: list[ChatCompletionMessage] = []
-    for item in instructions:
-        if item.type and item.type != "message":
+    instruction_messages: list[AppMessage] = []
+    for instruction in instructions:
+        if instruction.type and instruction.type != "message":
             continue
 
-        role = item.role
-        content = item.content
+        raw_role = instruction.role
+        normalized_role = {"developer": "system", "function": "tool"}.get(raw_role, raw_role)
+        if normalized_role not in ("system", "user", "assistant", "tool"):
+            normalized_role = "system"
+        role = cast(Literal["system", "user", "assistant", "tool"], normalized_role)
+
+        content = instruction.content
         if isinstance(content, str):
-            instruction_messages.append(ChatCompletionMessage(role=role, content=content))
+            instruction_messages.append(AppMessage(role=role, content=content))
         else:
-            converted: list[ChatCompletionContentItem] = []
-            reasoning_parts: list[str] = []
+            converted: list[AppContentItem] = []
             for part in content:
                 if part.type in ("input_text", "output_text"):
                     text_value = getattr(part, "text", "") or ""
                     if text_value:
-                        converted.append(
-                            ChatCompletionRequestContentItem(type="text", text=text_value)
-                        )
-                elif part.type == "reasoning_text":
-                    text_value = getattr(part, "text", "") or ""
-                    if text_value:
-                        reasoning_parts.append(text_value)
+                        converted.append(AppContentItem(type="text", text=text_value))
                 elif part.type == "input_image":
                     image_url = getattr(part, "image_url", None)
                     if image_url:
-                        converted.append(
-                            ChatCompletionRequestContentItem(
-                                type="image_url",
-                                image_url={
-                                    "url": image_url,
-                                    "detail": getattr(part, "detail", "auto") or "auto",
-                                },
-                            )
-                        )
+                        converted.append(AppContentItem(type="image_url", url=image_url))
                 elif part.type == "input_file":
-                    file_data = getattr(part, "file_data", None)
                     file_url = getattr(part, "file_url", None)
-                    if file_data or file_url:
-                        file_info = {}
-                        if file_data:
-                            file_info["file_data"] = file_data
-                            file_info["filename"] = getattr(part, "filename", None)
-                        if file_url:
-                            file_info["url"] = file_url
+                    file_data = getattr(part, "file_data", None)
+                    if file_url or file_data:
                         converted.append(
-                            ChatCompletionRequestContentItem(type="file", file=file_info)
+                            AppContentItem(
+                                type="file",
+                                url=file_url,
+                                file_data=file_data,
+                                filename=getattr(part, "filename", None),
+                            )
                         )
-            instruction_messages.append(
-                ChatCompletionMessage(
-                    role=role,
-                    content=converted or None,
-                    reasoning_content="\n".join(reasoning_parts) if reasoning_parts else None,
-                )
-            )
+            instruction_messages.append(AppMessage(role=role, content=converted or None))
 
     return instruction_messages
 
@@ -813,8 +907,8 @@ async def _find_reusable_session(
     db: LMDBConversationStore,
     pool: GeminiClientPool,
     model: Model,
-    messages: list[ChatCompletionMessage],
-) -> tuple[ChatSession | None, GeminiClientWrapper | None, list[ChatCompletionMessage]]:
+    messages: list[AppMessage],
+) -> tuple[ChatSession | None, GeminiClientWrapper | None, list[AppMessage]]:
     """Find an existing chat session matching the longest suitable history prefix."""
     if len(messages) < 2:
         return None, None, messages
@@ -982,12 +1076,12 @@ def flush(self) -> str:
 # --- Response Builders & Streaming ---
 
 
-def _create_real_streaming_response(
-    generator: AsyncGenerator[ModelOutput],
+async def _create_real_streaming_response(
+    resp_or_stream: AsyncGenerator[ModelOutput] | ModelOutput,
     completion_id: str,
     created_time: int,
     model_name: str,
-    messages: list[ChatCompletionMessage],
+    messages: list[AppMessage],
     db: LMDBConversationStore,
     model: Model,
     client_wrapper: GeminiClientWrapper,
@@ -1005,56 +1099,44 @@ async def generate_stream():
         has_started = False
         all_outputs: list[ModelOutput] = []
         suppressor = StreamingOutputFilter()
+
+        async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
+            yield item
+
+        def make_chunk(delta_content: dict) -> str:
+            data = {
+                "id": completion_id,
+                "object": "chat.completion.chunk",
+                "created": created_time,
+                "model": model_name,
+                "choices": [{"index": 0, **delta_content}],
+            }
+            return f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
+
         try:
+            if hasattr(resp_or_stream, "__aiter__"):
+                generator = cast(AsyncGenerator[ModelOutput], resp_or_stream)
+            else:
+                generator = _make_async_gen(cast(ModelOutput, resp_or_stream))
+
             async for chunk in generator:
                 all_outputs.append(chunk)
                 if not has_started:
-                    data = {
-                        "id": completion_id,
-                        "object": "chat.completion.chunk",
-                        "created": created_time,
-                        "model": model_name,
-                        "choices": [
-                            {"index": 0, "delta": {"role": "assistant"}, "finish_reason": None}
-                        ],
-                    }
-                    yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
+                    yield make_chunk({"delta": {"role": "assistant"}, "finish_reason": None})
                     has_started = True
 
                 if t_delta := chunk.thoughts_delta:
                     full_thoughts += t_delta
-                    data = {
-                        "id": completion_id,
-                        "object": "chat.completion.chunk",
-                        "created": created_time,
-                        "model": model_name,
-                        "choices": [
-                            {
-                                "index": 0,
-                                "delta": {"reasoning_content": t_delta},
-                                "finish_reason": None,
-                            }
-                        ],
-                    }
-                    yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
+                    yield make_chunk(
+                        {"delta": {"reasoning_content": t_delta}, "finish_reason": None}
+                    )
 
                 if text_delta := chunk.text_delta:
                     full_text += text_delta
                     if visible_delta := suppressor.process(text_delta):
-                        data = {
-                            "id": completion_id,
-                            "object": "chat.completion.chunk",
-                            "created": created_time,
-                            "model": model_name,
-                            "choices": [
-                                {
-                                    "index": 0,
-                                    "delta": {"content": visible_delta},
-                                    "finish_reason": None,
-                                }
-                            ],
-                        }
-                        yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
+                        yield make_chunk(
+                            {"delta": {"content": visible_delta}, "finish_reason": None}
+                        )
         except Exception as e:
             logger.exception(f"Error during OpenAI streaming: {e}")
             yield f"data: {orjson.dumps({'error': {'message': 'Streaming error occurred.', 'type': 'server_error', 'param': None, 'code': None}}).decode('utf-8')}\n\n"
@@ -1068,18 +1150,9 @@ async def generate_stream():
                 full_thoughts = final_chunk.thoughts
 
         if remaining_text := suppressor.flush():
-            data = {
-                "id": completion_id,
-                "object": "chat.completion.chunk",
-                "created": created_time,
-                "model": model_name,
-                "choices": [
-                    {"index": 0, "delta": {"content": remaining_text}, "finish_reason": None}
-                ],
-            }
-            yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
+            yield make_chunk({"delta": {"content": remaining_text}, "finish_reason": None})
 
-        _thoughts, assistant_text, storage_output, tool_calls = _process_llm_output(
+        _thoughts, assistant_text, storage_output, detected_tool_calls = _process_llm_output(
             full_thoughts, full_text, structured_requirement
         )
 
@@ -1092,7 +1165,7 @@ async def generate_stream():
                         images.append(img)
                         seen_urls.add(img.url)
 
-        image_markdown = ""
+        image_results = []
         seen_hashes = set()
         for image in images:
             try:
@@ -1103,43 +1176,35 @@ async def generate_stream():
                     continue
                 seen_hashes.add(fhash)
 
-                img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
-                image_markdown += f"\n\n{img_url}"
+                img_url = f"{base_url}images/{fname}?token={get_image_token(fname)}"
+                image_results.append(img_url)
             except Exception as exc:
                 logger.warning(f"Failed to process image in OpenAI stream: {exc}")
 
-        if image_markdown:
-            assistant_text += image_markdown
-            storage_output += image_markdown
-            data = {
-                "id": completion_id,
-                "object": "chat.completion.chunk",
-                "created": created_time,
-                "model": model_name,
-                "choices": [
-                    {"index": 0, "delta": {"content": image_markdown}, "finish_reason": None}
-                ],
-            }
-            yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
+        if detected_tool_calls:
+            for call in detected_tool_calls:
+                tc_dict = {
+                    "index": call.id,
+                    "id": call.id,
+                    "type": "function",
+                    "function": {"name": call.function.name, "arguments": call.function.arguments},
+                }
+
+                yield make_chunk(
+                    {
+                        "delta": {
+                            "tool_calls": [tc_dict],
+                        }
+                    }
+                )
 
-        tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls]
-        if tool_calls_payload:
-            tool_calls_delta = [
-                {**call, "index": idx} for idx, call in enumerate(tool_calls_payload)
-            ]
-            data = {
-                "id": completion_id,
-                "object": "chat.completion.chunk",
-                "created": created_time,
-                "model": model_name,
-                "choices": [
-                    {"index": 0, "delta": {"tool_calls": tool_calls_delta}, "finish_reason": None}
-                ],
-            }
-            yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
+        for image_url in image_results:
+            yield make_chunk({"delta": {"content": f"\n\n![Generated Image]({image_url})"}})
+
+        yield make_chunk({"delta": {}, "finish_reason": "stop"})
 
         p_tok, c_tok, t_tok, r_tok = _calculate_usage(
-            messages, assistant_text, tool_calls, full_thoughts
+            messages, assistant_text, detected_tool_calls, full_thoughts
         )
         usage = CompletionUsage(
             prompt_tokens=p_tok,
@@ -1147,16 +1212,6 @@ async def generate_stream():
             total_tokens=t_tok,
             completion_tokens_details={"reasoning_tokens": r_tok},
         )
-        data = {
-            "id": completion_id,
-            "object": "chat.completion.chunk",
-            "created": created_time,
-            "model": model_name,
-            "choices": [
-                {"index": 0, "delta": {}, "finish_reason": "tool_calls" if tool_calls else "stop"}
-            ],
-            "usage": usage.model_dump(mode="json"),
-        }
         _persist_conversation(
             db,
             model.model_name,
@@ -1164,21 +1219,26 @@ async def generate_stream():
             session.metadata,
             messages,
             storage_output,
-            tool_calls,
-            full_thoughts,
+            detected_tool_calls,
+        )
+        yield make_chunk(
+            {
+                "delta": {},
+                "finish_reason": "tool_calls" if detected_tool_calls else "stop",
+                "usage": usage.model_dump(mode="json"),
+            }
         )
-        yield f"data: {orjson.dumps(data).decode('utf-8')}\n\n"
         yield "data: [DONE]\n\n"
 
     return StreamingResponse(generate_stream(), media_type="text/event-stream")
 
 
-def _create_responses_real_streaming_response(
-    generator: AsyncGenerator[ModelOutput],
+async def _create_responses_real_streaming_response(
+    resp_or_stream: AsyncGenerator[ModelOutput] | ModelOutput,
     response_id: str,
     created_time: int,
     model_name: str,
-    messages: list[ChatCompletionMessage],
+    messages: list[AppMessage],
     db: LMDBConversationStore,
     model: Model,
     client_wrapper: GeminiClientWrapper,
@@ -1257,6 +1317,15 @@ def make_event(etype: str, data: dict) -> str:
         suppressor = StreamingOutputFilter()
 
         try:
+            if hasattr(resp_or_stream, "__aiter__"):
+                generator = cast(AsyncGenerator[ModelOutput], resp_or_stream)
+            else:
+
+                async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
+                    yield item
+
+                generator = _make_async_gen(cast(ModelOutput, resp_or_stream))
+
             async for chunk in generator:
                 all_outputs.append(chunk)
 
@@ -1624,7 +1693,6 @@ def make_event(etype: str, data: dict) -> str:
             messages,
             storage_output,
             detected_tool_calls,
-            full_thoughts,
         )
 
         yield make_event(
@@ -1670,9 +1738,10 @@ async def create_chat_completion(
     structured_requirement = _build_structured_requirement(request.response_format)
     extra_instr = [structured_requirement.instruction] if structured_requirement else None
 
-    # This ensures that server-injected system instructions are part of the history
+    app_messages = _convert_to_app_messages(request.messages)
+
     msgs = _prepare_messages_for_model(
-        request.messages,
+        app_messages,
         request.tools,
         request.tool_choice,
         extra_instr,
@@ -1784,7 +1853,7 @@ async def create_chat_completion(
         logger.debug(f"Detected tool calls: {reprlib.repr(tool_calls_payload)}")
 
     p_tok, c_tok, t_tok, r_tok = _calculate_usage(
-        request.messages, visible_output, tool_calls, thoughts
+        app_messages, visible_output, tool_calls, thoughts
     )
     usage = {
         "prompt_tokens": p_tok,
@@ -1810,7 +1879,6 @@ async def create_chat_completion(
         msgs,  # Use prepared messages 'msgs'
         storage_output,
         tool_calls,
-        thoughts,
     )
     return payload
 
@@ -1824,7 +1892,7 @@ async def create_response(
     image_store: Path = Depends(get_image_store_dir),
 ):
     base_url = str(raw_request.base_url)
-    base_messages = _response_items_to_messages(request.input)
+    base_messages = _convert_responses_to_app_messages(request.input)
     struct_req = _build_structured_requirement(request.response_format)
     extra_instr = [struct_req.instruction] if struct_req else []
 
@@ -1847,7 +1915,7 @@ async def create_response(
     )
     if img_instr:
         extra_instr.append(img_instr)
-    preface = _instructions_to_messages(request.instructions)
+    preface = _convert_instructions_to_app_messages(request.instructions)
     conv_messages = [*preface, *base_messages] if preface else base_messages
     model_tool_choice = (
         request.tool_choice
@@ -2026,6 +2094,5 @@ async def create_response(
         messages,
         storage_output,
         tool_calls,
-        thoughts,
     )
     return payload
diff --git a/app/services/client.py b/app/services/client.py
index 77dfdf6..d2e5270 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -6,7 +6,7 @@
 from gemini_webapi import GeminiClient, ModelOutput
 from loguru import logger
 
-from app.models import ChatCompletionMessage
+from app.models import AppMessage
 from app.utils import g_config
 from app.utils.helper import (
     add_tag,
@@ -69,7 +69,7 @@ def running(self) -> bool:
 
     @staticmethod
     async def process_message(
-        message: ChatCompletionMessage,
+        message: AppMessage,
         tempdir: Path | None = None,
         tagged: bool = True,
         wrap_tool: bool = True,
@@ -91,28 +91,27 @@ async def process_message(
                     if item_text or message.role == "tool":
                         text_fragments.append(item_text)
                 elif item.type == "image_url":
-                    item_image_url = getattr(item, "image_url", None)
-                    if not item_image_url:
-                        raise ValueError("Image URL cannot be empty")
-                    if url := item_image_url.get("url", None):
-                        files.append(await save_url_to_tempfile(url, tempdir))
-                    else:
-                        raise ValueError("Image URL must contain 'url' key")
+                    item_media_url = getattr(item, "url", None)
+                    if not item_media_url:
+                        raise ValueError(f"{item.type} cannot be empty")
+                    files.append(await save_url_to_tempfile(item_media_url, tempdir))
                 elif item.type == "file":
-                    item_file = getattr(item, "file", None)
-                    if not item_file:
-                        raise ValueError("File cannot be empty")
-                    if file_data := item_file.get("file_data", None):
-                        filename = item_file.get("filename", "")
+                    file_data = getattr(item, "file_data", None)
+                    if file_data:
+                        filename = getattr(item, "filename", "") or ""
                         files.append(await save_file_to_tempfile(file_data, filename, tempdir))
-                    elif url := item_file.get("url", None):
-                        files.append(await save_url_to_tempfile(url, tempdir))
                     else:
-                        raise ValueError("File must contain 'file_data' or 'url' key")
+                        raise ValueError("File must contain 'file_data'")
+                elif item.type == "input_audio":
+                    file_data = getattr(item, "file_data", None)
+                    if file_data:
+                        files.append(await save_file_to_tempfile(file_data, "audio.wav", tempdir))
+                    else:
+                        raise ValueError("input_audio must contain 'file_data' key")
         elif message.content is None and message.role == "tool":
             text_fragments.append("")
         elif message.content is not None:
-            raise ValueError("Unsupported message content type.")
+            raise ValueError(f"Unsupported message content type: {type(message.content)}")
 
         if message.role == "tool":
             tool_name = message.name or "unknown"
@@ -161,7 +160,7 @@ async def process_message(
 
     @staticmethod
     async def process_conversation(
-        messages: list[ChatCompletionMessage], tempdir: Path | None = None
+        messages: list[AppMessage], tempdir: Path | None = None
     ) -> tuple[str, list[str | Path | bytes | io.BytesIO]]:
         conversation: list[str] = []
         files: list[str | Path | bytes | io.BytesIO] = []
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 7a915d4..5a32089 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -12,17 +12,13 @@
 from loguru import logger
 
 from app.models import (
-    ChatCompletionAssistantContentItem,
-    ChatCompletionMessage,
-    ChatCompletionRequestContentItem,
+    AppMessage,
     ConversationInStore,
 )
 from app.utils import g_config
 from app.utils.helper import (
-    extract_tool_calls,
     normalize_llm_text,
     remove_tool_call_blocks,
-    strip_system_hints,
     unescape_text,
 )
 from app.utils.singleton import Singleton
@@ -57,17 +53,14 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None:
     return text.strip() if text.strip() else None
 
 
-def _hash_message(message: ChatCompletionMessage, fuzzy: bool = False) -> str:
+def _hash_message(message: AppMessage, fuzzy: bool = False) -> str:
     """
     Generate a stable, canonical hash for a single message.
     """
     core_data: dict[str, Any] = {
         "role": message.role,
-        "name": message.name or None,
-        "tool_call_id": message.tool_call_id or None,
-        "reasoning_content": _normalize_text(message.reasoning_content)
-        if message.reasoning_content
-        else None,
+        "name": message.name,
+        "tool_call_id": message.tool_call_id,
     }
 
     content = message.content
@@ -76,44 +69,22 @@ def _hash_message(message: ChatCompletionMessage, fuzzy: bool = False) -> str:
     elif isinstance(content, str):
         core_data["content"] = _normalize_text(content, fuzzy=fuzzy)
     elif isinstance(content, list):
-        _ContentItem = (ChatCompletionRequestContentItem, ChatCompletionAssistantContentItem)
         text_parts = []
         for item in content:
-            text_val = ""
-            if isinstance(item, _ContentItem) and item.type == "text":
-                text_val = item.text or ""
-            elif isinstance(item, dict) and item.get("type") == "text":
-                text_val = item.get("text") or ""
-
-            if text_val:
-                normalized_part = _normalize_text(text_val, fuzzy=fuzzy)
+            if item.type == "text" and item.text:
+                normalized_part = _normalize_text(item.text, fuzzy=fuzzy)
                 if normalized_part:
                     text_parts.append(normalized_part)
-            elif isinstance(item, _ContentItem):
-                item_type = item.type
-                if item_type == "image_url":
-                    item_image_url = getattr(item, "image_url", None)
-                    url = item_image_url.get("url") if item_image_url else None
-                    text_parts.append(f"[image_url:{url}]")
-                elif item_type == "file":
-                    item_file = getattr(item, "file", None)
-                    url = (item_file.get("url") or item_file.get("filename")) if item_file else None
-                    text_parts.append(f"[file:{url}]")
-            elif isinstance(item, dict):
-                item_type = item.get("type")
-                if item_type == "image_url":
-                    url = item.get("image_url", {}).get("url")
-                    text_parts.append(f"[image_url:{url}]")
-                elif item_type == "file":
-                    url = item.get("file", {}).get("url") or item.get("file", {}).get("filename")
-                    text_parts.append(f"[file:{url}]")
+            elif item.type != "text" and item.url:
+                text_parts.append(f"[{item.type}:{item.url}]")
 
         core_data["content"] = "\n".join(text_parts) if text_parts else None
 
     if message.tool_calls:
         calls_data = []
         for tc in message.tool_calls:
-            args = tc.function.arguments or "{}"
+            args = tc.function.arguments
+            name = tc.function.name
             try:
                 parsed = orjson.loads(args)
                 canon_args = orjson.dumps(parsed, option=orjson.OPT_SORT_KEYS).decode("utf-8")
@@ -122,7 +93,7 @@ def _hash_message(message: ChatCompletionMessage, fuzzy: bool = False) -> str:
 
             calls_data.append(
                 {
-                    "name": tc.function.name,
+                    "name": name,
                     "arguments": canon_args,
                 }
             )
@@ -136,7 +107,7 @@ def _hash_message(message: ChatCompletionMessage, fuzzy: bool = False) -> str:
 
 
 def _hash_conversation(
-    client_id: str, model: str, messages: list[ChatCompletionMessage], fuzzy: bool = False
+    client_id: str, model: str, messages: list[AppMessage], fuzzy: bool = False
 ) -> str:
     """Generate a hash for a list of messages and model name, tied to a specific client_id."""
     combined_hash = hashlib.sha256()
@@ -271,29 +242,35 @@ def _remove_from_index(self, txn: Transaction, prefix: str, hash_val: str, stora
 
     def store(
         self,
-        conv: ConversationInStore,
-        custom_key: str | None = None,
-    ) -> str:
+        client_id: str,
+        model: str,
+        messages: list[AppMessage],
+        metadata: list[str | None],
+    ) -> None:
         """
         Store a conversation model in LMDB.
 
         Args:
-            conv: Conversation model to store
-            custom_key: Optional custom key, if not provided, hash will be used
-
-        Returns:
-            str: The key used to store the messages (hash or custom key)
+            client_id: The client identifier
+            model: The model name
+            messages: Unsanitized API messages
+            metadata: Session metadata
         """
-        if not conv:
+        if not messages:
             raise ValueError("Messages list cannot be empty")
 
-        # Ensure consistent sanitization before hashing and storage
-        sanitized_messages = self.sanitize_messages(conv.messages)
-        conv.messages = sanitized_messages
-
+        now = datetime.now()
+        conv = ConversationInStore(
+            model=model,
+            client_id=client_id,
+            metadata=metadata,
+            messages=messages,
+            created_at=now,
+            updated_at=now,
+        )
         message_hash = _hash_conversation(conv.client_id, conv.model, conv.messages)
         fuzzy_hash = _hash_conversation(conv.client_id, conv.model, conv.messages, fuzzy=True)
-        storage_key = custom_key or message_hash
+        storage_key = message_hash
 
         now = datetime.now()
         if conv.created_at is None:
@@ -310,7 +287,6 @@ def store(
                 self._update_index(txn, self.FUZZY_LOOKUP_PREFIX, fuzzy_hash, storage_key)
 
                 logger.debug(f"Stored {len(conv.messages)} messages with key: {storage_key[:12]}")
-                return storage_key
 
         except Error as e:
             logger.error(f"LMDB error while storing messages with key {storage_key[:12]}: {e}")
@@ -349,10 +325,10 @@ def get(self, key: str) -> ConversationInStore | None:
             logger.error(f"Unexpected error retrieving messages with key {key[:12]}: {e}")
             return None
 
-    def find(self, model: str, messages: list[ChatCompletionMessage]) -> ConversationInStore | None:
+    def find(self, model: str, messages: list[AppMessage]) -> ConversationInStore | None:
         """
         Search conversation data by message list.
-        Tries raw matching, then sanitized matching, and finally fuzzy matching.
+        Tries sanitized matching, and finally fuzzy matching.
 
         Args:
             model: Model name
@@ -365,16 +341,7 @@ def find(self, model: str, messages: list[ChatCompletionMessage]) -> Conversatio
             return None
 
         if conv := self._find_by_message_list(model, messages):
-            logger.debug(f"Session found for '{model}' with {len(messages)} raw messages.")
-            return conv
-
-        cleaned_messages = self.sanitize_messages(messages)
-        if cleaned_messages != messages and (
-            conv := self._find_by_message_list(model, cleaned_messages)
-        ):
-            logger.debug(
-                f"Session found for '{model}' with {len(cleaned_messages)} cleaned messages."
-            )
+            logger.debug(f"Session found for '{model}' with {len(messages)} cleaned messages.")
             return conv
 
         if conv := self._find_by_message_list(model, messages, fuzzy=True):
@@ -389,7 +356,7 @@ def find(self, model: str, messages: list[ChatCompletionMessage]) -> Conversatio
     def _find_by_message_list(
         self,
         model: str,
-        messages: list[ChatCompletionMessage],
+        messages: list[AppMessage],
         fuzzy: bool = False,
     ) -> ConversationInStore | None:
         """
@@ -592,98 +559,3 @@ def close(self) -> None:
     def __del__(self):
         """Cleanup on destruction."""
         self.close()
-
-    @staticmethod
-    def sanitize_messages(messages: list[ChatCompletionMessage]) -> list[ChatCompletionMessage]:
-        """Clean all messages of internal markers, hints and normalize tool calls."""
-        cleaned_messages = []
-        for msg in messages:
-            update_data = {}
-            content_changed = False
-
-            # Normalize reasoning_content
-            if msg.reasoning_content:
-                norm_reasoning = _normalize_text(msg.reasoning_content)
-                if norm_reasoning != msg.reasoning_content:
-                    update_data["reasoning_content"] = norm_reasoning
-                    content_changed = True
-
-            if isinstance(msg.content, str):
-                text = msg.content
-                tool_calls = msg.tool_calls
-
-                if msg.role == "assistant" and not tool_calls:
-                    text, tool_calls = extract_tool_calls(text)
-                else:
-                    text = strip_system_hints(text)
-
-                normalized_content = text.strip() or None
-
-                if normalized_content != msg.content:
-                    update_data["content"] = normalized_content
-                    content_changed = True
-                if tool_calls != msg.tool_calls:
-                    update_data["tool_calls"] = tool_calls or None
-                    content_changed = True
-
-            elif isinstance(msg.content, list):
-                new_content = []
-                all_extracted_calls = list(msg.tool_calls or [])
-                list_changed = False
-                reasoning_parts = []
-
-                for item in msg.content:
-                    # Extract reasoning items and move them to reasoning_content
-                    if (
-                        isinstance(item, ChatCompletionAssistantContentItem)
-                        and item.type == "reasoning"
-                    ):
-                        val = item.text
-                        if val:
-                            norm_val = _normalize_text(val)
-                            if norm_val:
-                                reasoning_parts.append(norm_val)
-                        list_changed = True
-                        continue
-
-                    if (
-                        isinstance(
-                            item,
-                            (ChatCompletionRequestContentItem, ChatCompletionAssistantContentItem),
-                        )
-                        and item.type == "text"
-                        and item.text
-                    ):
-                        text = item.text
-                        if msg.role == "assistant" and not msg.tool_calls:
-                            text, extracted = extract_tool_calls(text)
-                            if extracted:
-                                all_extracted_calls.extend(extracted)
-                                list_changed = True
-                        else:
-                            text = strip_system_hints(text)
-
-                        if text != item.text:
-                            list_changed = True
-                            item = item.model_copy(update={"text": text.strip() or None})
-                    new_content.append(item)
-
-                if reasoning_parts:
-                    existing_reason = update_data.get("reasoning_content") or msg.reasoning_content
-                    all_reasoning = "\n\n".join(
-                        r for r in ([existing_reason, *reasoning_parts]) if r
-                    )
-                    if all_reasoning:
-                        update_data["reasoning_content"] = all_reasoning
-                        content_changed = True
-
-                if list_changed:
-                    update_data["content"] = new_content if new_content else None
-                    update_data["tool_calls"] = all_extracted_calls or None
-                    content_changed = True
-
-            if content_changed:
-                cleaned_messages.append(msg.model_copy(update=update_data))
-            else:
-                cleaned_messages.append(msg)
-        return cleaned_messages
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 8bc4940..abab15d 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -14,7 +14,7 @@
 from curl_cffi.requests import AsyncSession
 from loguru import logger
 
-from app.models import ChatCompletionMessage, ChatCompletionMessageToolCall, FunctionCall
+from app.models import AppMessage, ChatCompletionMessageToolCall, FunctionCall
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
@@ -180,7 +180,7 @@ def estimate_tokens(text: str | None) -> int:
 
 
 async def save_file_to_tempfile(
-    file_in_base64: str, file_name: str = "", tempdir: Path | None = None
+    file_in_base64: str | bytes, file_name: str = "", tempdir: Path | None = None
 ) -> Path:
     """Decode base64 file data and save to a temporary file."""
     with tempfile.NamedTemporaryFile(
@@ -195,11 +195,15 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
     """Download content from a URL and save to a temporary file."""
     data: bytes | None = None
     suffix: str | None = None
-    if url.startswith("data:image/"):
+    if url.startswith("data:"):
         metadata_part = url.split(",")[0]
         mime_type = metadata_part.split(":")[1].split(";")[0]
         data = base64.b64decode(url.split(",")[1])
-        suffix = mimetypes.guess_extension(mime_type) or f".{mime_type.split('/')[1]}"
+        suffix = mimetypes.guess_extension(mime_type)
+        if not suffix and "/" in mime_type:
+            suffix = f".{mime_type.split('/')[1]}"
+        elif not suffix:
+            suffix = ".bin"
     else:
         async with AsyncSession(impersonate="chrome", allow_redirects=True) as client:
             resp = await client.get(url)
@@ -348,7 +352,7 @@ def extract_tool_calls(text: str) -> tuple[str, list[ChatCompletionMessageToolCa
     return _process_tools_internal(text, extract=True)
 
 
-def text_from_message(message: ChatCompletionMessage) -> str:
+def text_from_message(message: AppMessage) -> str:
     """Concatenate text and tool arguments from a message for token estimation."""
     base_text = ""
     if isinstance(message.content, str):

From 88c6d4df19e8b068d88ba36f2067dc14cda79f42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 4 Mar 2026 16:14:36 +0700
Subject: [PATCH 180/236] Move the content storage in LMDB to a separate
 `AppMessage`

The stored content is now independent of the `ChatCompletionMessage` format, which simplifies maintenance and improves scalability.
---
 app/server/chat.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 86d25ae..3d17293 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1076,7 +1076,7 @@ def flush(self) -> str:
 # --- Response Builders & Streaming ---
 
 
-async def _create_real_streaming_response(
+def _create_real_streaming_response(
     resp_or_stream: AsyncGenerator[ModelOutput] | ModelOutput,
     completion_id: str,
     created_time: int,
@@ -1233,7 +1233,7 @@ def make_chunk(delta_content: dict) -> str:
     return StreamingResponse(generate_stream(), media_type="text/event-stream")
 
 
-async def _create_responses_real_streaming_response(
+def _create_responses_real_streaming_response(
     resp_or_stream: AsyncGenerator[ModelOutput] | ModelOutput,
     response_id: str,
     created_time: int,
@@ -1753,8 +1753,6 @@ async def create_chat_completion(
         if not remain:
             raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.")
 
-        # For reused sessions, we only need to process the remaining messages.
-        # We don't re-inject system defaults to avoid duplicating instructions already in history.
         input_msgs = _prepare_messages_for_model(
             remain,
             request.tools,
@@ -1771,7 +1769,6 @@ async def create_chat_completion(
         try:
             client = await pool.acquire()
             session = client.start_chat(model=model)
-            # Use the already prepared 'msgs' for a fresh session
             m_input, files = await GeminiClientWrapper.process_conversation(msgs, tmp_dir)
         except Exception as e:
             logger.exception("Error in preparing conversation")
@@ -1795,14 +1792,13 @@ async def create_chat_completion(
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) from e
 
     if request.stream:
-        # Narrow for type checker
         assert not isinstance(resp_or_stream, ModelOutput)
         return _create_real_streaming_response(
             resp_or_stream,
             completion_id,
             created_time,
             request.model,
-            msgs,  # Use prepared 'msgs'
+            msgs,
             db,
             model,
             client,
@@ -1811,7 +1807,6 @@ async def create_chat_completion(
             structured_requirement,
         )
 
-    # Narrow for type checker
     assert isinstance(resp_or_stream, ModelOutput)
 
     try:
@@ -1876,7 +1871,7 @@ async def create_chat_completion(
         model.model_name,
         client.id,
         session.metadata,
-        msgs,  # Use prepared messages 'msgs'
+        msgs,
         storage_output,
         tool_calls,
     )
@@ -1977,7 +1972,6 @@ async def create_response(
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) from e
 
     if request.stream:
-        # Narrow for type checker
         assert not isinstance(resp_or_stream, ModelOutput)
         return _create_responses_real_streaming_response(
             resp_or_stream,
@@ -1995,7 +1989,6 @@ async def create_response(
             struct_req,
         )
 
-    # Narrow for type checker
     assert isinstance(resp_or_stream, ModelOutput)
 
     try:
@@ -2058,7 +2051,6 @@ async def create_response(
     if not contents:
         contents.append(ResponseOutputText(type="output_text", text=""))
 
-    # Aggregate images for storage
     image_markdown = ""
     for img_call in img_calls:
         fname = f"{img_call.id}.{img_call.output_format}"

From dec4b3cfec13fe5b303e1e6d894a7d907ce7613c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 4 Mar 2026 16:59:07 +0700
Subject: [PATCH 181/236] Move the content storage in LMDB to a separate
 `AppMessage`

The stored content is now independent of the `ChatCompletionMessage` format, which simplifies maintenance and improves scalability.
---
 app/server/chat.py  | 67 ++++++++++++++++++++-------------------------
 app/utils/helper.py | 14 ++++------
 2 files changed, 36 insertions(+), 45 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 3d17293..362e791 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -31,6 +31,7 @@
     ChatCompletionRequest,
     ChatCompletionResponse,
     CompletionUsage,
+    FunctionCall,
     FunctionCallOutput,
     FunctionTool,
     ImageGeneration,
@@ -132,7 +133,7 @@ async def _image_to_base64(
 def _calculate_usage(
     messages: list[AppMessage],
     assistant_text: str | None,
-    tool_calls: list[Any] | None,
+    tool_calls: list[AppToolCall] | None,
     thoughts: str | None = None,
 ) -> tuple[int, int, int, int]:
     """Calculate prompt, completion, total and reasoning tokens consistently."""
@@ -140,10 +141,7 @@ def _calculate_usage(
     tool_args_text = ""
     if tool_calls:
         for call in tool_calls:
-            if hasattr(call, "function"):
-                tool_args_text += call.function.arguments or ""
-            elif isinstance(call, dict):
-                tool_args_text += call.get("function", {}).get("arguments", "")
+            tool_args_text += call.function.arguments or ""
 
     completion_basis = assistant_text or ""
     if tool_args_text:
@@ -167,7 +165,7 @@ def _create_responses_standard_payload(
     response_id: str,
     created_time: int,
     model_name: str,
-    detected_tool_calls: list[Any] | None,
+    detected_tool_calls: list[AppToolCall] | None,
     image_call_items: list[ImageGenerationCall],
     response_contents: list[ResponseOutputContent],
     usage: ResponseUsage,
@@ -204,18 +202,10 @@ def _create_responses_standard_payload(
         output_items.extend(
             [
                 ResponseFunctionToolCall(
-                    id=call.id if hasattr(call, "id") else call["id"],
-                    call_id=call.id if hasattr(call, "id") else call["id"],
-                    name=(
-                        call.function.name
-                        if hasattr(call, "function")
-                        else call["function"]["name"]
-                    ),
-                    arguments=(
-                        call.function.arguments
-                        if hasattr(call, "function")
-                        else call["function"]["arguments"]
-                    ),
+                    id=call.id,
+                    call_id=call.id,
+                    name=call.function.name,
+                    arguments=call.function.arguments,
                     status="completed",
                 )
                 for call in detected_tool_calls
@@ -249,21 +239,27 @@ def _create_chat_completion_standard_payload(
     created_time: int,
     model_name: str,
     visible_output: str | None,
-    tool_calls_payload: list[dict] | None,
+    tool_calls: list[AppToolCall] | None,
     finish_reason: Literal["stop", "length", "tool_calls", "content_filter"],
     usage: dict,
     reasoning_content: str | None = None,
 ) -> ChatCompletionResponse:
     """Unified factory for building Chat Completion response objects."""
-    # Convert tool calls to Model objects if they are dicts
-    tool_calls = None
-    if tool_calls_payload:
-        tool_calls = [ChatCompletionMessageToolCall.model_validate(tc) for tc in tool_calls_payload]
+    tc_converted = None
+    if tool_calls:
+        tc_converted = [
+            ChatCompletionMessageToolCall(
+                id=tc.id,
+                type="function",
+                function=FunctionCall(name=tc.function.name, arguments=tc.function.arguments),
+            )
+            for tc in tool_calls
+        ]
 
     message = ChatCompletionMessage(
         role="assistant",
         content=visible_output or None,
-        tool_calls=tool_calls,
+        tool_calls=tc_converted,
         reasoning_content=reasoning_content or None,
     )
 
@@ -287,7 +283,7 @@ def _process_llm_output(
     thoughts: str | None,
     raw_text: str,
     structured_requirement: StructuredOutputRequirement | None,
-) -> tuple[str | None, str, str, list[Any]]:
+) -> tuple[str | None, str, str, list[AppToolCall]]:
     """
     Post-process Gemini output to extract tool calls and prepare clean text for display and storage.
     Returns: (thoughts, visible_text, storage_output, tool_calls)
@@ -366,15 +362,11 @@ def _convert_to_app_messages(messages: list[ChatCompletionMessage]) -> list[AppM
         if msg.tool_calls:
             tool_calls = [
                 AppToolCall(
-                    id=tc.id if hasattr(tc, "id") else tc.get("id", ""),
+                    id=tc.id,
                     type="function",
                     function=AppToolCallFunction(
-                        name=tc.function.name
-                        if hasattr(tc, "function")
-                        else tc.get("function", {}).get("name", ""),
-                        arguments=tc.function.arguments
-                        if hasattr(tc, "function")
-                        else tc.get("function", {}).get("arguments", ""),
+                        name=tc.function.name,
+                        arguments=tc.function.arguments,
                     ),
                 )
                 for tc in msg.tool_calls
@@ -404,7 +396,7 @@ def _persist_conversation(
     metadata: list[str | None],
     messages: list[AppMessage],
     storage_output: str | None,
-    tool_calls: list[Any] | None,
+    tool_calls: list[AppToolCall] | None,
 ) -> str | None:
     """Unified logic to save conversation history to LMDB."""
     try:
@@ -1843,9 +1835,10 @@ async def create_chat_completion(
         visible_output += image_markdown
         storage_output += image_markdown
 
-    tool_calls_payload = [call.model_dump(mode="json") for call in tool_calls]
-    if tool_calls_payload:
-        logger.debug(f"Detected tool calls: {reprlib.repr(tool_calls_payload)}")
+    if tool_calls:
+        logger.debug(
+            f"Detected tool calls: {reprlib.repr([tc.model_dump(mode='json') for tc in tool_calls])}"
+        )
 
     p_tok, c_tok, t_tok, r_tok = _calculate_usage(
         app_messages, visible_output, tool_calls, thoughts
@@ -1861,7 +1854,7 @@ async def create_chat_completion(
         created_time,
         request.model,
         visible_output,
-        tool_calls_payload,
+        tool_calls or None,
         "tool_calls" if tool_calls else "stop",
         usage,
         thoughts,
diff --git a/app/utils/helper.py b/app/utils/helper.py
index abab15d..3b1d90f 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -14,7 +14,7 @@
 from curl_cffi.requests import AsyncSession
 from loguru import logger
 
-from app.models import AppMessage, ChatCompletionMessageToolCall, FunctionCall
+from app.models import AppMessage, AppToolCall, AppToolCallFunction
 
 VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
 TOOL_WRAP_HINT = (
@@ -282,9 +282,7 @@ def strip_system_hints(text: str) -> str:
     return cleaned
 
 
-def _process_tools_internal(
-    text: str, extract: bool = True
-) -> tuple[str, list[ChatCompletionMessageToolCall]]:
+def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[AppToolCall]]:
     """
     Extract tool metadata and return text stripped of technical markers.
     Arguments are parsed into JSON and assigned deterministic call IDs.
@@ -292,7 +290,7 @@ def _process_tools_internal(
     if not text:
         return text, []
 
-    tool_calls: list[ChatCompletionMessageToolCall] = []
+    tool_calls: list[AppToolCall] = []
 
     def _create_tool_call(name: str, raw_args: str) -> None:
         if not extract:
@@ -327,10 +325,10 @@ def _create_tool_call(name: str, raw_args: str) -> None:
         call_id = f"call_{hashlib.sha256(seed).hexdigest()[:24]}"
 
         tool_calls.append(
-            ChatCompletionMessageToolCall(
+            AppToolCall(
                 id=call_id,
                 type="function",
-                function=FunctionCall(name=name, arguments=arguments),
+                function=AppToolCallFunction(name=name, arguments=arguments),
             )
         )
 
@@ -347,7 +345,7 @@ def remove_tool_call_blocks(text: str) -> str:
     return cleaned
 
 
-def extract_tool_calls(text: str) -> tuple[str, list[ChatCompletionMessageToolCall]]:
+def extract_tool_calls(text: str) -> tuple[str, list[AppToolCall]]:
     """Extract tool calls and return cleaned text."""
     return _process_tools_internal(text, extract=True)
 

From ece7e7ad186f8749b06c7d1f8763fed0a6e82521 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 4 Mar 2026 19:04:18 +0700
Subject: [PATCH 182/236] Move the content storage in LMDB to a separate
 `AppMessage`

The stored content is now independent of the `ChatCompletionMessage` format, which simplifies maintenance and improves scalability.
---
 app/server/chat.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 362e791..029a8ea 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1114,7 +1114,9 @@ def make_chunk(delta_content: dict) -> str:
             async for chunk in generator:
                 all_outputs.append(chunk)
                 if not has_started:
-                    yield make_chunk({"delta": {"role": "assistant"}, "finish_reason": None})
+                    yield make_chunk(
+                        {"delta": {"role": "assistant", "content": ""}, "finish_reason": None}
+                    )
                     has_started = True
 
                 if t_delta := chunk.thoughts_delta:
@@ -1174,9 +1176,9 @@ def make_chunk(delta_content: dict) -> str:
                 logger.warning(f"Failed to process image in OpenAI stream: {exc}")
 
         if detected_tool_calls:
-            for call in detected_tool_calls:
+            for idx, call in enumerate(detected_tool_calls):
                 tc_dict = {
-                    "index": call.id,
+                    "index": idx,
                     "id": call.id,
                     "type": "function",
                     "function": {"name": call.function.name, "arguments": call.function.arguments},
@@ -1186,14 +1188,18 @@ def make_chunk(delta_content: dict) -> str:
                     {
                         "delta": {
                             "tool_calls": [tc_dict],
-                        }
+                        },
+                        "finish_reason": None,
                     }
                 )
 
         for image_url in image_results:
-            yield make_chunk({"delta": {"content": f"\n\n![Generated Image]({image_url})"}})
-
-        yield make_chunk({"delta": {}, "finish_reason": "stop"})
+            yield make_chunk(
+                {
+                    "delta": {"content": f"\n\n![Generated Image]({image_url})"},
+                    "finish_reason": None,
+                }
+            )
 
         p_tok, c_tok, t_tok, r_tok = _calculate_usage(
             messages, assistant_text, detected_tool_calls, full_thoughts
@@ -1280,7 +1286,6 @@ def make_event(etype: str, data: dict) -> str:
                 },
             },
         )
-
         yield make_event(
             "response.in_progress",
             {

From 67632bf0388ae78030d4b2d0fc6140eb9375edad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 5 Mar 2026 20:01:20 +0700
Subject: [PATCH 183/236] Add support for retrieving dynamic models based on
 client accounts.

---
 app/server/chat.py | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 029a8ea..78dc7d6 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -859,8 +859,8 @@ def _get_model_by_name(name: str) -> Model:
     return Model.from_name(name)
 
 
-def _get_available_models() -> list[ModelData]:
-    """Return a list of available models based on configuration strategy."""
+async def _get_available_models(pool: GeminiClientPool) -> list[ModelData]:
+    """Return a list of available models based on the configuration strategy and per-client accounts."""
     now = int(datetime.now(tz=UTC).timestamp())
     strategy = g_config.gemini.model_strategy
     models_data = []
@@ -876,12 +876,31 @@ def _get_available_models() -> list[ModelData]:
         )
 
     if strategy == "append":
-        custom_ids = {m.model_name for m in custom_models}
+        custom_ids = {m.id for m in models_data}
+        seen_model_ids = set()
+
+        for client in pool.clients:
+            if not client.running():
+                continue
+
+            client_models = client.list_models()
+            if client_models:
+                for am in client_models:
+                    if am.id not in custom_ids and am.id not in seen_model_ids:
+                        models_data.append(
+                            ModelData(
+                                id=am.id,
+                                created=now,
+                                owned_by="gemini-web",
+                            )
+                        )
+                        seen_model_ids.add(am.id)
+
         for model in Model:
             m_name = model.model_name
             if not m_name or m_name == "unspecified":
                 continue
-            if m_name in custom_ids:
+            if m_name in custom_ids or m_name in seen_model_ids:
                 continue
 
             models_data.append(
@@ -1711,7 +1730,8 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
 
 @router.get("/v1/models", response_model=ModelListResponse)
 async def list_models(api_key: str = Depends(verify_api_key)):
-    models = _get_available_models()
+    pool = GeminiClientPool()
+    models = await _get_available_models(pool)
     return ModelListResponse(data=models)
 
 

From 9b7d3eb5e7512762cfd24c0c7d743d96cf93d6af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Mar 2026 11:14:45 +0700
Subject: [PATCH 184/236] Update dependencies to latest versions

---
 pyproject.toml |  4 +--
 uv.lock        | 86 +++++++++++++++++++++++++-------------------------
 2 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 9a8f666..459b6f4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,8 +23,8 @@ Repository = "https://github.com/Nativu5/Gemini-FastAPI"
 [project.optional-dependencies]
 dev = [
     "pytest>=9.0.2",
-    "ruff>=0.15.4",
-    "ty>=0.0.20",
+    "ruff>=0.15.5",
+    "ty>=0.0.21",
 ]
 
 [dependency-groups]
diff --git a/uv.lock b/uv.lock
index b262688..6f688b1 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post243"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#79d86aff4785dd45bd951910b8be75cf8122e9cd" }
+version = "0.0.post251"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#891b9fd643d66eef2970897b06f32d395a091ad4" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },
@@ -418,27 +418,27 @@ wheels = [
 
 [[package]]
 name = "ruff"
-version = "0.15.4"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/da/31/d6e536cdebb6568ae75a7f00e4b4819ae0ad2640c3604c305a0428680b0c/ruff-0.15.4.tar.gz", hash = "sha256:3412195319e42d634470cc97aa9803d07e9d5c9223b99bcb1518f0c725f26ae1", size = 4569550, upload-time = "2026-02-26T20:04:14.959Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/f2/82/c11a03cfec3a4d26a0ea1e571f0f44be5993b923f905eeddfc397c13d360/ruff-0.15.4-py3-none-linux_armv6l.whl", hash = "sha256:a1810931c41606c686bae8b5b9a8072adac2f611bb433c0ba476acba17a332e0", size = 10453333, upload-time = "2026-02-26T20:04:20.093Z" },
-    { url = "https://files.pythonhosted.org/packages/ce/5d/6a1f271f6e31dffb31855996493641edc3eef8077b883eaf007a2f1c2976/ruff-0.15.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5a1632c66672b8b4d3e1d1782859e98d6e0b4e70829530666644286600a33992", size = 10853356, upload-time = "2026-02-26T20:04:05.808Z" },
-    { url = "https://files.pythonhosted.org/packages/b1/d8/0fab9f8842b83b1a9c2bf81b85063f65e93fb512e60effa95b0be49bfc54/ruff-0.15.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a4386ba2cd6c0f4ff75252845906acc7c7c8e1ac567b7bc3d373686ac8c222ba", size = 10187434, upload-time = "2026-02-26T20:03:54.656Z" },
-    { url = "https://files.pythonhosted.org/packages/85/cc/cc220fd9394eff5db8d94dec199eec56dd6c9f3651d8869d024867a91030/ruff-0.15.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2496488bdfd3732747558b6f95ae427ff066d1fcd054daf75f5a50674411e75", size = 10535456, upload-time = "2026-02-26T20:03:52.738Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/0f/bced38fa5cf24373ec767713c8e4cadc90247f3863605fb030e597878661/ruff-0.15.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f1c4893841ff2d54cbda1b2860fa3260173df5ddd7b95d370186f8a5e66a4ac", size = 10287772, upload-time = "2026-02-26T20:04:08.138Z" },
-    { url = "https://files.pythonhosted.org/packages/2b/90/58a1802d84fed15f8f281925b21ab3cecd813bde52a8ca033a4de8ab0e7a/ruff-0.15.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:820b8766bd65503b6c30aaa6331e8ef3a6e564f7999c844e9a547c40179e440a", size = 11049051, upload-time = "2026-02-26T20:04:03.53Z" },
-    { url = "https://files.pythonhosted.org/packages/d2/ac/b7ad36703c35f3866584564dc15f12f91cb1a26a897dc2fd13d7cb3ae1af/ruff-0.15.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9fb74bab47139c1751f900f857fa503987253c3ef89129b24ed375e72873e85", size = 11890494, upload-time = "2026-02-26T20:04:10.497Z" },
-    { url = "https://files.pythonhosted.org/packages/93/3d/3eb2f47a39a8b0da99faf9c54d3eb24720add1e886a5309d4d1be73a6380/ruff-0.15.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f80c98765949c518142b3a50a5db89343aa90f2c2bf7799de9986498ae6176db", size = 11326221, upload-time = "2026-02-26T20:04:12.84Z" },
-    { url = "https://files.pythonhosted.org/packages/ff/90/bf134f4c1e5243e62690e09d63c55df948a74084c8ac3e48a88468314da6/ruff-0.15.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:451a2e224151729b3b6c9ffb36aed9091b2996fe4bdbd11f47e27d8f2e8888ec", size = 11168459, upload-time = "2026-02-26T20:04:00.969Z" },
-    { url = "https://files.pythonhosted.org/packages/b5/e5/a64d27688789b06b5d55162aafc32059bb8c989c61a5139a36e1368285eb/ruff-0.15.4-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a8f157f2e583c513c4f5f896163a93198297371f34c04220daf40d133fdd4f7f", size = 11104366, upload-time = "2026-02-26T20:03:48.099Z" },
-    { url = "https://files.pythonhosted.org/packages/f1/f6/32d1dcb66a2559763fc3027bdd65836cad9eb09d90f2ed6a63d8e9252b02/ruff-0.15.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:917cc68503357021f541e69b35361c99387cdbbf99bd0ea4aa6f28ca99ff5338", size = 10510887, upload-time = "2026-02-26T20:03:45.771Z" },
-    { url = "https://files.pythonhosted.org/packages/ff/92/22d1ced50971c5b6433aed166fcef8c9343f567a94cf2b9d9089f6aa80fe/ruff-0.15.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e9737c8161da79fd7cfec19f1e35620375bd8b2a50c3e77fa3d2c16f574105cc", size = 10285939, upload-time = "2026-02-26T20:04:22.42Z" },
-    { url = "https://files.pythonhosted.org/packages/e6/f4/7c20aec3143837641a02509a4668fb146a642fd1211846634edc17eb5563/ruff-0.15.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:291258c917539e18f6ba40482fe31d6f5ac023994ee11d7bdafd716f2aab8a68", size = 10765471, upload-time = "2026-02-26T20:03:58.924Z" },
-    { url = "https://files.pythonhosted.org/packages/d0/09/6d2f7586f09a16120aebdff8f64d962d7c4348313c77ebb29c566cefc357/ruff-0.15.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3f83c45911da6f2cd5936c436cf86b9f09f09165f033a99dcf7477e34041cbc3", size = 11263382, upload-time = "2026-02-26T20:04:24.424Z" },
-    { url = "https://files.pythonhosted.org/packages/1b/fa/2ef715a1cd329ef47c1a050e10dee91a9054b7ce2fcfdd6a06d139afb7ec/ruff-0.15.4-py3-none-win32.whl", hash = "sha256:65594a2d557d4ee9f02834fcdf0a28daa8b3b9f6cb2cb93846025a36db47ef22", size = 10506664, upload-time = "2026-02-26T20:03:50.56Z" },
-    { url = "https://files.pythonhosted.org/packages/d0/a8/c688ef7e29983976820d18710f955751d9f4d4eb69df658af3d006e2ba3e/ruff-0.15.4-py3-none-win_amd64.whl", hash = "sha256:04196ad44f0df220c2ece5b0e959c2f37c777375ec744397d21d15b50a75264f", size = 11651048, upload-time = "2026-02-26T20:04:17.191Z" },
-    { url = "https://files.pythonhosted.org/packages/3e/0a/9e1be9035b37448ce2e68c978f0591da94389ade5a5abafa4cf99985d1b2/ruff-0.15.4-py3-none-win_arm64.whl", hash = "sha256:60d5177e8cfc70e51b9c5fad936c634872a74209f934c1e79107d11787ad5453", size = 10966776, upload-time = "2026-02-26T20:03:56.908Z" },
+version = "0.15.5"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/77/9b/840e0039e65fcf12758adf684d2289024d6140cde9268cc59887dc55189c/ruff-0.15.5.tar.gz", hash = "sha256:7c3601d3b6d76dce18c5c824fc8d06f4eef33d6df0c21ec7799510cde0f159a2", size = 4574214, upload-time = "2026-03-05T20:06:34.946Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/47/20/5369c3ce21588c708bcbe517a8fbe1a8dfdb5dfd5137e14790b1da71612c/ruff-0.15.5-py3-none-linux_armv6l.whl", hash = "sha256:4ae44c42281f42e3b06b988e442d344a5b9b72450ff3c892e30d11b29a96a57c", size = 10478185, upload-time = "2026-03-05T20:06:29.093Z" },
+    { url = "https://files.pythonhosted.org/packages/44/ed/e81dd668547da281e5dce710cf0bc60193f8d3d43833e8241d006720e42b/ruff-0.15.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6edd3792d408ebcf61adabc01822da687579a1a023f297618ac27a5b51ef0080", size = 10859201, upload-time = "2026-03-05T20:06:32.632Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/8f/533075f00aaf19b07c5cd6aa6e5d89424b06b3b3f4583bfa9c640a079059/ruff-0.15.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:89f463f7c8205a9f8dea9d658d59eff49db05f88f89cc3047fb1a02d9f344010", size = 10184752, upload-time = "2026-03-05T20:06:40.312Z" },
+    { url = "https://files.pythonhosted.org/packages/66/0e/ba49e2c3fa0395b3152bad634c7432f7edfc509c133b8f4529053ff024fb/ruff-0.15.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba786a8295c6574c1116704cf0b9e6563de3432ac888d8f83685654fe528fd65", size = 10534857, upload-time = "2026-03-05T20:06:19.581Z" },
+    { url = "https://files.pythonhosted.org/packages/59/71/39234440f27a226475a0659561adb0d784b4d247dfe7f43ffc12dd02e288/ruff-0.15.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd4b801e57955fe9f02b31d20375ab3a5c4415f2e5105b79fb94cf2642c91440", size = 10309120, upload-time = "2026-03-05T20:06:00.435Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/87/4140aa86a93df032156982b726f4952aaec4a883bb98cb6ef73c347da253/ruff-0.15.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391f7c73388f3d8c11b794dbbc2959a5b5afe66642c142a6effa90b45f6f5204", size = 11047428, upload-time = "2026-03-05T20:05:51.867Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/f7/4953e7e3287676f78fbe85e3a0ca414c5ca81237b7575bdadc00229ac240/ruff-0.15.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dc18f30302e379fe1e998548b0f5e9f4dff907f52f73ad6da419ea9c19d66c8", size = 11914251, upload-time = "2026-03-05T20:06:22.887Z" },
+    { url = "https://files.pythonhosted.org/packages/77/46/0f7c865c10cf896ccf5a939c3e84e1cfaeed608ff5249584799a74d33835/ruff-0.15.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc6e7f90087e2d27f98dc34ed1b3ab7c8f0d273cc5431415454e22c0bd2a681", size = 11333801, upload-time = "2026-03-05T20:05:57.168Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/01/a10fe54b653061585e655f5286c2662ebddb68831ed3eaebfb0eb08c0a16/ruff-0.15.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1cb7169f53c1ddb06e71a9aebd7e98fc0fea936b39afb36d8e86d36ecc2636a", size = 11206821, upload-time = "2026-03-05T20:06:03.441Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/0d/2132ceaf20c5e8699aa83da2706ecb5c5dcdf78b453f77edca7fb70f8a93/ruff-0.15.5-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9b037924500a31ee17389b5c8c4d88874cc6ea8e42f12e9c61a3d754ff72f1ca", size = 11133326, upload-time = "2026-03-05T20:06:25.655Z" },
+    { url = "https://files.pythonhosted.org/packages/72/cb/2e5259a7eb2a0f87c08c0fe5bf5825a1e4b90883a52685524596bfc93072/ruff-0.15.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:65bb414e5b4eadd95a8c1e4804f6772bbe8995889f203a01f77ddf2d790929dd", size = 10510820, upload-time = "2026-03-05T20:06:37.79Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/20/b67ce78f9e6c59ffbdb5b4503d0090e749b5f2d31b599b554698a80d861c/ruff-0.15.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d20aa469ae3b57033519c559e9bc9cd9e782842e39be05b50e852c7c981fa01d", size = 10302395, upload-time = "2026-03-05T20:05:54.504Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/e5/719f1acccd31b720d477751558ed74e9c88134adcc377e5e886af89d3072/ruff-0.15.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:15388dd28c9161cdb8eda68993533acc870aa4e646a0a277aa166de9ad5a8752", size = 10754069, upload-time = "2026-03-05T20:06:06.422Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/9c/d1db14469e32d98f3ca27079dbd30b7b44dbb5317d06ab36718dee3baf03/ruff-0.15.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b30da330cbd03bed0c21420b6b953158f60c74c54c5f4c1dabbdf3a57bf355d2", size = 11304315, upload-time = "2026-03-05T20:06:10.867Z" },
+    { url = "https://files.pythonhosted.org/packages/28/3a/950367aee7c69027f4f422059227b290ed780366b6aecee5de5039d50fa8/ruff-0.15.5-py3-none-win32.whl", hash = "sha256:732e5ee1f98ba5b3679029989a06ca39a950cced52143a0ea82a2102cb592b74", size = 10551676, upload-time = "2026-03-05T20:06:13.705Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/00/bf077a505b4e649bdd3c47ff8ec967735ce2544c8e4a43aba42ee9bf935d/ruff-0.15.5-py3-none-win_amd64.whl", hash = "sha256:821d41c5fa9e19117616c35eaa3f4b75046ec76c65e7ae20a333e9a8696bc7fe", size = 11678972, upload-time = "2026-03-05T20:06:45.379Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/4e/cd76eca6db6115604b7626668e891c9dd03330384082e33662fb0f113614/ruff-0.15.5-py3-none-win_arm64.whl", hash = "sha256:b498d1c60d2fe5c10c45ec3f698901065772730b411f164ae270bb6bfcc4740b", size = 10965572, upload-time = "2026-03-05T20:06:16.984Z" },
 ]
 
 [[package]]
@@ -455,26 +455,26 @@ wheels = [
 
 [[package]]
 name = "ty"
-version = "0.0.20"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/56/95/8de69bb98417227b01f1b1d743c819d6456c9fd140255b6124b05b17dfd6/ty-0.0.20.tar.gz", hash = "sha256:ebba6be7974c14efbb2a9adda6ac59848f880d7259f089dfa72a093039f1dcc6", size = 5262529, upload-time = "2026-03-02T15:51:36.587Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/0b/2c/718abe48393e521bf852cd6b0f984766869b09c258d6e38a118768a91731/ty-0.0.20-py3-none-linux_armv6l.whl", hash = "sha256:7cc12769c169c9709a829c2248ee2826b7aae82e92caeac813d856f07c021eae", size = 10333656, upload-time = "2026-03-02T15:51:56.461Z" },
-    { url = "https://files.pythonhosted.org/packages/41/0e/eb1c4cc4a12862e2327b72657bcebb10b7d9f17046f1bdcd6457a0211615/ty-0.0.20-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:3b777c1bf13bc0a95985ebb8a324b8668a4a9b2e514dde5ccf09e4d55d2ff232", size = 10168505, upload-time = "2026-03-02T15:51:51.895Z" },
-    { url = "https://files.pythonhosted.org/packages/89/7f/10230798e673f0dd3094dfd16e43bfd90e9494e7af6e8e7db516fb431ddf/ty-0.0.20-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b2a4a7db48bf8cba30365001bc2cad7fd13c1a5aacdd704cc4b7925de8ca5eb3", size = 9678510, upload-time = "2026-03-02T15:51:48.451Z" },
-    { url = "https://files.pythonhosted.org/packages/7a/3d/59d9159577494edd1728f7db77b51bb07884bd21384f517963114e3ab5f6/ty-0.0.20-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6846427b8b353a43483e9c19936dc6a25612573b44c8f7d983dfa317e7f00d4c", size = 10162926, upload-time = "2026-03-02T15:51:40.558Z" },
-    { url = "https://files.pythonhosted.org/packages/9c/a8/b7273eec3e802f78eb913fbe0ce0c16ef263723173e06a5776a8359b2c66/ty-0.0.20-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:245ceef5bd88df366869385cf96411cb14696334f8daa75597cf7e41c3012eb8", size = 10171702, upload-time = "2026-03-02T15:51:44.069Z" },
-    { url = "https://files.pythonhosted.org/packages/9f/32/5f1144f2f04a275109db06e3498450c4721554215b80ae73652ef412eeab/ty-0.0.20-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4d21d1cdf67a444d3c37583c17291ddba9382a9871021f3f5d5735e09e85efe", size = 10682552, upload-time = "2026-03-02T15:51:33.102Z" },
-    { url = "https://files.pythonhosted.org/packages/6a/db/9f1f637310792f12bd6ed37d5fc8ab39ba1a9b0c6c55a33865e9f1cad840/ty-0.0.20-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bd4ffd907d1bd70e46af9e9a2f88622f215e1bf44658ea43b32c2c0b357299e4", size = 11242605, upload-time = "2026-03-02T15:51:34.895Z" },
-    { url = "https://files.pythonhosted.org/packages/1a/68/cc9cae2e732fcfd20ccdffc508407905a023fc8493b8771c392d915528dc/ty-0.0.20-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b6594b58d8b0e9d16a22b3045fc1305db4b132c8d70c17784ab8c7a7cc986807", size = 10974655, upload-time = "2026-03-02T15:51:46.011Z" },
-    { url = "https://files.pythonhosted.org/packages/1c/c1/b9e3e3f28fe63486331e653f6aeb4184af8b1fe80542fcf74d2dda40a93d/ty-0.0.20-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3662f890518ce6cf4d7568f57d03906912d2afbf948a01089a28e325b1ef198c", size = 10761325, upload-time = "2026-03-02T15:51:26.818Z" },
-    { url = "https://files.pythonhosted.org/packages/39/9e/67db935bdedf219a00fb69ec5437ba24dab66e0f2e706dd54a4eca234b84/ty-0.0.20-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0e3ffbae58f9f0d17cdc4ac6d175ceae560b7ed7d54f9ddfb1c9f31054bcdc2c", size = 10145793, upload-time = "2026-03-02T15:51:38.562Z" },
-    { url = "https://files.pythonhosted.org/packages/c7/de/b0eb815d4dc5a819c7e4faddc2a79058611169f7eef07ccc006531ce228c/ty-0.0.20-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:176e52bc8bb00b0e84efd34583962878a447a3a0e34ecc45fd7097a37554261b", size = 10189640, upload-time = "2026-03-02T15:51:50.202Z" },
-    { url = "https://files.pythonhosted.org/packages/b8/71/63734923965cbb70df1da3e93e4b8875434e326b89e9f850611122f279bf/ty-0.0.20-py3-none-musllinux_1_2_i686.whl", hash = "sha256:b2bc73025418e976ca4143dde71fb9025a90754a08ac03e6aa9b80d4bed1294b", size = 10370568, upload-time = "2026-03-02T15:51:42.295Z" },
-    { url = "https://files.pythonhosted.org/packages/32/a0/a532c2048533347dff48e9ca98bd86d2c224356e101688a8edaf8d6973fb/ty-0.0.20-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d52f7c9ec6e363e094b3c389c344d5a140401f14a77f0625e3f28c21918552f5", size = 10853999, upload-time = "2026-03-02T15:51:58.963Z" },
-    { url = "https://files.pythonhosted.org/packages/48/88/36c652c658fe96658043e4abc8ea97801de6fb6e63ab50aaa82807bff1d8/ty-0.0.20-py3-none-win32.whl", hash = "sha256:c7d32bfe93f8fcaa52b6eef3f1b930fd7da410c2c94e96f7412c30cfbabf1d17", size = 9744206, upload-time = "2026-03-02T15:51:54.183Z" },
-    { url = "https://files.pythonhosted.org/packages/ff/a7/a4a13bed1d7fd9d97aaa3c5bb5e6d3e9a689e6984806cbca2ab4c9233cac/ty-0.0.20-py3-none-win_amd64.whl", hash = "sha256:a5e10f40fc4a0a1cbcb740a4aad5c7ce35d79f030836ea3183b7a28f43170248", size = 10711999, upload-time = "2026-03-02T15:51:29.212Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/7e/6bfd748a9f4ff9267ed3329b86a0f02cdf6ab49f87bc36c8a164852f99fc/ty-0.0.20-py3-none-win_arm64.whl", hash = "sha256:53f7a5c12c960e71f160b734f328eff9a35d578af4b67a36b0bb5990ac5cdc27", size = 10150143, upload-time = "2026-03-02T15:51:31.283Z" },
+version = "0.0.21"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ee/20/2ba8fd9493c89c41dfe9dbb73bc70a28b28028463bc0d2897ba8be36230a/ty-0.0.21.tar.gz", hash = "sha256:a4c2ba5d67d64df8fcdefd8b280ac1149d24a73dbda82fa953a0dff9d21400ed", size = 5297967, upload-time = "2026-03-06T01:57:13.809Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/36/70/edf38bb37517531681d1c37f5df64744e5ad02673c02eb48447eae4bea08/ty-0.0.21-py3-none-linux_armv6l.whl", hash = "sha256:7bdf2f572378de78e1f388d24691c89db51b7caf07cf90f2bfcc1d6b18b70a76", size = 10299222, upload-time = "2026-03-06T01:57:16.64Z" },
+    { url = "https://files.pythonhosted.org/packages/72/62/0047b0bd19afeefbc7286f20a5f78a2aa39f92b4d89853f0d7185ab89edc/ty-0.0.21-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7e9613994610431ab8625025bd2880dbcb77c5c9fabdd21134cda12d840a529d", size = 10130513, upload-time = "2026-03-06T01:57:29.93Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/20/0b93a9e91aaed23155780258cdfdb4726ef68b6985378ac069bc427291a0/ty-0.0.21-py3-none-macosx_11_0_arm64.whl", hash = "sha256:56d3b198b64dd0a19b2b66e257deaed2ecea568e722ae5352f3c6fb62027f89d", size = 9605425, upload-time = "2026-03-06T01:57:27.115Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/fd/9945e2fa2996a1287b1e1d7ce050e97e1f420233b271e770934bfa0880a0/ty-0.0.21-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d23d2c34f7a77d974bb08f0860ef700addc8a683d81a0319f71c08f87506cfd0", size = 10108298, upload-time = "2026-03-06T01:57:35.429Z" },
+    { url = "https://files.pythonhosted.org/packages/52/e7/4ec52fcb15f3200826c9f048472c062549a05b0d1ef0b51f32d527b513c4/ty-0.0.21-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:56b01fd2519637a4ca88344f61c96225f540c98ff18bca321d4eaa7bb0f7aa2f", size = 10121556, upload-time = "2026-03-06T01:57:03.242Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/c0/ad457be2a8abea0f25549598bd098554540ced66229488daa0d558dad3c8/ty-0.0.21-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9de7e11c63c6afc40f3e9ba716374add171aee7fabc70b5146a510705c6d41b", size = 10603264, upload-time = "2026-03-06T01:56:52.134Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/5b/2ecc7a2175243a4bcb72f5298ae41feabbb93b764bb0dc45722f3752c2c2/ty-0.0.21-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:62f7f5b235c4f7876db305c36997aea07b7af29b1a068f373d0e2547e25f32ff", size = 11196428, upload-time = "2026-03-06T01:57:32.94Z" },
+    { url = "https://files.pythonhosted.org/packages/37/f5/aff507d6a901f328ef96a298032b0c11aaaf950a146ed7dd3b5bf2cd3acf/ty-0.0.21-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ee8399f7c453a425291e6688efe430cfae7ab0ac4ffd50eba9f872bf878b54f6", size = 10866355, upload-time = "2026-03-06T01:56:57.831Z" },
+    { url = "https://files.pythonhosted.org/packages/be/30/822bbcb92d55b65989aa7ed06d9585f28ade9c9447369194ed4b0fb3b5b9/ty-0.0.21-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:210e7568c9f886c4d01308d751949ee714ad7ad9d7d928d2ba90d329dd880367", size = 10738177, upload-time = "2026-03-06T01:57:11.256Z" },
+    { url = "https://files.pythonhosted.org/packages/57/cc/46e7991b6469e93ac2c7e533a028983e402485580150ac864c56352a3a82/ty-0.0.21-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:53508e345b11569f78b21ba8e2b4e61df38a9754947fb3cd9f2ef574367338fb", size = 10079158, upload-time = "2026-03-06T01:57:00.516Z" },
+    { url = "https://files.pythonhosted.org/packages/15/c2/0bbdadfbd008240f8f1a87dc877433cb3884436097926107ccf06e618199/ty-0.0.21-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:553e43571f4a35604c36cfd07d8b61a5eb7a714e3c67f8c4ff2cf674fefbaef9", size = 10150535, upload-time = "2026-03-06T01:57:08.815Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/b5/2dbdb7b57b5362200ef0a39738ebd31331726328336def0143ac097ee59d/ty-0.0.21-py3-none-musllinux_1_2_i686.whl", hash = "sha256:666f6822e3b9200abfa7e95eb0ddd576460adb8d66b550c0ad2c70abc84a2048", size = 10319803, upload-time = "2026-03-06T01:57:19.106Z" },
+    { url = "https://files.pythonhosted.org/packages/72/84/70e52c0b7abc7c2086f9876ef454a73b161d3125315536d8d7e911c94ca4/ty-0.0.21-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:a0854d008347ce4a5fb351af132f660a390ab2a1163444d075251d43e6f74b9b", size = 10826239, upload-time = "2026-03-06T01:57:21.727Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/8a/1f72480fd013bbc6cd1929002abbbcde9a0b08ead6a15154de9d7f7fa37e/ty-0.0.21-py3-none-win32.whl", hash = "sha256:bef3ab4c7b966bcc276a8ac6c11b63ba222d21355b48d471ea782c4104eee4e0", size = 9693196, upload-time = "2026-03-06T01:57:24.126Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/f8/1104808b875c26c640e536945753a78562d606bef4e241d9dbf3d92477f6/ty-0.0.21-py3-none-win_amd64.whl", hash = "sha256:a709d576e5bea84b745d43058d8b9cd4f27f74a0b24acb4b0cbb7d3d41e0d050", size = 10668660, upload-time = "2026-03-06T01:56:55.06Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/b8/25e0adc404bbf986977657b25318991f93097b49f8aea640d93c0b0db68e/ty-0.0.21-py3-none-win_arm64.whl", hash = "sha256:f72047996598ac20553fb7e21ba5741e3c82dee4e9eadf10d954551a5fe09391", size = 10104161, upload-time = "2026-03-06T01:57:06.072Z" },
 ]
 
 [[package]]

From 11a709abe5a73d8b31f53bf9124b1430bfb1c351 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Mar 2026 13:43:07 +0700
Subject: [PATCH 185/236] Add support for music and video generation, and
 update the persistent path from `images` to `media` to accommodate multiple
 media types.

---
 app/main.py              |   8 +-
 app/server/chat.py       | 364 ++++++++++++++++++++++++++++++---------
 app/server/images.py     |  18 --
 app/server/media.py      |  18 ++
 app/server/middleware.py |  28 +--
 app/utils/config.py      |   6 +-
 config/config.yaml       |   2 +-
 uv.lock                  |   4 +-
 8 files changed, 325 insertions(+), 123 deletions(-)
 delete mode 100644 app/server/images.py
 create mode 100644 app/server/media.py

diff --git a/app/main.py b/app/main.py
index 20d15b0..9782726 100644
--- a/app/main.py
+++ b/app/main.py
@@ -6,11 +6,11 @@
 
 from .server.chat import router as chat_router
 from .server.health import router as health_router
-from .server.images import router as images_router
+from .server.media import router as media_router
 from .server.middleware import (
     add_cors_middleware,
     add_exception_handler,
-    cleanup_expired_images,
+    cleanup_expired_media,
 )
 from .services import GeminiClientPool, LMDBConversationStore
 
@@ -33,7 +33,7 @@ async def _run_retention_cleanup(stop_event: asyncio.Event) -> None:
     while not stop_event.is_set():
         try:
             store.cleanup_expired()
-            cleanup_expired_images(store.retention_days)
+            cleanup_expired_media(store.retention_days)
         except Exception:
             logger.exception("LMDB retention cleanup task failed.")
 
@@ -99,6 +99,6 @@ def create_app() -> FastAPI:
 
     app.include_router(health_router, tags=["Health"])
     app.include_router(chat_router, tags=["Chat"])
-    app.include_router(images_router, tags=["Images"])
+    app.include_router(media_router, tags=["Media"])
 
     return app
diff --git a/app/server/chat.py b/app/server/chat.py
index 78dc7d6..ff62840 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -16,6 +16,7 @@
 from gemini_webapi.client import ChatSession
 from gemini_webapi.constants import Model
 from gemini_webapi.types.image import GeneratedImage, Image
+from gemini_webapi.types.video import GeneratedMedia, GeneratedVideo
 from loguru import logger
 
 from app.models import (
@@ -54,8 +55,8 @@
     ToolChoiceTypes,
 )
 from app.server.middleware import (
-    get_image_store_dir,
-    get_image_token,
+    get_media_store_dir,
+    get_media_token,
     get_temp_dir,
     verify_api_key,
 )
@@ -130,6 +131,38 @@ async def _image_to_base64(
     return base64.b64encode(data).decode("ascii"), width, height, filename, file_hash
 
 
+async def _media_to_local_file(
+    media: GeneratedVideo | GeneratedMedia, temp_dir: Path
+) -> dict[str, tuple[str, str]]:
+    """Persist media and return dict mapping type to (filename, hash)"""
+    try:
+        saved_paths = await media.save(path=str(temp_dir))
+    except Exception as e:
+        logger.warning(f"Failed to save media: {e}")
+        return {}
+
+    results = {}
+    for mtype, spath in saved_paths.items():
+        if not spath:
+            continue
+
+        original_path = Path(spath)
+        data = original_path.read_bytes()
+        suffix = original_path.suffix
+
+        if not suffix:
+            suffix = ".mp4" if "video" in mtype else ".mp3"
+
+        random_name = f"media_{uuid.uuid4().hex}{suffix}"
+        new_path = temp_dir / random_name
+        original_path.rename(new_path)
+
+        fhash = hashlib.sha256(data).hexdigest()
+        results[mtype] = (random_name, fhash)
+
+    return results
+
+
 def _calculate_usage(
     messages: list[AppMessage],
     assistant_text: str | None,
@@ -1170,30 +1203,86 @@ def make_chunk(delta_content: dict) -> str:
         )
 
         images = []
-        seen_urls = set()
+        seen_image_urls = set()
+        media_items: list[GeneratedVideo | GeneratedMedia] = []
+        seen_media_urls = set()
+
         for out in all_outputs:
             if out.images:
                 for img in out.images:
-                    if img.url not in seen_urls:
+                    if img.url not in seen_image_urls:
                         images.append(img)
-                        seen_urls.add(img.url)
+                        seen_image_urls.add(img.url)
+
+            m_list = (out.videos or []) + (out.media or [])
+            for m in m_list:
+                m_url = getattr(m, "url", None) or getattr(m, "mp3_url", None)
+                if m_url and m_url not in seen_media_urls:
+                    media_items.append(m)
+                    seen_media_urls.add(m_url)
 
         image_results = []
         seen_hashes = set()
         for image in images:
             try:
-                image_store = get_image_store_dir()
-                _, _, _, fname, fhash = await _image_to_base64(image, image_store)
+                media_store = get_media_store_dir()
+                _, _, _, fname, fhash = await _image_to_base64(image, media_store)
                 if fhash in seen_hashes:
-                    (image_store / fname).unlink(missing_ok=True)
+                    (media_store / fname).unlink(missing_ok=True)
                     continue
                 seen_hashes.add(fhash)
-
-                img_url = f"{base_url}images/{fname}?token={get_image_token(fname)}"
-                image_results.append(img_url)
+                img_url = f"{base_url}media/{fname}?token={get_media_token(fname)}"
+                title = getattr(image, "title", "Image")
+                image_results.append(f"![{title}]({img_url})")
             except Exception as exc:
                 logger.warning(f"Failed to process image in OpenAI stream: {exc}")
 
+        media_results = []
+        seen_media_hashes = set()
+        for media_item in media_items:
+            try:
+                media_store = get_media_store_dir()
+                m_dict = await _media_to_local_file(media_item, media_store)
+
+                m_urls = {}
+                for mtype, (random_name, fhash) in m_dict.items():
+                    if fhash in seen_media_hashes:
+                        (media_store / random_name).unlink(missing_ok=True)
+                        continue
+                    seen_media_hashes.add(fhash)
+                    m_urls[mtype] = (
+                        f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
+                    )
+
+                media_url = m_urls.get("video") or m_urls.get("audio")
+                thumb_url = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
+
+                title = getattr(media_item, "title", "Media")
+                if thumb_url and media_url:
+                    media_results.append(f"[![{title}]({thumb_url})]({media_url})")
+                elif media_url:
+                    media_results.append(f"[{title}]({media_url})")
+                elif thumb_url:
+                    media_results.append(f"![{title}]({thumb_url})")
+            except Exception as exc:
+                logger.warning(f"Failed to process media in OpenAI stream: {exc}")
+
+        for image_url in image_results:
+            yield make_chunk(
+                {
+                    "delta": {"content": f"\n\n{image_url}"},
+                    "finish_reason": None,
+                }
+            )
+
+        for media_md in media_results:
+            yield make_chunk(
+                {
+                    "delta": {"content": f"\n\n{media_md}"},
+                    "finish_reason": None,
+                }
+            )
+
         if detected_tool_calls:
             for idx, call in enumerate(detected_tool_calls):
                 tc_dict = {
@@ -1212,14 +1301,6 @@ def make_chunk(delta_content: dict) -> str:
                     }
                 )
 
-        for image_url in image_results:
-            yield make_chunk(
-                {
-                    "delta": {"content": f"\n\n![Generated Image]({image_url})"},
-                    "finish_reason": None,
-                }
-            )
-
         p_tok, c_tok, t_tok, r_tok = _calculate_usage(
             messages, assistant_text, detected_tool_calls, full_thoughts
         )
@@ -1261,7 +1342,7 @@ def _create_responses_real_streaming_response(
     client_wrapper: GeminiClientWrapper,
     session: ChatSession,
     request: ResponseCreateRequest,
-    image_store: Path,
+    media_store: Path,
     base_url: str,
     structured_requirement: StructuredOutputRequirement | None = None,
 ) -> StreamingResponse:
@@ -1596,57 +1677,107 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
         final_response_contents: list[ResponseOutputContent] = []
         seen_hashes = set()
 
+        images = []
+        seen_image_urls = set()
+        media_items: list[GeneratedVideo | GeneratedMedia] = []
+        seen_media_urls = set()
+
         for out in all_outputs:
             if out.images:
-                for image in out.images:
-                    try:
-                        b64, w, h, fname, fhash = await _image_to_base64(image, image_store)
-                        if fhash in seen_hashes:
-                            continue
-                        seen_hashes.add(fhash)
-
-                        parts = fname.rsplit(".", 1)
-                        img_id = parts[0]
-                        fmt = parts[1] if len(parts) > 1 else "png"
-
-                        img_item = ImageGenerationCall(
-                            id=img_id,
-                            result=b64,
-                            output_format=fmt,
-                            size=f"{w}x{h}" if w and h else None,
-                        )
+                for img in out.images:
+                    if img.url not in seen_image_urls:
+                        images.append(img)
+                        seen_image_urls.add(img.url)
 
-                        image_url = (
-                            f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
-                        )
-                        final_response_contents.append(
-                            ResponseOutputText(type="output_text", text=image_url)
-                        )
+            m_list = (out.videos or []) + (out.media or [])
+            for m in m_list:
+                m_url = getattr(m, "url", None) or getattr(m, "mp3_url", None)
+                if m_url and m_url not in seen_media_urls:
+                    media_items.append(m)
+                    seen_media_urls.add(m_url)
 
-                        yield make_event(
-                            "response.output_item.added",
-                            {
-                                **base_event,
-                                "type": "response.output_item.added",
-                                "output_index": current_index,
-                                "item": img_item.model_dump(mode="json"),
-                            },
-                        )
+        for image in images:
+            try:
+                b64, w, h, fname, fhash = await _image_to_base64(image, media_store)
+                if fhash in seen_hashes:
+                    continue
+                seen_hashes.add(fhash)
 
-                        yield make_event(
-                            "response.output_item.done",
-                            {
-                                **base_event,
-                                "type": "response.output_item.done",
-                                "output_index": current_index,
-                                "item": img_item.model_dump(mode="json"),
-                            },
-                        )
-                        current_index += 1
-                        image_items.append(img_item)
-                        storage_output += f"\n\n{image_url}"
-                    except Exception:
-                        logger.warning("Image processing failed in stream")
+                parts = fname.rsplit(".", 1)
+                img_id = parts[0]
+                fmt = parts[1] if len(parts) > 1 else "png"
+
+                img_item = ImageGenerationCall(
+                    id=img_id,
+                    result=b64,
+                    output_format=fmt,
+                    size=f"{w}x{h}" if w and h else None,
+                )
+
+                image_url = f"![{fname}]({base_url}media/{fname}?token={get_media_token(fname)})"
+                final_response_contents.append(
+                    ResponseOutputText(type="output_text", text=image_url)
+                )
+
+                yield make_event(
+                    "response.output_item.added",
+                    {
+                        **base_event,
+                        "type": "response.output_item.added",
+                        "output_index": current_index,
+                        "item": img_item.model_dump(mode="json"),
+                    },
+                )
+
+                yield make_event(
+                    "response.output_item.done",
+                    {
+                        **base_event,
+                        "type": "response.output_item.done",
+                        "output_index": current_index,
+                        "item": img_item.model_dump(mode="json"),
+                    },
+                )
+                current_index += 1
+                image_items.append(img_item)
+                storage_output += f"\n\n{image_url}"
+            except Exception:
+                logger.warning("Image processing failed in stream")
+
+        seen_media_hashes = set()
+        for media_item in media_items:
+            try:
+                m_dict = await _media_to_local_file(media_item, media_store)
+
+                m_urls = {}
+                for mtype, (random_name, fhash) in m_dict.items():
+                    if fhash in seen_media_hashes:
+                        (media_store / random_name).unlink(missing_ok=True)
+                        continue
+                    seen_media_hashes.add(fhash)
+                    m_urls[mtype] = (
+                        f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
+                    )
+
+                media_url = m_urls.get("video") or m_urls.get("audio")
+                thumb_url = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
+
+                title = getattr(media_item, "title", "Media")
+                media_md = ""
+                if thumb_url and media_url:
+                    media_md = f"[![{title}]({thumb_url})]({media_url})"
+                elif media_url:
+                    media_md = f"[{title}]({media_url})"
+                elif thumb_url:
+                    media_md = f"![{title}]({thumb_url})"
+
+                if media_md:
+                    final_response_contents.append(
+                        ResponseOutputText(type="output_text", text=media_md)
+                    )
+                    storage_output += f"\n\n{media_md}"
+            except Exception:
+                logger.warning("Media processing failed in stream")
 
         for call in detected_tool_calls:
             tc_item = ResponseFunctionToolCall(
@@ -1741,7 +1872,7 @@ async def create_chat_completion(
     raw_request: Request,
     api_key: str = Depends(verify_api_key),
     tmp_dir: Path = Depends(get_temp_dir),
-    image_store: Path = Depends(get_image_store_dir),
+    media_store: Path = Depends(get_media_store_dir),
 ):
     base_url = str(raw_request.base_url)
     pool, db = GeminiClientPool(), LMDBConversationStore()
@@ -1845,13 +1976,13 @@ async def create_chat_completion(
     seen_hashes = set()
     for image in images:
         try:
-            _, _, _, fname, fhash = await _image_to_base64(image, image_store)
+            _, _, _, fname, fhash = await _image_to_base64(image, media_store)
             if fhash in seen_hashes:
-                (image_store / fname).unlink(missing_ok=True)
+                (media_store / fname).unlink(missing_ok=True)
                 continue
             seen_hashes.add(fhash)
 
-            img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
+            img_url = f"![{fname}]({base_url}media/{fname}?token={get_media_token(fname)})"
             image_markdown += f"\n\n{img_url}"
         except Exception as exc:
             logger.warning(f"Failed to process image in OpenAI response: {exc}")
@@ -1860,6 +1991,42 @@ async def create_chat_completion(
         visible_output += image_markdown
         storage_output += image_markdown
 
+    media_items: list[GeneratedVideo | GeneratedMedia] = (resp_or_stream.videos or []) + (
+        resp_or_stream.media or []
+    )
+    media_markdown = ""
+    seen_media_hashes = set()
+    for m_item in media_items:
+        try:
+            m_dict = await _media_to_local_file(m_item, media_store)
+
+            m_urls = {}
+            for mtype, (random_name, fhash) in m_dict.items():
+                if fhash in seen_media_hashes:
+                    (media_store / random_name).unlink(missing_ok=True)
+                    continue
+                seen_media_hashes.add(fhash)
+                m_urls[mtype] = (
+                    f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
+                )
+
+            media_url = m_urls.get("video") or m_urls.get("audio")
+            thumb_url = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
+
+            title = getattr(m_item, "title", "Media")
+            if thumb_url and media_url:
+                media_markdown += f"\n\n[![{title}]({thumb_url})]({media_url})"
+            elif media_url:
+                media_markdown += f"\n\n[{title}]({media_url})"
+            elif thumb_url:
+                media_markdown += f"\n\n![{title}]({thumb_url})"
+        except Exception as exc:
+            logger.warning(f"Failed to process media in OpenAI response: {exc}")
+
+    if media_markdown:
+        visible_output += media_markdown
+        storage_output += media_markdown
+
     if tool_calls:
         logger.debug(
             f"Detected tool calls: {reprlib.repr([tc.model_dump(mode='json') for tc in tool_calls])}"
@@ -1902,7 +2069,7 @@ async def create_response(
     raw_request: Request,
     api_key: str = Depends(verify_api_key),
     tmp_dir: Path = Depends(get_temp_dir),
-    image_store: Path = Depends(get_image_store_dir),
+    media_store: Path = Depends(get_media_store_dir),
 ):
     base_url = str(raw_request.base_url)
     base_messages = _convert_responses_to_app_messages(request.input)
@@ -2002,7 +2169,7 @@ async def create_response(
             client,
             session,
             request,
-            image_store,
+            media_store,
             base_url,
             struct_req,
         )
@@ -2033,9 +2200,9 @@ async def create_response(
     seen_hashes = set()
     for img in images:
         try:
-            b64, w, h, fname, fhash = await _image_to_base64(img, image_store)
+            b64, w, h, fname, fhash = await _image_to_base64(img, media_store)
             if fhash in seen_hashes:
-                (image_store / fname).unlink(missing_ok=True)
+                (media_store / fname).unlink(missing_ok=True)
                 continue
             seen_hashes.add(fhash)
 
@@ -2047,12 +2214,6 @@ async def create_response(
                 else ("png" if isinstance(img, GeneratedImage) else "jpeg")
             )
 
-            contents.append(
-                ResponseOutputText(
-                    type="output_text",
-                    text=f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})",
-                )
-            )
             img_calls.append(
                 ImageGenerationCall(
                     id=img_id,
@@ -2072,11 +2233,52 @@ async def create_response(
     image_markdown = ""
     for img_call in img_calls:
         fname = f"{img_call.id}.{img_call.output_format}"
-        img_url = f"![{fname}]({base_url}images/{fname}?token={get_image_token(fname)})"
+        img_url = f"![{fname}]({base_url}media/{fname}?token={get_media_token(fname)})"
         image_markdown += f"\n\n{img_url}"
 
     if image_markdown:
         storage_output += image_markdown
+        contents.append(ResponseOutputText(type="output_text", text=image_markdown.strip()))
+
+    media_items: list[GeneratedVideo | GeneratedMedia] = (resp_or_stream.videos or []) + (
+        resp_or_stream.media or []
+    )
+    seen_media_hashes = set()
+    media_markdown = ""
+    for m_item in media_items:
+        try:
+            m_dict = await _media_to_local_file(m_item, media_store)
+
+            m_urls = {}
+            for mtype, (random_name, fhash) in m_dict.items():
+                if fhash in seen_media_hashes:
+                    (media_store / random_name).unlink(missing_ok=True)
+                    continue
+                seen_media_hashes.add(fhash)
+                m_urls[mtype] = (
+                    f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
+                )
+
+            media_url = m_urls.get("video") or m_urls.get("audio")
+            thumb_url = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
+
+            title = getattr(m_item, "title", "Media")
+            m_md = ""
+            if thumb_url and media_url:
+                m_md = f"[![{title}]({thumb_url})]({media_url})"
+            elif media_url:
+                m_md = f"[{title}]({media_url})"
+            elif thumb_url:
+                m_md = f"![{title}]({thumb_url})"
+
+            if m_md:
+                media_markdown += f"\n\n{m_md}"
+        except Exception as exc:
+            logger.warning(f"Failed to process media in OpenAI response: {exc}")
+
+    if media_markdown:
+        storage_output += media_markdown
+        contents.append(ResponseOutputText(type="output_text", text=media_markdown.strip()))
 
     p_tok, c_tok, t_tok, r_tok = _calculate_usage(messages, assistant_text, tool_calls, thoughts)
     usage = ResponseUsage(
diff --git a/app/server/images.py b/app/server/images.py
deleted file mode 100644
index e1c161c..0000000
--- a/app/server/images.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from fastapi import APIRouter, HTTPException, Query
-from fastapi.responses import FileResponse
-
-from app.server.middleware import get_image_store_dir, verify_image_token
-
-router = APIRouter()
-
-
-@router.get("/images/{filename}", tags=["Images"])
-async def get_image(filename: str, token: str | None = Query(default=None)):
-    if not verify_image_token(filename, token):
-        raise HTTPException(status_code=403, detail="Invalid token")
-
-    image_store = get_image_store_dir()
-    file_path = image_store / filename
-    if not file_path.exists():
-        raise HTTPException(status_code=404, detail="Image not found")
-    return FileResponse(file_path)
diff --git a/app/server/media.py b/app/server/media.py
new file mode 100644
index 0000000..86b5b70
--- /dev/null
+++ b/app/server/media.py
@@ -0,0 +1,18 @@
+from fastapi import APIRouter, HTTPException, Query
+from fastapi.responses import FileResponse
+
+from app.server.middleware import get_media_store_dir, verify_media_token
+
+router = APIRouter()
+
+
+@router.get("/media/{filename}", tags=["Media"])
+async def get_media(filename: str, token: str | None = Query(default=None)):
+    if not verify_media_token(filename, token):
+        raise HTTPException(status_code=403, detail="Invalid token")
+
+    media_store = get_media_store_dir()
+    file_path = media_store / filename
+    if not file_path.exists():
+        raise HTTPException(status_code=404, detail="Media not found")
+    return FileResponse(file_path)
diff --git a/app/server/middleware.py b/app/server/middleware.py
index 457ac0f..07840be 100644
--- a/app/server/middleware.py
+++ b/app/server/middleware.py
@@ -12,17 +12,17 @@
 
 from app.utils import g_config
 
-# Persistent directory for storing generated images
-IMAGE_STORE_DIR = Path(g_config.storage.images_path)
-IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True)
+# Persistent directory for storing generated media
+MEDIA_STORE_DIR = Path(g_config.storage.media_path)
+MEDIA_STORE_DIR.mkdir(parents=True, exist_ok=True)
 
 
-def get_image_store_dir() -> Path:
-    """Returns a persistent directory for storing images."""
-    return IMAGE_STORE_DIR
+def get_media_store_dir() -> Path:
+    """Returns a persistent directory for storing media."""
+    return MEDIA_STORE_DIR
 
 
-def get_image_token(filename: str) -> str:
+def get_media_token(filename: str) -> str:
     """Generate a HMAC-SHA256 token for a filename using the API key."""
     secret = g_config.server.api_key
     if not secret:
@@ -33,9 +33,9 @@ def get_image_token(filename: str) -> str:
     return hmac.new(secret_bytes, msg, hashlib.sha256).hexdigest()
 
 
-def verify_image_token(filename: str, token: str | None) -> bool:
+def verify_media_token(filename: str, token: str | None) -> bool:
     """Verify the provided token against the filename."""
-    expected = get_image_token(filename)
+    expected = get_media_token(filename)
     if not expected:
         return True  # No auth required
     if not token:
@@ -43,8 +43,8 @@ def verify_image_token(filename: str, token: str | None) -> bool:
     return hmac.compare_digest(token, expected)
 
 
-def cleanup_expired_images(retention_days: int) -> int:
-    """Delete images in IMAGE_STORE_DIR older than retention_days."""
+def cleanup_expired_media(retention_days: int) -> int:
+    """Delete media files in MEDIA_STORE_DIR older than retention_days."""
     if retention_days <= 0:
         return 0
 
@@ -53,7 +53,7 @@ def cleanup_expired_images(retention_days: int) -> int:
     cutoff = now - retention_seconds
 
     count = 0
-    for file_path in IMAGE_STORE_DIR.iterdir():
+    for file_path in MEDIA_STORE_DIR.iterdir():
         if not file_path.is_file():
             continue
         try:
@@ -61,10 +61,10 @@ def cleanup_expired_images(retention_days: int) -> int:
                 file_path.unlink()
                 count += 1
         except Exception as e:
-            logger.warning(f"Failed to delete expired image {file_path}: {e}")
+            logger.warning(f"Failed to delete expired media {file_path}: {e}")
 
     if count > 0:
-        logger.info(f"Cleaned up {count} expired images.")
+        logger.info(f"Cleaned up {count} expired media files.")
     return count
 
 
diff --git a/app/utils/config.py b/app/utils/config.py
index 9b4f1a3..a52bab5 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -151,9 +151,9 @@ class StorageConfig(BaseModel):
         default="data/lmdb",
         description="Path to the storage directory where data will be saved",
     )
-    images_path: str = Field(
-        default="data/images",
-        description="Path to the directory where generated images will be stored",
+    media_path: str = Field(
+        default="data/media",
+        description="Path to the directory where generated media will be stored",
     )
     max_size: int = Field(
         default=1024**2 * 256,  # 256 MB
diff --git a/config/config.yaml b/config/config.yaml
index 71301d0..746be67 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -33,7 +33,7 @@ gemini:
 
 storage:
   path: "data/lmdb"        # Database storage path
-  images_path: "data/images" # Image storage path
+  media_path: "data/media" # Media storage path
   max_size: 268435456      # Maximum database size (256 MB)
   retention_days: 14       # Number of days to retain conversations before cleanup
 
diff --git a/uv.lock b/uv.lock
index 6f688b1..e5fa9f4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -164,8 +164,8 @@ requires-dist = [
     { name = "orjson", specifier = ">=3.11.7" },
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.1" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" },
-    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.4" },
-    { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.20" },
+    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.5" },
+    { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.21" },
     { name = "uvicorn", specifier = ">=0.41.0" },
     { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" },
 ]

From 9a962019151f7061620a0589e8c40685948ec491 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Mar 2026 14:02:34 +0700
Subject: [PATCH 186/236] Refactor model retrieval logic to avoid duplicates
 and streamline processing

---
 app/server/chat.py | 38 +++++++++++---------------------------
 1 file changed, 11 insertions(+), 27 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index ff62840..018a20f 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -897,21 +897,20 @@ async def _get_available_models(pool: GeminiClientPool) -> list[ModelData]:
     now = int(datetime.now(tz=UTC).timestamp())
     strategy = g_config.gemini.model_strategy
     models_data = []
+    seen_model_ids = set()
 
-    custom_models = [m for m in g_config.gemini.models if m.model_name]
-    for m in custom_models:
-        models_data.append(
-            ModelData(
-                id=m.model_name or "",
-                created=now,
-                owned_by="custom",
+    for m in g_config.gemini.models:
+        if m.model_name and m.model_name not in seen_model_ids:
+            models_data.append(
+                ModelData(
+                    id=m.model_name,
+                    created=now,
+                    owned_by="custom",
+                )
             )
-        )
+            seen_model_ids.add(m.model_name)
 
     if strategy == "append":
-        custom_ids = {m.id for m in models_data}
-        seen_model_ids = set()
-
         for client in pool.clients:
             if not client.running():
                 continue
@@ -919,7 +918,7 @@ async def _get_available_models(pool: GeminiClientPool) -> list[ModelData]:
             client_models = client.list_models()
             if client_models:
                 for am in client_models:
-                    if am.id not in custom_ids and am.id not in seen_model_ids:
+                    if am.id and am.id not in seen_model_ids:
                         models_data.append(
                             ModelData(
                                 id=am.id,
@@ -929,21 +928,6 @@ async def _get_available_models(pool: GeminiClientPool) -> list[ModelData]:
                         )
                         seen_model_ids.add(am.id)
 
-        for model in Model:
-            m_name = model.model_name
-            if not m_name or m_name == "unspecified":
-                continue
-            if m_name in custom_ids or m_name in seen_model_ids:
-                continue
-
-            models_data.append(
-                ModelData(
-                    id=m_name,
-                    created=now,
-                    owned_by="gemini-web",
-                )
-            )
-
     return models_data
 
 

From 391c31ecbd90d8963a9d4b5686be235a1744f1bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Mar 2026 14:57:26 +0700
Subject: [PATCH 187/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index e5fa9f4..f11b4a1 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post251"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#891b9fd643d66eef2970897b06f32d395a091ad4" }
+version = "0.0.post252"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#3526b6597183fc7247f2304f7e4c2c72c70a9180" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From 13d79833876d62fd5d641c1b688d7d5dd384971c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Mar 2026 15:37:50 +0700
Subject: [PATCH 188/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index f11b4a1..f38ac9c 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post252"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#3526b6597183fc7247f2304f7e4c2c72c70a9180" }
+version = "0.0.post253"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#2c55ed1e9be545e5fea11d7a9ab04d68a2058596" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From eef2cbb35b2e1af45907b14acd63733968896f1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Mar 2026 16:12:07 +0700
Subject: [PATCH 189/236] Update the Markdown link handling to include video
 and audio support for music generation.

---
 app/server/chat.py | 131 +++++++++++++++++++++++++++++++--------------
 1 file changed, 91 insertions(+), 40 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 018a20f..950deed 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1238,16 +1238,27 @@ def make_chunk(delta_content: dict) -> str:
                         f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                     )
 
-                media_url = m_urls.get("video") or m_urls.get("audio")
-                thumb_url = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
-
                 title = getattr(media_item, "title", "Media")
-                if thumb_url and media_url:
-                    media_results.append(f"[![{title}]({thumb_url})]({media_url})")
-                elif media_url:
-                    media_results.append(f"[{title}]({media_url})")
-                elif thumb_url:
-                    media_results.append(f"![{title}]({thumb_url})")
+                video_url, video_thumb = m_urls.get("video"), m_urls.get("video_thumbnail")
+                audio_url, audio_thumb = m_urls.get("audio"), m_urls.get("audio_thumbnail")
+
+                if video_url:
+                    media_results.append(
+                        f"[![{title}]({video_thumb})]({video_url})"
+                        if video_thumb
+                        else f"[{title}]({video_url})"
+                    )
+                elif video_thumb:
+                    media_results.append(f"![{title}]({video_thumb})")
+
+                if audio_url:
+                    media_results.append(
+                        f"[![{title} (Audio)]({audio_thumb})]({audio_url})"
+                        if audio_thumb
+                        else f"[{title} (Audio)]({audio_url})"
+                    )
+                elif audio_thumb:
+                    media_results.append(f"![{title} (Audio)]({audio_thumb})")
             except Exception as exc:
                 logger.warning(f"Failed to process media in OpenAI stream: {exc}")
 
@@ -1743,17 +1754,30 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                         f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                     )
 
-                media_url = m_urls.get("video") or m_urls.get("audio")
-                thumb_url = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
-
                 title = getattr(media_item, "title", "Media")
-                media_md = ""
-                if thumb_url and media_url:
-                    media_md = f"[![{title}]({thumb_url})]({media_url})"
-                elif media_url:
-                    media_md = f"[{title}]({media_url})"
-                elif thumb_url:
-                    media_md = f"![{title}]({thumb_url})"
+                video_url, video_thumb = m_urls.get("video"), m_urls.get("video_thumbnail")
+                audio_url, audio_thumb = m_urls.get("audio"), m_urls.get("audio_thumbnail")
+
+                md_parts = []
+                if video_url:
+                    md_parts.append(
+                        f"[![{title}]({video_thumb})]({video_url})"
+                        if video_thumb
+                        else f"[{title}]({video_url})"
+                    )
+                elif video_thumb:
+                    md_parts.append(f"![{title}]({video_thumb})")
+
+                if audio_url:
+                    md_parts.append(
+                        f"[![{title} (Audio)]({audio_thumb})]({audio_url})"
+                        if audio_thumb
+                        else f"[{title} (Audio)]({audio_url})"
+                    )
+                elif audio_thumb:
+                    md_parts.append(f"![{title} (Audio)]({audio_thumb})")
+
+                media_md = "\n\n".join(md_parts)
 
                 if media_md:
                     final_response_contents.append(
@@ -1994,16 +2018,31 @@ async def create_chat_completion(
                     f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                 )
 
-            media_url = m_urls.get("video") or m_urls.get("audio")
-            thumb_url = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
-
             title = getattr(m_item, "title", "Media")
-            if thumb_url and media_url:
-                media_markdown += f"\n\n[![{title}]({thumb_url})]({media_url})"
-            elif media_url:
-                media_markdown += f"\n\n[{title}]({media_url})"
-            elif thumb_url:
-                media_markdown += f"\n\n![{title}]({thumb_url})"
+            video_url, video_thumb = m_urls.get("video"), m_urls.get("video_thumbnail")
+            audio_url, audio_thumb = m_urls.get("audio"), m_urls.get("audio_thumbnail")
+
+            md_parts = []
+            if video_url:
+                md_parts.append(
+                    f"[![{title}]({video_thumb})]({video_url})"
+                    if video_thumb
+                    else f"[{title}]({video_url})"
+                )
+            elif video_thumb:
+                md_parts.append(f"![{title}]({video_thumb})")
+
+            if audio_url:
+                md_parts.append(
+                    f"[![{title} (Audio)]({audio_thumb})]({audio_url})"
+                    if audio_thumb
+                    else f"[{title} (Audio)]({audio_url})"
+                )
+            elif audio_thumb:
+                md_parts.append(f"![{title} (Audio)]({audio_thumb})")
+
+            if md_parts:
+                media_markdown += "\n\n" + "\n\n".join(md_parts)
         except Exception as exc:
             logger.warning(f"Failed to process media in OpenAI response: {exc}")
 
@@ -2243,19 +2282,31 @@ async def create_response(
                     f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                 )
 
-            media_url = m_urls.get("video") or m_urls.get("audio")
-            thumb_url = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
-
             title = getattr(m_item, "title", "Media")
-            m_md = ""
-            if thumb_url and media_url:
-                m_md = f"[![{title}]({thumb_url})]({media_url})"
-            elif media_url:
-                m_md = f"[{title}]({media_url})"
-            elif thumb_url:
-                m_md = f"![{title}]({thumb_url})"
-
-            if m_md:
+            video_url, video_thumb = m_urls.get("video"), m_urls.get("video_thumbnail")
+            audio_url, audio_thumb = m_urls.get("audio"), m_urls.get("audio_thumbnail")
+
+            md_parts = []
+            if video_url:
+                md_parts.append(
+                    f"[![{title}]({video_thumb})]({video_url})"
+                    if video_thumb
+                    else f"[{title}]({video_url})"
+                )
+            elif video_thumb:
+                md_parts.append(f"![{title}]({video_thumb})")
+
+            if audio_url:
+                md_parts.append(
+                    f"[![{title} (Audio)]({audio_thumb})]({audio_url})"
+                    if audio_thumb
+                    else f"[{title} (Audio)]({audio_url})"
+                )
+            elif audio_thumb:
+                md_parts.append(f"![{title} (Audio)]({audio_thumb})")
+
+            if md_parts:
+                m_md = "\n\n".join(md_parts)
                 media_markdown += f"\n\n{m_md}"
         except Exception as exc:
             logger.warning(f"Failed to process media in OpenAI response: {exc}")

From d28b5a81b872c02a6c7bc78a4f369c82b36f6f31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Mar 2026 17:01:09 +0700
Subject: [PATCH 190/236] Update the Markdown link handling to include video
 and audio support for music generation.

---
 app/server/chat.py | 78 +++++++++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 25 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 950deed..e429208 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1206,15 +1206,16 @@ def make_chunk(delta_content: dict) -> str:
                     seen_media_urls.add(m_url)
 
         image_results = []
-        seen_hashes = set()
+        seen_hashes = {}
         for image in images:
             try:
                 media_store = get_media_store_dir()
                 _, _, _, fname, fhash = await _image_to_base64(image, media_store)
                 if fhash in seen_hashes:
                     (media_store / fname).unlink(missing_ok=True)
-                    continue
-                seen_hashes.add(fhash)
+                    fname = seen_hashes[fhash]
+                else:
+                    seen_hashes[fhash] = fname
                 img_url = f"{base_url}media/{fname}?token={get_media_token(fname)}"
                 title = getattr(image, "title", "Image")
                 image_results.append(f"![{title}]({img_url})")
@@ -1222,7 +1223,7 @@ def make_chunk(delta_content: dict) -> str:
                 logger.warning(f"Failed to process image in OpenAI stream: {exc}")
 
         media_results = []
-        seen_media_hashes = set()
+        seen_media_hashes = {}
         for media_item in media_items:
             try:
                 media_store = get_media_store_dir()
@@ -1232,15 +1233,21 @@ def make_chunk(delta_content: dict) -> str:
                 for mtype, (random_name, fhash) in m_dict.items():
                     if fhash in seen_media_hashes:
                         (media_store / random_name).unlink(missing_ok=True)
+                        existing_name = seen_media_hashes[fhash]
+                        m_urls[mtype] = (
+                            f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
+                        )
                         continue
-                    seen_media_hashes.add(fhash)
+                    seen_media_hashes[fhash] = random_name
                     m_urls[mtype] = (
                         f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                     )
 
                 title = getattr(media_item, "title", "Media")
-                video_url, video_thumb = m_urls.get("video"), m_urls.get("video_thumbnail")
-                audio_url, audio_thumb = m_urls.get("audio"), m_urls.get("audio_thumbnail")
+                video_url = m_urls.get("video")
+                audio_url = m_urls.get("audio")
+                video_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
+                audio_thumb = m_urls.get("audio_thumbnail") or m_urls.get("video_thumbnail")
 
                 if video_url:
                     media_results.append(
@@ -1670,7 +1677,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
 
         image_items: list[ImageGenerationCall] = []
         final_response_contents: list[ResponseOutputContent] = []
-        seen_hashes = set()
+        seen_hashes = {}
 
         images = []
         seen_image_urls = set()
@@ -1695,8 +1702,10 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
             try:
                 b64, w, h, fname, fhash = await _image_to_base64(image, media_store)
                 if fhash in seen_hashes:
-                    continue
-                seen_hashes.add(fhash)
+                    (media_store / fname).unlink(missing_ok=True)
+                    b64, w, h, fname = seen_hashes[fhash]
+                else:
+                    seen_hashes[fhash] = (b64, w, h, fname)
 
                 parts = fname.rsplit(".", 1)
                 img_id = parts[0]
@@ -1739,7 +1748,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
             except Exception:
                 logger.warning("Image processing failed in stream")
 
-        seen_media_hashes = set()
+        seen_media_hashes = {}
         for media_item in media_items:
             try:
                 m_dict = await _media_to_local_file(media_item, media_store)
@@ -1748,15 +1757,21 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                 for mtype, (random_name, fhash) in m_dict.items():
                     if fhash in seen_media_hashes:
                         (media_store / random_name).unlink(missing_ok=True)
+                        existing_name = seen_media_hashes[fhash]
+                        m_urls[mtype] = (
+                            f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
+                        )
                         continue
-                    seen_media_hashes.add(fhash)
+                    seen_media_hashes[fhash] = random_name
                     m_urls[mtype] = (
                         f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                     )
 
                 title = getattr(media_item, "title", "Media")
-                video_url, video_thumb = m_urls.get("video"), m_urls.get("video_thumbnail")
-                audio_url, audio_thumb = m_urls.get("audio"), m_urls.get("audio_thumbnail")
+                video_url = m_urls.get("video")
+                audio_url = m_urls.get("audio")
+                video_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
+                audio_thumb = m_urls.get("audio_thumbnail") or m_urls.get("video_thumbnail")
 
                 md_parts = []
                 if video_url:
@@ -2003,7 +2018,7 @@ async def create_chat_completion(
         resp_or_stream.media or []
     )
     media_markdown = ""
-    seen_media_hashes = set()
+    seen_media_hashes = {}
     for m_item in media_items:
         try:
             m_dict = await _media_to_local_file(m_item, media_store)
@@ -2012,15 +2027,21 @@ async def create_chat_completion(
             for mtype, (random_name, fhash) in m_dict.items():
                 if fhash in seen_media_hashes:
                     (media_store / random_name).unlink(missing_ok=True)
+                    existing_name = seen_media_hashes[fhash]
+                    m_urls[mtype] = (
+                        f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
+                    )
                     continue
-                seen_media_hashes.add(fhash)
+                seen_media_hashes[fhash] = random_name
                 m_urls[mtype] = (
                     f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                 )
 
             title = getattr(m_item, "title", "Media")
-            video_url, video_thumb = m_urls.get("video"), m_urls.get("video_thumbnail")
-            audio_url, audio_thumb = m_urls.get("audio"), m_urls.get("audio_thumbnail")
+            video_url = m_urls.get("video")
+            audio_url = m_urls.get("audio")
+            video_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
+            audio_thumb = m_urls.get("audio_thumbnail") or m_urls.get("video_thumbnail")
 
             md_parts = []
             if video_url:
@@ -2220,14 +2241,15 @@ async def create_response(
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned.")
 
     contents, img_calls = [], []
-    seen_hashes = set()
+    seen_hashes = {}
     for img in images:
         try:
             b64, w, h, fname, fhash = await _image_to_base64(img, media_store)
             if fhash in seen_hashes:
                 (media_store / fname).unlink(missing_ok=True)
-                continue
-            seen_hashes.add(fhash)
+                b64, w, h, fname = seen_hashes[fhash]
+            else:
+                seen_hashes[fhash] = (b64, w, h, fname)
 
             parts = fname.rsplit(".", 1)
             img_id = parts[0]
@@ -2266,7 +2288,7 @@ async def create_response(
     media_items: list[GeneratedVideo | GeneratedMedia] = (resp_or_stream.videos or []) + (
         resp_or_stream.media or []
     )
-    seen_media_hashes = set()
+    seen_media_hashes = {}
     media_markdown = ""
     for m_item in media_items:
         try:
@@ -2276,15 +2298,21 @@ async def create_response(
             for mtype, (random_name, fhash) in m_dict.items():
                 if fhash in seen_media_hashes:
                     (media_store / random_name).unlink(missing_ok=True)
+                    existing_name = seen_media_hashes[fhash]
+                    m_urls[mtype] = (
+                        f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
+                    )
                     continue
-                seen_media_hashes.add(fhash)
+                seen_media_hashes[fhash] = random_name
                 m_urls[mtype] = (
                     f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                 )
 
             title = getattr(m_item, "title", "Media")
-            video_url, video_thumb = m_urls.get("video"), m_urls.get("video_thumbnail")
-            audio_url, audio_thumb = m_urls.get("audio"), m_urls.get("audio_thumbnail")
+            video_url = m_urls.get("video")
+            audio_url = m_urls.get("audio")
+            video_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
+            audio_thumb = m_urls.get("audio_thumbnail") or m_urls.get("video_thumbnail")
 
             md_parts = []
             if video_url:

From af1b508bb4de3f48613d9a7e2f42fb821e356fa2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Mar 2026 17:44:05 +0700
Subject: [PATCH 191/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index f38ac9c..648394a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post253"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#2c55ed1e9be545e5fea11d7a9ab04d68a2058596" }
+version = "0.0.post254"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#8e390c25f75d156a6c0916057f0f5d92acb00239" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From 0d4fa4c3c168dcae95bd920cd3873c367e692b33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Mar 2026 20:23:26 +0700
Subject: [PATCH 192/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index 648394a..5fdc3f0 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post254"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#8e390c25f75d156a6c0916057f0f5d92acb00239" }
+version = "0.0.post255"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#29907757372446a2ac9fa58090c035149332f7d7" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From 95889afeff997de481dd3705d861af6342f7a21a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Mar 2026 21:00:48 +0700
Subject: [PATCH 193/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index 5fdc3f0..7e4c47e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post255"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#29907757372446a2ac9fa58090c035149332f7d7" }
+version = "0.0.post256"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#880acd7aaee4a78c6a2a3c19c96f04551c566f39" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From 3d10fd53f5f2d278878502ce8c34f6ba911d0498 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Mar 2026 21:52:24 +0700
Subject: [PATCH 194/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index 7e4c47e..cc223d2 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post256"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#880acd7aaee4a78c6a2a3c19c96f04551c566f39" }
+version = "0.0.post257"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#59d729862fa0a936ec7bbe879cfc82c51ae912ca" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From d92fa8fb4b9f1015c6c93107efa455f1de8c07c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 6 Mar 2026 22:33:18 +0700
Subject: [PATCH 195/236] Update the Markdown link handling to correctly
 include thumbnails.

---
 app/server/chat.py | 42 +++++++++++++++---------------------------
 1 file changed, 15 insertions(+), 27 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index e429208..5db703c 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1246,26 +1246,25 @@ def make_chunk(delta_content: dict) -> str:
                 title = getattr(media_item, "title", "Media")
                 video_url = m_urls.get("video")
                 audio_url = m_urls.get("audio")
-                video_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
-                audio_thumb = m_urls.get("audio_thumbnail") or m_urls.get("video_thumbnail")
+                video_thumb = m_urls.get("video_thumbnail")
+                audio_thumb = m_urls.get("audio_thumbnail")
 
+                md_parts = []
                 if video_url:
-                    media_results.append(
+                    md_parts.append(
                         f"[![{title}]({video_thumb})]({video_url})"
                         if video_thumb
                         else f"[{title}]({video_url})"
                     )
-                elif video_thumb:
-                    media_results.append(f"![{title}]({video_thumb})")
 
                 if audio_url:
-                    media_results.append(
+                    md_parts.append(
                         f"[![{title} (Audio)]({audio_thumb})]({audio_url})"
                         if audio_thumb
                         else f"[{title} (Audio)]({audio_url})"
                     )
-                elif audio_thumb:
-                    media_results.append(f"![{title} (Audio)]({audio_thumb})")
+                if md_parts:
+                    media_results.append("\n\n".join(md_parts))
             except Exception as exc:
                 logger.warning(f"Failed to process media in OpenAI stream: {exc}")
 
@@ -1770,8 +1769,8 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                 title = getattr(media_item, "title", "Media")
                 video_url = m_urls.get("video")
                 audio_url = m_urls.get("audio")
-                video_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
-                audio_thumb = m_urls.get("audio_thumbnail") or m_urls.get("video_thumbnail")
+                video_thumb = m_urls.get("video_thumbnail")
+                audio_thumb = m_urls.get("audio_thumbnail")
 
                 md_parts = []
                 if video_url:
@@ -1780,8 +1779,6 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                         if video_thumb
                         else f"[{title}]({video_url})"
                     )
-                elif video_thumb:
-                    md_parts.append(f"![{title}]({video_thumb})")
 
                 if audio_url:
                     md_parts.append(
@@ -1789,8 +1786,6 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                         if audio_thumb
                         else f"[{title} (Audio)]({audio_url})"
                     )
-                elif audio_thumb:
-                    md_parts.append(f"![{title} (Audio)]({audio_thumb})")
 
                 media_md = "\n\n".join(md_parts)
 
@@ -2040,8 +2035,8 @@ async def create_chat_completion(
             title = getattr(m_item, "title", "Media")
             video_url = m_urls.get("video")
             audio_url = m_urls.get("audio")
-            video_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
-            audio_thumb = m_urls.get("audio_thumbnail") or m_urls.get("video_thumbnail")
+            video_thumb = m_urls.get("video_thumbnail")
+            audio_thumb = m_urls.get("audio_thumbnail")
 
             md_parts = []
             if video_url:
@@ -2050,8 +2045,6 @@ async def create_chat_completion(
                     if video_thumb
                     else f"[{title}]({video_url})"
                 )
-            elif video_thumb:
-                md_parts.append(f"![{title}]({video_thumb})")
 
             if audio_url:
                 md_parts.append(
@@ -2059,11 +2052,10 @@ async def create_chat_completion(
                     if audio_thumb
                     else f"[{title} (Audio)]({audio_url})"
                 )
-            elif audio_thumb:
-                md_parts.append(f"![{title} (Audio)]({audio_thumb})")
 
             if md_parts:
-                media_markdown += "\n\n" + "\n\n".join(md_parts)
+                m_md = "\n\n".join(md_parts)
+                media_markdown += f"\n\n{m_md}"
         except Exception as exc:
             logger.warning(f"Failed to process media in OpenAI response: {exc}")
 
@@ -2311,8 +2303,8 @@ async def create_response(
             title = getattr(m_item, "title", "Media")
             video_url = m_urls.get("video")
             audio_url = m_urls.get("audio")
-            video_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
-            audio_thumb = m_urls.get("audio_thumbnail") or m_urls.get("video_thumbnail")
+            video_thumb = m_urls.get("video_thumbnail")
+            audio_thumb = m_urls.get("audio_thumbnail")
 
             md_parts = []
             if video_url:
@@ -2321,8 +2313,6 @@ async def create_response(
                     if video_thumb
                     else f"[{title}]({video_url})"
                 )
-            elif video_thumb:
-                md_parts.append(f"![{title}]({video_thumb})")
 
             if audio_url:
                 md_parts.append(
@@ -2330,8 +2320,6 @@ async def create_response(
                     if audio_thumb
                     else f"[{title} (Audio)]({audio_url})"
                 )
-            elif audio_thumb:
-                md_parts.append(f"![{title} (Audio)]({audio_thumb})")
 
             if md_parts:
                 m_md = "\n\n".join(md_parts)

From d6920ff177dc07b8e7807e12b6ff229424945e2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 7 Mar 2026 08:35:27 +0700
Subject: [PATCH 196/236] Update the Markdown link handling to correctly
 include thumbnails.

---
 app/server/chat.py | 63 ++++++++++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 22 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 5db703c..15c8efd 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -138,27 +138,47 @@ async def _media_to_local_file(
     try:
         saved_paths = await media.save(path=str(temp_dir))
     except Exception as e:
-        logger.warning(f"Failed to save media: {e}")
+        logger.warning(f"Failed to save media: {e}", exc_info=True)
         return {}
 
+    default_extensions = {
+        "video": ".mp4",
+        "audio": ".mp3",
+        "video_thumbnail": ".jpg",
+        "audio_thumbnail": ".jpg",
+    }
+
     results = {}
+    path_map = {}
+
     for mtype, spath in saved_paths.items():
         if not spath:
             continue
+        try:
+            original_path = Path(spath)
+            if not original_path.exists():
+                if spath in path_map:
+                    results[mtype] = path_map[spath]
+                continue
 
-        original_path = Path(spath)
-        data = original_path.read_bytes()
-        suffix = original_path.suffix
+            if spath in path_map:
+                results[mtype] = path_map[spath]
+                continue
 
-        if not suffix:
-            suffix = ".mp4" if "video" in mtype else ".mp3"
+            data = original_path.read_bytes()
+            suffix = original_path.suffix
+            if not suffix:
+                suffix = default_extensions.get(mtype) or (".mp4" if "video" in mtype else ".mp3")
 
-        random_name = f"media_{uuid.uuid4().hex}{suffix}"
-        new_path = temp_dir / random_name
-        original_path.rename(new_path)
+            random_name = f"media_{uuid.uuid4().hex}{suffix}"
+            new_path = temp_dir / random_name
+            original_path.rename(new_path)
 
-        fhash = hashlib.sha256(data).hexdigest()
-        results[mtype] = (random_name, fhash)
+            fhash = hashlib.sha256(data).hexdigest()
+            results[mtype] = (random_name, fhash)
+            path_map[spath] = (random_name, fhash)
+        except Exception as e:
+            logger.warning(f"Error processing {mtype} at {spath}: {e}")
 
     return results
 
@@ -1674,13 +1694,13 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
             )
             current_index += 1
 
-        image_items: list[ImageGenerationCall] = []
-        final_response_contents: list[ResponseOutputContent] = []
+        image_items = []
+        final_response_contents = []
         seen_hashes = {}
 
         images = []
         seen_image_urls = set()
-        media_items: list[GeneratedVideo | GeneratedMedia] = []
+        media_items = []
         seen_media_urls = set()
 
         for out in all_outputs:
@@ -1731,7 +1751,6 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                         "item": img_item.model_dump(mode="json"),
                     },
                 )
-
                 yield make_event(
                     "response.output_item.done",
                     {
@@ -1744,8 +1763,8 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                 current_index += 1
                 image_items.append(img_item)
                 storage_output += f"\n\n{image_url}"
-            except Exception:
-                logger.warning("Image processing failed in stream")
+            except Exception as e:
+                logger.warning(f"Image processing failed in stream: {e}")
 
         seen_media_hashes = {}
         for media_item in media_items:
@@ -1988,17 +2007,17 @@ async def create_chat_completion(
         thoughts, raw_clean, structured_requirement
     )
 
-    # Process images for OpenAI non-streaming flow
     images = resp_or_stream.images or []
     image_markdown = ""
-    seen_hashes = set()
+    seen_hashes = {}
     for image in images:
         try:
-            _, _, _, fname, fhash = await _image_to_base64(image, media_store)
+            b64, w, h, fname, fhash = await _image_to_base64(image, media_store)
             if fhash in seen_hashes:
                 (media_store / fname).unlink(missing_ok=True)
-                continue
-            seen_hashes.add(fhash)
+                b64, w, h, fname = seen_hashes[fhash]
+            else:
+                seen_hashes[fhash] = (b64, w, h, fname)
 
             img_url = f"![{fname}]({base_url}media/{fname}?token={get_media_token(fname)})"
             image_markdown += f"\n\n{img_url}"

From d85552d88839ab233875a7a7facb17069d6bccb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 7 Mar 2026 09:34:35 +0700
Subject: [PATCH 197/236] Update the Markdown link handling to correctly
 include thumbnails.

---
 app/server/chat.py | 84 ++++++++++++++++++++++++++++------------------
 1 file changed, 52 insertions(+), 32 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 15c8efd..e01aa76 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1244,16 +1244,19 @@ def make_chunk(delta_content: dict) -> str:
 
         media_results = []
         seen_media_hashes = {}
+        last_thumb_url = None
         for media_item in media_items:
             try:
                 media_store = get_media_store_dir()
                 m_dict = await _media_to_local_file(media_item, media_store)
+                logger.debug(f"Media processing keys: {list(m_dict.keys())}")
 
                 m_urls = {}
                 for mtype, (random_name, fhash) in m_dict.items():
                     if fhash in seen_media_hashes:
-                        (media_store / random_name).unlink(missing_ok=True)
                         existing_name = seen_media_hashes[fhash]
+                        if random_name != existing_name:
+                            (media_store / random_name).unlink(missing_ok=True)
                         m_urls[mtype] = (
                             f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
                         )
@@ -1263,25 +1266,27 @@ def make_chunk(delta_content: dict) -> str:
                         f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                     )
 
+                logger.debug(f"Media processing m_urls: {m_urls}")
                 title = getattr(media_item, "title", "Media")
                 video_url = m_urls.get("video")
                 audio_url = m_urls.get("audio")
-                video_thumb = m_urls.get("video_thumbnail")
-                audio_thumb = m_urls.get("audio_thumbnail")
+                item_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
+                if item_thumb:
+                    last_thumb_url = item_thumb
 
                 md_parts = []
                 if video_url:
                     md_parts.append(
-                        f"[![{title}]({video_thumb})]({video_url})"
-                        if video_thumb
+                        f"[![{title}]({last_thumb_url})]({video_url})"
+                        if last_thumb_url
                         else f"[{title}]({video_url})"
                     )
 
                 if audio_url:
                     md_parts.append(
-                        f"[![{title} (Audio)]({audio_thumb})]({audio_url})"
-                        if audio_thumb
-                        else f"[{title} (Audio)]({audio_url})"
+                        f"[![{title} - Audio]({last_thumb_url})]({audio_url})"
+                        if last_thumb_url
+                        else f"[{title} - Audio]({audio_url})"
                     )
                 if md_parts:
                     media_results.append("\n\n".join(md_parts))
@@ -1767,15 +1772,18 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                 logger.warning(f"Image processing failed in stream: {e}")
 
         seen_media_hashes = {}
+        last_thumb_url = None
         for media_item in media_items:
             try:
                 m_dict = await _media_to_local_file(media_item, media_store)
+                logger.debug(f"Media processing m_dict: {m_dict}")
 
                 m_urls = {}
                 for mtype, (random_name, fhash) in m_dict.items():
                     if fhash in seen_media_hashes:
-                        (media_store / random_name).unlink(missing_ok=True)
                         existing_name = seen_media_hashes[fhash]
+                        if random_name != existing_name:
+                            (media_store / random_name).unlink(missing_ok=True)
                         m_urls[mtype] = (
                             f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
                         )
@@ -1785,25 +1793,27 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                         f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                     )
 
+                logger.debug(f"Media processing m_urls: {m_urls}")
                 title = getattr(media_item, "title", "Media")
                 video_url = m_urls.get("video")
                 audio_url = m_urls.get("audio")
-                video_thumb = m_urls.get("video_thumbnail")
-                audio_thumb = m_urls.get("audio_thumbnail")
+                item_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
+                if item_thumb:
+                    last_thumb_url = item_thumb
 
                 md_parts = []
                 if video_url:
                     md_parts.append(
-                        f"[![{title}]({video_thumb})]({video_url})"
-                        if video_thumb
+                        f"[![{title}]({last_thumb_url})]({video_url})"
+                        if last_thumb_url
                         else f"[{title}]({video_url})"
                     )
 
                 if audio_url:
                     md_parts.append(
-                        f"[![{title} (Audio)]({audio_thumb})]({audio_url})"
-                        if audio_thumb
-                        else f"[{title} (Audio)]({audio_url})"
+                        f"[![{title} - Audio]({last_thumb_url})]({audio_url})"
+                        if last_thumb_url
+                        else f"[{title} - Audio]({audio_url})"
                     )
 
                 media_md = "\n\n".join(md_parts)
@@ -2033,15 +2043,18 @@ async def create_chat_completion(
     )
     media_markdown = ""
     seen_media_hashes = {}
+    last_thumb_url = None
     for m_item in media_items:
         try:
             m_dict = await _media_to_local_file(m_item, media_store)
+            logger.debug(f"Media processing m_dict: {m_dict}")
 
             m_urls = {}
             for mtype, (random_name, fhash) in m_dict.items():
                 if fhash in seen_media_hashes:
-                    (media_store / random_name).unlink(missing_ok=True)
                     existing_name = seen_media_hashes[fhash]
+                    if random_name != existing_name:
+                        (media_store / random_name).unlink(missing_ok=True)
                     m_urls[mtype] = (
                         f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
                     )
@@ -2051,25 +2064,27 @@ async def create_chat_completion(
                     f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                 )
 
+            logger.debug(f"Media processing m_urls: {m_urls}")
             title = getattr(m_item, "title", "Media")
             video_url = m_urls.get("video")
             audio_url = m_urls.get("audio")
-            video_thumb = m_urls.get("video_thumbnail")
-            audio_thumb = m_urls.get("audio_thumbnail")
+            item_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
+            if item_thumb:
+                last_thumb_url = item_thumb
 
             md_parts = []
             if video_url:
                 md_parts.append(
-                    f"[![{title}]({video_thumb})]({video_url})"
-                    if video_thumb
+                    f"[![{title}]({last_thumb_url})]({video_url})"
+                    if last_thumb_url
                     else f"[{title}]({video_url})"
                 )
 
             if audio_url:
                 md_parts.append(
-                    f"[![{title} (Audio)]({audio_thumb})]({audio_url})"
-                    if audio_thumb
-                    else f"[{title} (Audio)]({audio_url})"
+                    f"[![{title} - Audio]({last_thumb_url})]({audio_url})"
+                    if last_thumb_url
+                    else f"[{title} - Audio]({audio_url})"
                 )
 
             if md_parts:
@@ -2301,15 +2316,18 @@ async def create_response(
     )
     seen_media_hashes = {}
     media_markdown = ""
+    last_thumb_url = None
     for m_item in media_items:
         try:
             m_dict = await _media_to_local_file(m_item, media_store)
+            logger.debug(f"Media processing m_dict: {m_dict}")
 
             m_urls = {}
             for mtype, (random_name, fhash) in m_dict.items():
                 if fhash in seen_media_hashes:
-                    (media_store / random_name).unlink(missing_ok=True)
                     existing_name = seen_media_hashes[fhash]
+                    if random_name != existing_name:
+                        (media_store / random_name).unlink(missing_ok=True)
                     m_urls[mtype] = (
                         f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
                     )
@@ -2319,25 +2337,27 @@ async def create_response(
                     f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                 )
 
+            logger.debug(f"Media processing m_urls: {m_urls}")
             title = getattr(m_item, "title", "Media")
             video_url = m_urls.get("video")
             audio_url = m_urls.get("audio")
-            video_thumb = m_urls.get("video_thumbnail")
-            audio_thumb = m_urls.get("audio_thumbnail")
+            item_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
+            if item_thumb:
+                last_thumb_url = item_thumb
 
             md_parts = []
             if video_url:
                 md_parts.append(
-                    f"[![{title}]({video_thumb})]({video_url})"
-                    if video_thumb
+                    f"[![{title}]({last_thumb_url})]({video_url})"
+                    if last_thumb_url
                     else f"[{title}]({video_url})"
                 )
 
             if audio_url:
                 md_parts.append(
-                    f"[![{title} (Audio)]({audio_thumb})]({audio_url})"
-                    if audio_thumb
-                    else f"[{title} (Audio)]({audio_url})"
+                    f"[![{title} - Audio]({last_thumb_url})]({audio_url})"
+                    if last_thumb_url
+                    else f"[{title} - Audio]({audio_url})"
                 )
 
             if md_parts:

From 9dfc866a7251c578c57993bac7b9e0efc44e9615 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 7 Mar 2026 10:55:25 +0700
Subject: [PATCH 198/236] Update the Markdown link handling to correctly
 include thumbnails.

---
 app/server/chat.py | 80 ++++++++++++++++++++++++++++------------------
 1 file changed, 49 insertions(+), 31 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index e01aa76..4d344bf 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1206,25 +1206,38 @@ def make_chunk(delta_content: dict) -> str:
             full_thoughts, full_text, structured_requirement
         )
 
-        images = []
-        seen_image_urls = set()
-        media_items: list[GeneratedVideo | GeneratedMedia] = []
-        seen_media_urls = set()
+        image_map = {}
+        media_items = []
+        media_url_to_idx = {}
 
         for out in all_outputs:
-            if out.images:
-                for img in out.images:
-                    if img.url not in seen_image_urls:
-                        images.append(img)
-                        seen_image_urls.add(img.url)
+            for img in out.images or []:
+                if img.url:
+                    image_map[img.url] = img
 
             m_list = (out.videos or []) + (out.media or [])
             for m in m_list:
-                m_url = getattr(m, "url", None) or getattr(m, "mp3_url", None)
-                if m_url and m_url not in seen_media_urls:
+                v_url = getattr(m, "url", None)
+                a_url = getattr(m, "mp3_url", None)
+                if not v_url and not a_url:
+                    continue
+
+                idx = (media_url_to_idx.get(v_url) if v_url else None) or (
+                    media_url_to_idx.get(a_url) if a_url else None
+                )
+
+                if idx is not None:
+                    media_items[idx] = m
+                else:
+                    idx = len(media_items)
                     media_items.append(m)
-                    seen_media_urls.add(m_url)
 
+                if v_url:
+                    media_url_to_idx[v_url] = idx
+                if a_url:
+                    media_url_to_idx[a_url] = idx
+
+        images = list(image_map.values())
         image_results = []
         seen_hashes = {}
         for image in images:
@@ -1249,7 +1262,6 @@ def make_chunk(delta_content: dict) -> str:
             try:
                 media_store = get_media_store_dir()
                 m_dict = await _media_to_local_file(media_item, media_store)
-                logger.debug(f"Media processing keys: {list(m_dict.keys())}")
 
                 m_urls = {}
                 for mtype, (random_name, fhash) in m_dict.items():
@@ -1266,7 +1278,6 @@ def make_chunk(delta_content: dict) -> str:
                         f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                     )
 
-                logger.debug(f"Media processing m_urls: {m_urls}")
                 title = getattr(media_item, "title", "Media")
                 video_url = m_urls.get("video")
                 audio_url = m_urls.get("audio")
@@ -1703,25 +1714,38 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
         final_response_contents = []
         seen_hashes = {}
 
-        images = []
-        seen_image_urls = set()
+        image_map = {}
         media_items = []
-        seen_media_urls = set()
+        media_url_to_idx = {}
 
         for out in all_outputs:
-            if out.images:
-                for img in out.images:
-                    if img.url not in seen_image_urls:
-                        images.append(img)
-                        seen_image_urls.add(img.url)
+            for img in out.images or []:
+                if img.url:
+                    image_map[img.url] = img
 
             m_list = (out.videos or []) + (out.media or [])
             for m in m_list:
-                m_url = getattr(m, "url", None) or getattr(m, "mp3_url", None)
-                if m_url and m_url not in seen_media_urls:
+                v_url = getattr(m, "url", None)
+                a_url = getattr(m, "mp3_url", None)
+                if not v_url and not a_url:
+                    continue
+
+                idx = (media_url_to_idx.get(v_url) if v_url else None) or (
+                    media_url_to_idx.get(a_url) if a_url else None
+                )
+
+                if idx is not None:
+                    media_items[idx] = m
+                else:
+                    idx = len(media_items)
                     media_items.append(m)
-                    seen_media_urls.add(m_url)
 
+                if v_url:
+                    media_url_to_idx[v_url] = idx
+                if a_url:
+                    media_url_to_idx[a_url] = idx
+
+        images = list(image_map.values())
         for image in images:
             try:
                 b64, w, h, fname, fhash = await _image_to_base64(image, media_store)
@@ -1776,7 +1800,6 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
         for media_item in media_items:
             try:
                 m_dict = await _media_to_local_file(media_item, media_store)
-                logger.debug(f"Media processing m_dict: {m_dict}")
 
                 m_urls = {}
                 for mtype, (random_name, fhash) in m_dict.items():
@@ -1793,7 +1816,6 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                         f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                     )
 
-                logger.debug(f"Media processing m_urls: {m_urls}")
                 title = getattr(media_item, "title", "Media")
                 video_url = m_urls.get("video")
                 audio_url = m_urls.get("audio")
@@ -2047,7 +2069,6 @@ async def create_chat_completion(
     for m_item in media_items:
         try:
             m_dict = await _media_to_local_file(m_item, media_store)
-            logger.debug(f"Media processing m_dict: {m_dict}")
 
             m_urls = {}
             for mtype, (random_name, fhash) in m_dict.items():
@@ -2064,7 +2085,6 @@ async def create_chat_completion(
                     f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                 )
 
-            logger.debug(f"Media processing m_urls: {m_urls}")
             title = getattr(m_item, "title", "Media")
             video_url = m_urls.get("video")
             audio_url = m_urls.get("audio")
@@ -2320,7 +2340,6 @@ async def create_response(
     for m_item in media_items:
         try:
             m_dict = await _media_to_local_file(m_item, media_store)
-            logger.debug(f"Media processing m_dict: {m_dict}")
 
             m_urls = {}
             for mtype, (random_name, fhash) in m_dict.items():
@@ -2337,7 +2356,6 @@ async def create_response(
                     f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
                 )
 
-            logger.debug(f"Media processing m_urls: {m_urls}")
             title = getattr(m_item, "title", "Media")
             video_url = m_urls.get("video")
             audio_url = m_urls.get("audio")

From 1f11c9547c1f93e63fc3dade685ee923d1c06533 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 8 Mar 2026 22:13:52 +0700
Subject: [PATCH 199/236] Resolve mismatch of reusable sessions

---
 app/server/chat.py   | 34 +++++++++++++++++++++-------------
 app/services/lmdb.py |  1 -
 app/utils/config.py  |  2 +-
 config/config.yaml   |  2 +-
 4 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 4d344bf..25a1d6a 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -77,7 +77,7 @@
 )
 
 MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9)
-METADATA_TTL_MINUTES = 15
+METADATA_TTL_MINUTES = 60
 
 router = APIRouter()
 
@@ -1188,8 +1188,8 @@ def make_chunk(delta_content: dict) -> str:
                             {"delta": {"content": visible_delta}, "finish_reason": None}
                         )
         except Exception as e:
-            logger.exception(f"Error during OpenAI streaming: {e}")
-            yield f"data: {orjson.dumps({'error': {'message': 'Streaming error occurred.', 'type': 'server_error', 'param': None, 'code': None}}).decode('utf-8')}\n\n"
+            logger.exception(f"Error during streaming: {e}")
+            yield f"data: {orjson.dumps({'error': {'message': f'Streaming error occurred: {e}', 'type': 'server_error', 'param': None, 'code': None}}).decode('utf-8')}\n\n"
             return
 
         if all_outputs:
@@ -1305,6 +1305,7 @@ def make_chunk(delta_content: dict) -> str:
                 logger.warning(f"Failed to process media in OpenAI stream: {exc}")
 
         for image_url in image_results:
+            storage_output += f"\n\n{image_url}"
             yield make_chunk(
                 {
                     "delta": {"content": f"\n\n{image_url}"},
@@ -1313,6 +1314,7 @@ def make_chunk(delta_content: dict) -> str:
             )
 
         for media_md in media_results:
+            storage_output += f"\n\n{media_md}"
             yield make_chunk(
                 {
                     "delta": {"content": f"\n\n{media_md}"},
@@ -1596,11 +1598,15 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                             },
                         )
 
-        except Exception:
-            logger.exception("Responses streaming error")
+        except Exception as e:
+            logger.exception(f"Error during streaming: {e}")
             yield make_event(
                 "error",
-                {**base_event, "type": "error", "error": {"message": "Streaming error."}},
+                {
+                    **base_event,
+                    "type": "error",
+                    "error": {"message": f"Streaming error occurred: {e}"},
+                },
             )
             return
 
@@ -1766,9 +1772,10 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                     size=f"{w}x{h}" if w and h else None,
                 )
 
-                image_url = f"![{fname}]({base_url}media/{fname}?token={get_media_token(fname)})"
+                img_link = f"![{fname}]({base_url}media/{fname}?token={get_media_token(fname)})"
+                image_url_with_newline = f"\n\n{img_link}"
                 final_response_contents.append(
-                    ResponseOutputText(type="output_text", text=image_url)
+                    ResponseOutputText(type="output_text", text=image_url_with_newline)
                 )
 
                 yield make_event(
@@ -1791,7 +1798,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                 )
                 current_index += 1
                 image_items.append(img_item)
-                storage_output += f"\n\n{image_url}"
+                storage_output += image_url_with_newline
             except Exception as e:
                 logger.warning(f"Image processing failed in stream: {e}")
 
@@ -1841,10 +1848,11 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                 media_md = "\n\n".join(md_parts)
 
                 if media_md:
+                    media_md_with_newline = f"\n\n{media_md}"
                     final_response_contents.append(
-                        ResponseOutputText(type="output_text", text=media_md)
+                        ResponseOutputText(type="output_text", text=media_md_with_newline)
                     )
-                    storage_output += f"\n\n{media_md}"
+                    storage_output += media_md_with_newline
             except Exception:
                 logger.warning("Media processing failed in stream")
 
@@ -2329,7 +2337,7 @@ async def create_response(
 
     if image_markdown:
         storage_output += image_markdown
-        contents.append(ResponseOutputText(type="output_text", text=image_markdown.strip()))
+        contents.append(ResponseOutputText(type="output_text", text=image_markdown))
 
     media_items: list[GeneratedVideo | GeneratedMedia] = (resp_or_stream.videos or []) + (
         resp_or_stream.media or []
@@ -2386,7 +2394,7 @@ async def create_response(
 
     if media_markdown:
         storage_output += media_markdown
-        contents.append(ResponseOutputText(type="output_text", text=media_markdown.strip()))
+        contents.append(ResponseOutputText(type="output_text", text=media_markdown))
 
     p_tok, c_tok, t_tok, r_tok = _calculate_usage(messages, assistant_text, tool_calls, thoughts)
     usage = ResponseUsage(
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 5a32089..2f45193 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -43,7 +43,6 @@ def _normalize_text(text: str | None, fuzzy: bool = False) -> str | None:
 
     text = normalize_llm_text(text)
     text = unescape_text(text)
-
     text = remove_tool_call_blocks(text)
 
     if fuzzy:
diff --git a/app/utils/config.py b/app/utils/config.py
index a52bab5..03e6f6d 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -82,7 +82,7 @@ class GeminiConfig(BaseModel):
         default="append",
         description="Strategy for loading models: 'append' merges custom with default, 'overwrite' uses only custom",
     )
-    timeout: int = Field(default=600, ge=30, description="Init timeout in seconds")
+    timeout: int = Field(default=450, ge=30, description="Init timeout in seconds")
     watchdog_timeout: int = Field(default=90, ge=30, description="Watchdog timeout in seconds")
     auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies")
     refresh_interval: int = Field(
diff --git a/config/config.yaml b/config/config.yaml
index 746be67..bfc9306 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -22,7 +22,7 @@ gemini:
       secure_1psid: "YOUR_SECURE_1PSID_HERE"
       secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"
       proxy: null          # Optional proxy URL (null/empty means direct connection)
-  timeout: 600             # Init timeout in seconds (Not less than 30s)
+  timeout: 450             # Init timeout in seconds (Not less than 30s)
   watchdog_timeout: 90     # Watchdog timeout in seconds (Not less than 30s)
   auto_refresh: true       # Auto-refresh session cookies
   refresh_interval: 600    # Refresh interval in seconds (Not less than 60s)

From bd24c53d7f44820703e6b5e852a3859d54a455f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sun, 8 Mar 2026 22:14:22 +0700
Subject: [PATCH 200/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index cc223d2..aba96fe 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post257"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#59d729862fa0a936ec7bbe879cfc82c51ae912ca" }
+version = "0.0.post258"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#4fd2ee6c4e880f693768d32c73220e89661fcc12" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From cfd96d5d5d73aefa8bdb7e4add7ab576c91cbc73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 9 Mar 2026 13:22:42 +0700
Subject: [PATCH 201/236] Resolve the issue where downloading media takes too
 long and results in a timeout.

---
 app/server/chat.py | 934 +++++++++++++++++++++++++--------------------
 1 file changed, 529 insertions(+), 405 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 25a1d6a..e0df9c6 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1,3 +1,4 @@
+import asyncio
 import base64
 import hashlib
 import io
@@ -137,8 +138,11 @@ async def _media_to_local_file(
     """Persist media and return dict mapping type to (filename, hash)"""
     try:
         saved_paths = await media.save(path=str(temp_dir))
+        if not saved_paths:
+            logger.warning("No files saved from media object.")
+            return {}
     except Exception as e:
-        logger.warning(f"Failed to save media: {e}", exc_info=True)
+        logger.error(f"Failed to save media: {e}")
         return {}
 
     default_extensions = {
@@ -224,10 +228,12 @@ def _create_responses_standard_payload(
     usage: ResponseUsage,
     request: ResponseCreateRequest,
     full_thoughts: str | None = None,
+    message_id: str | None = None,
+    reason_id: str | None = None,
 ) -> ResponseCreateResponse:
     """Unified factory for building ResponseCreateResponse objects."""
-    message_id = f"msg_{uuid.uuid4().hex[:24]}"
-    reason_id = f"rs_{uuid.uuid4().hex[:24]}"
+    message_id = message_id or f"msg_{uuid.uuid4().hex[:24]}"
+    reason_id = reason_id or f"rs_{uuid.uuid4().hex[:24]}"
     now_ts = int(datetime.now(tz=UTC).timestamp())
 
     output_items: list[Any] = []
@@ -241,15 +247,16 @@ def _create_responses_standard_payload(
             )
         )
 
-    output_items.append(
-        ResponseOutputMessage(
-            id=message_id,
-            type="message",
-            status="completed",
-            role="assistant",
-            content=response_contents,
+    if response_contents or not (detected_tool_calls or image_call_items):
+        output_items.append(
+            ResponseOutputMessage(
+                id=message_id,
+                type="message",
+                status="completed",
+                role="assistant",
+                content=response_contents,
+            )
         )
-    )
 
     if detected_tool_calls:
         output_items.extend(
@@ -1009,7 +1016,7 @@ async def _send_with_split(
                 return session.send_message_stream(text, files=files)
             return await session.send_message(text, files=files)
         except Exception as e:
-            logger.exception(f"Error sending message to Gemini: {e}")
+            logger.error(f"Error sending message to Gemini: {e}")
             raise
 
     logger.info(
@@ -1031,7 +1038,7 @@ async def _send_with_split(
             return session.send_message_stream(instruction, files=final_files)
         return await session.send_message(instruction, files=final_files)
     except Exception as e:
-        logger.exception(f"Error sending large text as file to Gemini: {e}")
+        logger.error(f"Error sending large text as file to Gemini: {e}")
         raise
 
 
@@ -1121,6 +1128,29 @@ def flush(self) -> str:
         return strip_system_hints(res)
 
 
+# --- Media Processing Helpers ---
+
+
+async def _process_image_item(image: Image):
+    """Process an image item by converting it to base64 and returning a standard result tuple."""
+    try:
+        media_store = get_media_store_dir()
+        return "image", image, await _image_to_base64(image, media_store)
+    except Exception as exc:
+        logger.warning(f"Background image processing failed: {exc}")
+        return None
+
+
+async def _process_media_item(media_item: GeneratedVideo | GeneratedMedia):
+    """Process a media item by saving it to a local file and returning a standard result tuple."""
+    try:
+        media_store = get_media_store_dir()
+        return "media", media_item, await _media_to_local_file(media_item, media_store)
+    except Exception as exc:
+        logger.warning(f"Background media processing failed: {exc}")
+        return None
+
+
 # --- Response Builders & Streaming ---
 
 
@@ -1143,11 +1173,16 @@ def _create_real_streaming_response(
     """
 
     async def generate_stream():
-        full_thoughts, full_text = "", ""
+        full_text = ""
+        full_thoughts = ""
         has_started = False
         all_outputs: list[ModelOutput] = []
         suppressor = StreamingOutputFilter()
 
+        media_tasks = []
+        seen_media_urls = set()
+        seen_image_urls = set()
+
         async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
             yield item
 
@@ -1187,8 +1222,20 @@ def make_chunk(delta_content: dict) -> str:
                         yield make_chunk(
                             {"delta": {"content": visible_delta}, "finish_reason": None}
                         )
+
+                for img in chunk.images or []:
+                    if img.url and img.url not in seen_image_urls:
+                        seen_image_urls.add(img.url)
+                        media_tasks.append(asyncio.create_task(_process_image_item(img)))
+
+                m_list = (chunk.videos or []) + (chunk.media or [])
+                for m in m_list:
+                    p_url = getattr(m, "url", None) or getattr(m, "mp3_url", None)
+                    if p_url and p_url not in seen_media_urls:
+                        seen_media_urls.add(p_url)
+                        media_tasks.append(asyncio.create_task(_process_media_item(m)))
         except Exception as e:
-            logger.exception(f"Error during streaming: {e}")
+            logger.error(f"Error during streaming: {e}")
             yield f"data: {orjson.dumps({'error': {'message': f'Streaming error occurred: {e}', 'type': 'server_error', 'param': None, 'code': None}}).decode('utf-8')}\n\n"
             return
 
@@ -1202,125 +1249,93 @@ def make_chunk(delta_content: dict) -> str:
         if remaining_text := suppressor.flush():
             yield make_chunk({"delta": {"content": remaining_text}, "finish_reason": None})
 
-        _thoughts, assistant_text, storage_output, detected_tool_calls = _process_llm_output(
+        _, _, storage_output, detected_tool_calls = _process_llm_output(
             full_thoughts, full_text, structured_requirement
         )
 
-        image_map = {}
-        media_items = []
-        media_url_to_idx = {}
+        seen_hashes = {}
+        seen_media_hashes = {}
+        media_store = get_media_store_dir()
 
-        for out in all_outputs:
-            for img in out.images or []:
-                if img.url:
-                    image_map[img.url] = img
+        if media_tasks:
+            logger.debug(f"Waiting for {len(media_tasks)} background media tasks with heartbeat...")
+            while media_tasks:
+                done, pending = await asyncio.wait(
+                    media_tasks, timeout=5.0, return_when=asyncio.FIRST_COMPLETED
+                )
+                media_tasks = list(pending)
 
-            m_list = (out.videos or []) + (out.media or [])
-            for m in m_list:
-                v_url = getattr(m, "url", None)
-                a_url = getattr(m, "mp3_url", None)
-                if not v_url and not a_url:
+                if not done:
+                    yield ": ping\n\n"
                     continue
 
-                idx = (media_url_to_idx.get(v_url) if v_url else None) or (
-                    media_url_to_idx.get(a_url) if a_url else None
-                )
-
-                if idx is not None:
-                    media_items[idx] = m
-                else:
-                    idx = len(media_items)
-                    media_items.append(m)
+                for task in done:
+                    res = task.result()
+                    if not res:
+                        continue
 
-                if v_url:
-                    media_url_to_idx[v_url] = idx
-                if a_url:
-                    media_url_to_idx[a_url] = idx
+                    rtype, original_item, media_data = res
+                    if rtype == "image":
+                        _, _, _, fname, fhash = media_data
+                        if fhash in seen_hashes:
+                            (media_store / fname).unlink(missing_ok=True)
+                            fname = seen_hashes[fhash]
+                        else:
+                            seen_hashes[fhash] = fname
+
+                        img_url = f"{base_url}media/{fname}?token={get_media_token(fname)}"
+                        title = getattr(original_item, "title", "Image")
+                        md = f"![{title}]({img_url})"
+                        storage_output += f"\n\n{md}"
+                        yield make_chunk({"delta": {"content": f"\n\n{md}"}, "finish_reason": None})
+
+                    elif rtype == "media":
+                        m_dict = media_data
+                        if not m_dict:
+                            continue
+
+                        m_urls = {}
+                        for mtype, (random_name, fhash) in m_dict.items():
+                            if fhash in seen_media_hashes:
+                                existing_name = seen_media_hashes[fhash]
+                                if random_name != existing_name:
+                                    (media_store / random_name).unlink(missing_ok=True)
+                                m_urls[mtype] = (
+                                    f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
+                                )
+                            else:
+                                seen_media_hashes[fhash] = random_name
+                                m_urls[mtype] = (
+                                    f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
+                                )
 
-        images = list(image_map.values())
-        image_results = []
-        seen_hashes = {}
-        for image in images:
-            try:
-                media_store = get_media_store_dir()
-                _, _, _, fname, fhash = await _image_to_base64(image, media_store)
-                if fhash in seen_hashes:
-                    (media_store / fname).unlink(missing_ok=True)
-                    fname = seen_hashes[fhash]
-                else:
-                    seen_hashes[fhash] = fname
-                img_url = f"{base_url}media/{fname}?token={get_media_token(fname)}"
-                title = getattr(image, "title", "Image")
-                image_results.append(f"![{title}]({img_url})")
-            except Exception as exc:
-                logger.warning(f"Failed to process image in OpenAI stream: {exc}")
-
-        media_results = []
-        seen_media_hashes = {}
-        last_thumb_url = None
-        for media_item in media_items:
-            try:
-                media_store = get_media_store_dir()
-                m_dict = await _media_to_local_file(media_item, media_store)
-
-                m_urls = {}
-                for mtype, (random_name, fhash) in m_dict.items():
-                    if fhash in seen_media_hashes:
-                        existing_name = seen_media_hashes[fhash]
-                        if random_name != existing_name:
-                            (media_store / random_name).unlink(missing_ok=True)
-                        m_urls[mtype] = (
-                            f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
+                        title = getattr(original_item, "title", "Media")
+                        video_url = m_urls.get("video")
+                        audio_url = m_urls.get("audio")
+                        current_thumb = m_urls.get("video_thumbnail") or m_urls.get(
+                            "audio_thumbnail"
                         )
-                        continue
-                    seen_media_hashes[fhash] = random_name
-                    m_urls[mtype] = (
-                        f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
-                    )
 
-                title = getattr(media_item, "title", "Media")
-                video_url = m_urls.get("video")
-                audio_url = m_urls.get("audio")
-                item_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
-                if item_thumb:
-                    last_thumb_url = item_thumb
-
-                md_parts = []
-                if video_url:
-                    md_parts.append(
-                        f"[![{title}]({last_thumb_url})]({video_url})"
-                        if last_thumb_url
-                        else f"[{title}]({video_url})"
-                    )
-
-                if audio_url:
-                    md_parts.append(
-                        f"[![{title} - Audio]({last_thumb_url})]({audio_url})"
-                        if last_thumb_url
-                        else f"[{title} - Audio]({audio_url})"
-                    )
-                if md_parts:
-                    media_results.append("\n\n".join(md_parts))
-            except Exception as exc:
-                logger.warning(f"Failed to process media in OpenAI stream: {exc}")
-
-        for image_url in image_results:
-            storage_output += f"\n\n{image_url}"
-            yield make_chunk(
-                {
-                    "delta": {"content": f"\n\n{image_url}"},
-                    "finish_reason": None,
-                }
-            )
+                        md_parts = []
+                        if video_url:
+                            md_parts.append(
+                                f"[![{title}]({current_thumb})]({video_url})"
+                                if current_thumb
+                                else f"[{title}]({video_url})"
+                            )
+                        if audio_url:
+                            md_parts.append(
+                                f"[![{title} - Audio]({current_thumb})]({audio_url})"
+                                if current_thumb
+                                else f"[{title} - Audio]({audio_url})"
+                            )
 
-        for media_md in media_results:
-            storage_output += f"\n\n{media_md}"
-            yield make_chunk(
-                {
-                    "delta": {"content": f"\n\n{media_md}"},
-                    "finish_reason": None,
-                }
-            )
+                        if md_parts:
+                            md = "\n\n".join(md_parts)
+                            storage_output += f"\n\n{md}"
+                            yield make_chunk(
+                                {"delta": {"content": f"\n\n{md}"}, "finish_reason": None}
+                            )
 
         if detected_tool_calls:
             for idx, call in enumerate(detected_tool_calls):
@@ -1341,7 +1356,7 @@ def make_chunk(delta_content: dict) -> str:
                 )
 
         p_tok, c_tok, t_tok, r_tok = _calculate_usage(
-            messages, assistant_text, detected_tool_calls, full_thoughts
+            messages, storage_output, detected_tool_calls, full_thoughts
         )
         usage = CompletionUsage(
             prompt_tokens=p_tok,
@@ -1381,7 +1396,6 @@ def _create_responses_real_streaming_response(
     client_wrapper: GeminiClientWrapper,
     session: ChatSession,
     request: ResponseCreateRequest,
-    media_store: Path,
     base_url: str,
     structured_requirement: StructuredOutputRequirement | None = None,
 ) -> StreamingResponse:
@@ -1442,14 +1456,21 @@ def make_event(etype: str, data: dict) -> str:
             },
         )
 
-        full_thoughts, full_text = "", ""
+        full_text = ""
+        full_thoughts = ""
+        media_tasks = []
+        seen_media_urls = set()
+        seen_image_urls = set()
+
         all_outputs: list[ModelOutput] = []
 
         thought_item_id = f"rs_{uuid.uuid4().hex[:24]}"
         message_item_id = f"msg_{uuid.uuid4().hex[:24]}"
 
         thought_open, message_open = False, False
-        current_index = 0
+        next_output_index = 0
+        thought_index = 0
+        message_index = 0
         suppressor = StreamingOutputFilter()
 
         try:
@@ -1467,12 +1488,14 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
 
                 if chunk.thoughts_delta:
                     if not thought_open:
+                        thought_index = next_output_index
+                        next_output_index += 1
                         yield make_event(
                             "response.output_item.added",
                             {
                                 **base_event,
                                 "type": "response.output_item.added",
-                                "output_index": current_index,
+                                "output_index": thought_index,
                                 "item": ResponseReasoningItem(
                                     id=thought_item_id,
                                     type="reasoning",
@@ -1488,7 +1511,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                                 **base_event,
                                 "type": "response.reasoning_summary_part.added",
                                 "item_id": thought_item_id,
-                                "output_index": current_index,
+                                "output_index": thought_index,
                                 "summary_index": 0,
                                 "part": SummaryTextContent(text="").model_dump(mode="json"),
                             },
@@ -1502,7 +1525,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                             **base_event,
                             "type": "response.reasoning_summary_text.delta",
                             "item_id": thought_item_id,
-                            "output_index": current_index,
+                            "output_index": thought_index,
                             "summary_index": 0,
                             "delta": chunk.thoughts_delta,
                         },
@@ -1516,7 +1539,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                                 **base_event,
                                 "type": "response.reasoning_summary_text.done",
                                 "item_id": thought_item_id,
-                                "output_index": current_index,
+                                "output_index": thought_index,
                                 "summary_index": 0,
                                 "text": full_thoughts,
                             },
@@ -1527,7 +1550,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                                 **base_event,
                                 "type": "response.reasoning_summary_part.done",
                                 "item_id": thought_item_id,
-                                "output_index": current_index,
+                                "output_index": thought_index,
                                 "summary_index": 0,
                                 "part": SummaryTextContent(text=full_thoughts).model_dump(
                                     mode="json"
@@ -1539,7 +1562,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                             {
                                 **base_event,
                                 "type": "response.output_item.done",
-                                "output_index": current_index,
+                                "output_index": thought_index,
                                 "item": ResponseReasoningItem(
                                     id=thought_item_id,
                                     type="reasoning",
@@ -1548,16 +1571,17 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                                 ).model_dump(mode="json"),
                             },
                         )
-                        current_index += 1
                         thought_open = False
 
                     if not message_open:
+                        message_index = next_output_index
+                        next_output_index += 1
                         yield make_event(
                             "response.output_item.added",
                             {
                                 **base_event,
                                 "type": "response.output_item.added",
-                                "output_index": current_index,
+                                "output_index": message_index,
                                 "item": ResponseOutputMessage(
                                     id=message_item_id,
                                     type="message",
@@ -1574,7 +1598,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                                 **base_event,
                                 "type": "response.content_part.added",
                                 "item_id": message_item_id,
-                                "output_index": current_index,
+                                "output_index": message_index,
                                 "content_index": 0,
                                 "part": ResponseOutputText(type="output_text", text="").model_dump(
                                     mode="json"
@@ -1591,15 +1615,27 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                                 **base_event,
                                 "type": "response.output_text.delta",
                                 "item_id": message_item_id,
-                                "output_index": current_index,
+                                "output_index": message_index,
                                 "content_index": 0,
                                 "delta": visible,
                                 "logprobs": [],
                             },
                         )
 
+                for img in chunk.images or []:
+                    if img.url and img.url not in seen_image_urls:
+                        seen_image_urls.add(img.url)
+                        media_tasks.append(asyncio.create_task(_process_image_item(img)))
+
+                m_list = (chunk.videos or []) + (chunk.media or [])
+                for m in m_list:
+                    p_url = getattr(m, "url", None) or getattr(m, "mp3_url", None)
+                    if p_url and p_url not in seen_media_urls:
+                        seen_media_urls.add(p_url)
+                        media_tasks.append(asyncio.create_task(_process_media_item(m)))
+
         except Exception as e:
-            logger.exception(f"Error during streaming: {e}")
+            logger.error(f"Error during streaming: {e}")
             yield make_event(
                 "error",
                 {
@@ -1625,7 +1661,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                     **base_event,
                     "type": "response.output_text.delta",
                     "item_id": message_item_id,
-                    "output_index": current_index,
+                    "output_index": message_index,
                     "content_index": 0,
                     "delta": remaining,
                     "logprobs": [],
@@ -1639,7 +1675,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                     **base_event,
                     "type": "response.reasoning_summary_text.done",
                     "item_id": thought_item_id,
-                    "output_index": current_index,
+                    "output_index": thought_index,
                     "summary_index": 0,
                     "text": full_thoughts,
                 },
@@ -1650,7 +1686,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                     **base_event,
                     "type": "response.reasoning_summary_part.done",
                     "item_id": thought_item_id,
-                    "output_index": current_index,
+                    "output_index": thought_index,
                     "summary_index": 0,
                     "part": SummaryTextContent(text=full_thoughts).model_dump(mode="json"),
                 },
@@ -1660,7 +1696,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                 {
                     **base_event,
                     "type": "response.output_item.done",
-                    "output_index": current_index,
+                    "output_index": thought_index,
                     "item": ResponseReasoningItem(
                         id=thought_item_id,
                         type="reasoning",
@@ -1669,20 +1705,239 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                     ).model_dump(mode="json"),
                 },
             )
-            current_index += 1
 
-        _thoughts, assistant_text, storage_output, detected_tool_calls = _process_llm_output(
+        _, assistant_text, storage_output, detected_tool_calls = _process_llm_output(
             full_thoughts, full_text, structured_requirement
         )
 
+        image_items = []
+        seen_hashes = {}
+        seen_media_hashes = {}
+        media_store = get_media_store_dir()
+
+        if media_tasks:
+            logger.debug(
+                f"Waiting for {len(media_tasks)} background media tasks in Responses with heartbeat..."
+            )
+            while media_tasks:
+                done, pending = await asyncio.wait(
+                    media_tasks, timeout=5.0, return_when=asyncio.FIRST_COMPLETED
+                )
+                media_tasks = list(pending)
+
+                if not done:
+                    yield ": ping\n\n"
+                    continue
+
+                for task in done:
+                    res = task.result()
+                    if not res:
+                        continue
+
+                    rtype, original_item, media_data = res
+                    if rtype == "image":
+                        b64, w, h, fname, fhash = media_data
+                        if fhash in seen_hashes:
+                            (media_store / fname).unlink(missing_ok=True)
+                            b64, w, h, fname = seen_hashes[fhash]
+                        else:
+                            seen_hashes[fhash] = (b64, w, h, fname)
+
+                        parts = fname.rsplit(".", 1)
+                        img_id = parts[0]
+                        fmt = parts[1] if len(parts) > 1 else "png"
+
+                        img_item = ImageGenerationCall(
+                            id=img_id,
+                            result=b64,
+                            output_format=fmt,
+                            size=f"{w}x{h}" if w and h else None,
+                        )
+
+                        img_link = (
+                            f"![{fname}]({base_url}media/{fname}?token={get_media_token(fname)})"
+                        )
+                        md_to_add = f"\n\n{img_link}"
+
+                        img_index = next_output_index
+                        next_output_index += 1
+                        yield make_event(
+                            "response.output_item.added",
+                            {
+                                **base_event,
+                                "type": "response.output_item.added",
+                                "output_index": img_index,
+                                "item": img_item.model_dump(mode="json"),
+                            },
+                        )
+                        yield make_event(
+                            "response.output_item.done",
+                            {
+                                **base_event,
+                                "type": "response.output_item.done",
+                                "output_index": img_index,
+                                "item": img_item.model_dump(mode="json"),
+                            },
+                        )
+
+                        if not message_open:
+                            message_index = next_output_index
+                            next_output_index += 1
+                            yield make_event(
+                                "response.output_item.added",
+                                {
+                                    **base_event,
+                                    "type": "response.output_item.added",
+                                    "output_index": message_index,
+                                    "item": ResponseOutputMessage(
+                                        id=message_item_id,
+                                        type="message",
+                                        status="in_progress",
+                                        role="assistant",
+                                        content=[],
+                                    ).model_dump(mode="json"),
+                                },
+                            )
+                            yield make_event(
+                                "response.content_part.added",
+                                {
+                                    **base_event,
+                                    "type": "response.content_part.added",
+                                    "item_id": message_item_id,
+                                    "output_index": message_index,
+                                    "content_index": 0,
+                                    "part": ResponseOutputText(
+                                        type="output_text", text=""
+                                    ).model_dump(mode="json"),
+                                },
+                            )
+                            message_open = True
+
+                        yield make_event(
+                            "response.output_text.delta",
+                            {
+                                **base_event,
+                                "type": "response.output_text.delta",
+                                "item_id": message_item_id,
+                                "output_index": message_index,
+                                "content_index": 0,
+                                "delta": md_to_add,
+                                "logprobs": [],
+                            },
+                        )
+                        assistant_text += md_to_add
+                        storage_output += md_to_add
+                        image_items.append(img_item)
+
+                    elif rtype == "media":
+                        m_dict = media_data
+                        if not m_dict:
+                            continue
+
+                        m_urls = {}
+                        for mtype, (random_name, fhash) in m_dict.items():
+                            if fhash in seen_media_hashes:
+                                existing_name = seen_media_hashes[fhash]
+                                if random_name != existing_name:
+                                    (media_store / random_name).unlink(missing_ok=True)
+                                m_urls[mtype] = (
+                                    f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
+                                )
+                            else:
+                                seen_media_hashes[fhash] = random_name
+                                m_urls[mtype] = (
+                                    f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
+                                )
+
+                        title = getattr(original_item, "title", "Media")
+                        video_url = m_urls.get("video")
+                        audio_url = m_urls.get("audio")
+                        current_thumb = m_urls.get("video_thumbnail") or m_urls.get(
+                            "audio_thumbnail"
+                        )
+
+                        md_parts = []
+                        if video_url:
+                            md_parts.append(
+                                f"[![{title}]({current_thumb})]({video_url})"
+                                if current_thumb
+                                else f"[{title}]({video_url})"
+                            )
+                        if audio_url:
+                            md_parts.append(
+                                f"[![{title} - Audio]({current_thumb})]({audio_url})"
+                                if current_thumb
+                                else f"[{title} - Audio]({audio_url})"
+                            )
+
+                        if md_parts:
+                            media_md = "\n\n".join(md_parts)
+                            md_to_add = f"\n\n{media_md}"
+
+                            if not message_open:
+                                message_index = next_output_index
+                                next_output_index += 1
+                                yield make_event(
+                                    "response.output_item.added",
+                                    {
+                                        **base_event,
+                                        "type": "response.output_item.added",
+                                        "output_index": message_index,
+                                        "item": ResponseOutputMessage(
+                                            id=message_item_id,
+                                            type="message",
+                                            status="in_progress",
+                                            role="assistant",
+                                            content=[],
+                                        ).model_dump(mode="json"),
+                                    },
+                                )
+                                yield make_event(
+                                    "response.content_part.added",
+                                    {
+                                        **base_event,
+                                        "type": "response.content_part.added",
+                                        "item_id": message_item_id,
+                                        "output_index": message_index,
+                                        "content_index": 0,
+                                        "part": ResponseOutputText(
+                                            type="output_text", text=""
+                                        ).model_dump(mode="json"),
+                                    },
+                                )
+                                message_open = True
+
+                            yield make_event(
+                                "response.output_text.delta",
+                                {
+                                    **base_event,
+                                    "type": "response.output_text.delta",
+                                    "item_id": message_item_id,
+                                    "output_index": message_index,
+                                    "content_index": 0,
+                                    "delta": md_to_add,
+                                    "logprobs": [],
+                                },
+                            )
+                            assistant_text += md_to_add
+                            storage_output += md_to_add
+
+        final_response_contents: list[ResponseOutputContent] = []
         if message_open:
+            if assistant_text:
+                final_response_contents = [
+                    ResponseOutputText(type="output_text", text=assistant_text)
+                ]
+            else:
+                final_response_contents = [ResponseOutputText(type="output_text", text="")]
+
             yield make_event(
                 "response.output_text.done",
                 {
                     **base_event,
                     "type": "response.output_text.done",
                     "item_id": message_item_id,
-                    "output_index": current_index,
+                    "output_index": message_index,
                     "content_index": 0,
                 },
             )
@@ -1692,171 +1947,33 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                     **base_event,
                     "type": "response.content_part.done",
                     "item_id": message_item_id,
-                    "output_index": current_index,
+                    "output_index": message_index,
                     "content_index": 0,
                     "part": ResponseOutputText(type="output_text", text=assistant_text).model_dump(
                         mode="json"
                     ),
                 },
             )
+
             yield make_event(
                 "response.output_item.done",
                 {
                     **base_event,
                     "type": "response.output_item.done",
-                    "output_index": current_index,
+                    "output_index": message_index,
                     "item": ResponseOutputMessage(
                         id=message_item_id,
                         type="message",
                         status="completed",
                         role="assistant",
-                        content=[ResponseOutputText(type="output_text", text=assistant_text)],
+                        content=final_response_contents,
                     ).model_dump(mode="json"),
                 },
             )
-            current_index += 1
-
-        image_items = []
-        final_response_contents = []
-        seen_hashes = {}
-
-        image_map = {}
-        media_items = []
-        media_url_to_idx = {}
-
-        for out in all_outputs:
-            for img in out.images or []:
-                if img.url:
-                    image_map[img.url] = img
-
-            m_list = (out.videos or []) + (out.media or [])
-            for m in m_list:
-                v_url = getattr(m, "url", None)
-                a_url = getattr(m, "mp3_url", None)
-                if not v_url and not a_url:
-                    continue
-
-                idx = (media_url_to_idx.get(v_url) if v_url else None) or (
-                    media_url_to_idx.get(a_url) if a_url else None
-                )
-
-                if idx is not None:
-                    media_items[idx] = m
-                else:
-                    idx = len(media_items)
-                    media_items.append(m)
-
-                if v_url:
-                    media_url_to_idx[v_url] = idx
-                if a_url:
-                    media_url_to_idx[a_url] = idx
-
-        images = list(image_map.values())
-        for image in images:
-            try:
-                b64, w, h, fname, fhash = await _image_to_base64(image, media_store)
-                if fhash in seen_hashes:
-                    (media_store / fname).unlink(missing_ok=True)
-                    b64, w, h, fname = seen_hashes[fhash]
-                else:
-                    seen_hashes[fhash] = (b64, w, h, fname)
-
-                parts = fname.rsplit(".", 1)
-                img_id = parts[0]
-                fmt = parts[1] if len(parts) > 1 else "png"
-
-                img_item = ImageGenerationCall(
-                    id=img_id,
-                    result=b64,
-                    output_format=fmt,
-                    size=f"{w}x{h}" if w and h else None,
-                )
-
-                img_link = f"![{fname}]({base_url}media/{fname}?token={get_media_token(fname)})"
-                image_url_with_newline = f"\n\n{img_link}"
-                final_response_contents.append(
-                    ResponseOutputText(type="output_text", text=image_url_with_newline)
-                )
-
-                yield make_event(
-                    "response.output_item.added",
-                    {
-                        **base_event,
-                        "type": "response.output_item.added",
-                        "output_index": current_index,
-                        "item": img_item.model_dump(mode="json"),
-                    },
-                )
-                yield make_event(
-                    "response.output_item.done",
-                    {
-                        **base_event,
-                        "type": "response.output_item.done",
-                        "output_index": current_index,
-                        "item": img_item.model_dump(mode="json"),
-                    },
-                )
-                current_index += 1
-                image_items.append(img_item)
-                storage_output += image_url_with_newline
-            except Exception as e:
-                logger.warning(f"Image processing failed in stream: {e}")
-
-        seen_media_hashes = {}
-        last_thumb_url = None
-        for media_item in media_items:
-            try:
-                m_dict = await _media_to_local_file(media_item, media_store)
-
-                m_urls = {}
-                for mtype, (random_name, fhash) in m_dict.items():
-                    if fhash in seen_media_hashes:
-                        existing_name = seen_media_hashes[fhash]
-                        if random_name != existing_name:
-                            (media_store / random_name).unlink(missing_ok=True)
-                        m_urls[mtype] = (
-                            f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
-                        )
-                        continue
-                    seen_media_hashes[fhash] = random_name
-                    m_urls[mtype] = (
-                        f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
-                    )
-
-                title = getattr(media_item, "title", "Media")
-                video_url = m_urls.get("video")
-                audio_url = m_urls.get("audio")
-                item_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
-                if item_thumb:
-                    last_thumb_url = item_thumb
-
-                md_parts = []
-                if video_url:
-                    md_parts.append(
-                        f"[![{title}]({last_thumb_url})]({video_url})"
-                        if last_thumb_url
-                        else f"[{title}]({video_url})"
-                    )
-
-                if audio_url:
-                    md_parts.append(
-                        f"[![{title} - Audio]({last_thumb_url})]({audio_url})"
-                        if last_thumb_url
-                        else f"[{title} - Audio]({audio_url})"
-                    )
-
-                media_md = "\n\n".join(md_parts)
-
-                if media_md:
-                    media_md_with_newline = f"\n\n{media_md}"
-                    final_response_contents.append(
-                        ResponseOutputText(type="output_text", text=media_md_with_newline)
-                    )
-                    storage_output += media_md_with_newline
-            except Exception:
-                logger.warning("Media processing failed in stream")
 
         for call in detected_tool_calls:
+            tc_index = next_output_index
+            next_output_index += 1
             tc_item = ResponseFunctionToolCall(
                 id=call.id,
                 call_id=call.id,
@@ -1869,7 +1986,7 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                 {
                     **base_event,
                     "type": "response.output_item.added",
-                    "output_index": current_index,
+                    "output_index": tc_index,
                     "item": tc_item.model_dump(mode="json"),
                 },
             )
@@ -1878,19 +1995,13 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
                 {
                     **base_event,
                     "type": "response.output_item.done",
-                    "output_index": current_index,
+                    "output_index": tc_index,
                     "item": tc_item.model_dump(mode="json"),
                 },
             )
-            current_index += 1
-
-        if assistant_text:
-            final_response_contents.insert(
-                0, ResponseOutputText(type="output_text", text=assistant_text)
-            )
 
         p_tok, c_tok, t_tok, r_tok = _calculate_usage(
-            messages, assistant_text, detected_tool_calls, full_thoughts
+            messages, storage_output, detected_tool_calls, full_thoughts
         )
         usage = ResponseUsage(
             input_tokens=p_tok,
@@ -1908,6 +2019,8 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
             usage,
             request,
             full_thoughts,
+            message_item_id,
+            thought_item_id,
         )
         _persist_conversation(
             db,
@@ -1949,7 +2062,6 @@ async def create_chat_completion(
     raw_request: Request,
     api_key: str = Depends(verify_api_key),
     tmp_dir: Path = Depends(get_temp_dir),
-    media_store: Path = Depends(get_media_store_dir),
 ):
     base_url = str(raw_request.base_url)
     pool, db = GeminiClientPool(), LMDBConversationStore()
@@ -1996,7 +2108,7 @@ async def create_chat_completion(
             session = client.start_chat(model=model)
             m_input, files = await GeminiClientWrapper.process_conversation(msgs, tmp_dir)
         except Exception as e:
-            logger.exception("Error in preparing conversation")
+            logger.error(f"Error in preparing conversation: {e}")
             raise HTTPException(
                 status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)
             ) from e
@@ -2013,7 +2125,7 @@ async def create_chat_completion(
             session, m_input, files=files, stream=bool(request.stream)
         )
     except Exception as e:
-        logger.exception("Gemini API error")
+        logger.error(f"Gemini API error: {e}")
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) from e
 
     if request.stream:
@@ -2038,7 +2150,7 @@ async def create_chat_completion(
         thoughts = resp_or_stream.thoughts
         raw_clean = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False)
     except Exception as exc:
-        logger.exception("Gemini output parsing failed.")
+        logger.error(f"Gemini output parsing failed: {exc}")
         raise HTTPException(
             status_code=status.HTTP_502_BAD_GATEWAY, detail="Malformed response."
         ) from exc
@@ -2048,35 +2160,51 @@ async def create_chat_completion(
     )
 
     images = resp_or_stream.images or []
+    media_items: list[GeneratedVideo | GeneratedMedia] = (resp_or_stream.videos or []) + (
+        resp_or_stream.media or []
+    )
+    unique_media = []
+    seen_urls = set()
+    for m in media_items:
+        v_url = getattr(m, "url", None)
+        a_url = getattr(m, "mp3_url", None)
+        primary_url = v_url or a_url
+        if primary_url and primary_url not in seen_urls:
+            unique_media.append(m)
+            seen_urls.add(primary_url)
+
+    tasks = [_process_image_item(img) for img in images] + [
+        _process_media_item(m) for m in unique_media
+    ]
+    results = await asyncio.gather(*tasks)
+
     image_markdown = ""
+    media_markdown = ""
     seen_hashes = {}
-    for image in images:
-        try:
-            b64, w, h, fname, fhash = await _image_to_base64(image, media_store)
+    seen_media_hashes = {}
+    media_store = get_media_store_dir()
+
+    for res in results:
+        if not res:
+            continue
+        rtype, original_item, media_data = res
+
+        if rtype == "image":
+            _, _, _, fname, fhash = media_data
             if fhash in seen_hashes:
                 (media_store / fname).unlink(missing_ok=True)
-                b64, w, h, fname = seen_hashes[fhash]
+                fname = seen_hashes[fhash]
             else:
-                seen_hashes[fhash] = (b64, w, h, fname)
+                seen_hashes[fhash] = fname
 
-            img_url = f"![{fname}]({base_url}media/{fname}?token={get_media_token(fname)})"
-            image_markdown += f"\n\n{img_url}"
-        except Exception as exc:
-            logger.warning(f"Failed to process image in OpenAI response: {exc}")
-
-    if image_markdown:
-        visible_output += image_markdown
-        storage_output += image_markdown
+            img_url = f"{base_url}media/{fname}?token={get_media_token(fname)}"
+            title = getattr(original_item, "title", "Image")
+            image_markdown += f"\n\n![{title}]({img_url})"
 
-    media_items: list[GeneratedVideo | GeneratedMedia] = (resp_or_stream.videos or []) + (
-        resp_or_stream.media or []
-    )
-    media_markdown = ""
-    seen_media_hashes = {}
-    last_thumb_url = None
-    for m_item in media_items:
-        try:
-            m_dict = await _media_to_local_file(m_item, media_store)
+        elif rtype == "media":
+            m_dict = media_data
+            if not m_dict:
+                continue
 
             m_urls = {}
             for mtype, (random_name, fhash) in m_dict.items():
@@ -2087,39 +2215,37 @@ async def create_chat_completion(
                     m_urls[mtype] = (
                         f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
                     )
-                    continue
-                seen_media_hashes[fhash] = random_name
-                m_urls[mtype] = (
-                    f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
-                )
+                else:
+                    seen_media_hashes[fhash] = random_name
+                    m_urls[mtype] = (
+                        f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
+                    )
 
-            title = getattr(m_item, "title", "Media")
+            title = getattr(original_item, "title", "Media")
             video_url = m_urls.get("video")
             audio_url = m_urls.get("audio")
-            item_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
-            if item_thumb:
-                last_thumb_url = item_thumb
+            current_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
 
             md_parts = []
             if video_url:
                 md_parts.append(
-                    f"[![{title}]({last_thumb_url})]({video_url})"
-                    if last_thumb_url
+                    f"[![{title}]({current_thumb})]({video_url})"
+                    if current_thumb
                     else f"[{title}]({video_url})"
                 )
-
             if audio_url:
                 md_parts.append(
-                    f"[![{title} - Audio]({last_thumb_url})]({audio_url})"
-                    if last_thumb_url
+                    f"[![{title} - Audio]({current_thumb})]({audio_url})"
+                    if current_thumb
                     else f"[{title} - Audio]({audio_url})"
                 )
 
             if md_parts:
-                m_md = "\n\n".join(md_parts)
-                media_markdown += f"\n\n{m_md}"
-        except Exception as exc:
-            logger.warning(f"Failed to process media in OpenAI response: {exc}")
+                media_markdown += f"\n\n{'\n\n'.join(md_parts)}"
+
+    if image_markdown:
+        visible_output += image_markdown
+        storage_output += image_markdown
 
     if media_markdown:
         visible_output += media_markdown
@@ -2131,7 +2257,7 @@ async def create_chat_completion(
         )
 
     p_tok, c_tok, t_tok, r_tok = _calculate_usage(
-        app_messages, visible_output, tool_calls, thoughts
+        app_messages, storage_output, tool_calls, thoughts
     )
     usage = {
         "prompt_tokens": p_tok,
@@ -2167,7 +2293,6 @@ async def create_response(
     raw_request: Request,
     api_key: str = Depends(verify_api_key),
     tmp_dir: Path = Depends(get_temp_dir),
-    media_store: Path = Depends(get_media_store_dir),
 ):
     base_url = str(raw_request.base_url)
     base_messages = _convert_responses_to_app_messages(request.input)
@@ -2234,7 +2359,7 @@ async def create_response(
             session = client.start_chat(model=model)
             m_input, files = await GeminiClientWrapper.process_conversation(messages, tmp_dir)
         except Exception as e:
-            logger.exception("Error in preparing conversation")
+            logger.error(f"Error in preparing conversation: {e}")
             raise HTTPException(
                 status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(e)
             ) from e
@@ -2251,7 +2376,7 @@ async def create_response(
             session, m_input, files=files, stream=bool(request.stream)
         )
     except Exception as e:
-        logger.exception("Gemini API error")
+        logger.error(f"Gemini API error: {e}")
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=str(e)) from e
 
     if request.stream:
@@ -2267,7 +2392,6 @@ async def create_response(
             client,
             session,
             request,
-            media_store,
             base_url,
             struct_req,
         )
@@ -2278,7 +2402,7 @@ async def create_response(
         thoughts = resp_or_stream.thoughts
         raw_clean = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False)
     except Exception as exc:
-        logger.exception("Gemini parsing failed")
+        logger.error(f"Gemini parsing failed: {exc}")
         raise HTTPException(
             status_code=status.HTTP_502_BAD_GATEWAY, detail="Malformed response."
         ) from exc
@@ -2294,11 +2418,32 @@ async def create_response(
     ) and not images:
         raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned.")
 
+    unique_media = []
+    seen_urls = set()
+    for m in (resp_or_stream.videos or []) + (resp_or_stream.media or []):
+        p_url = getattr(m, "url", None) or getattr(m, "mp3_url", None)
+        if p_url and p_url not in seen_urls:
+            unique_media.append(m)
+            seen_urls.add(p_url)
+
+    tasks = [_process_image_item(img) for img in images] + [
+        _process_media_item(m) for m in unique_media
+    ]
+    results = await asyncio.gather(*tasks)
+
     contents, img_calls = [], []
     seen_hashes = {}
-    for img in images:
-        try:
-            b64, w, h, fname, fhash = await _image_to_base64(img, media_store)
+    seen_media_hashes = {}
+    media_markdown = ""
+    media_store = get_media_store_dir()
+
+    for res in results:
+        if not res:
+            continue
+        rtype, original_item, media_data = res
+
+        if rtype == "image":
+            b64, w, h, fname, fhash = media_data
             if fhash in seen_hashes:
                 (media_store / fname).unlink(missing_ok=True)
                 b64, w, h, fname = seen_hashes[fhash]
@@ -2307,47 +2452,17 @@ async def create_response(
 
             parts = fname.rsplit(".", 1)
             img_id = parts[0]
-            img_format = (
-                parts[1]
-                if len(parts) > 1
-                else ("png" if isinstance(img, GeneratedImage) else "jpeg")
-            )
-
+            fmt = parts[1] if len(parts) > 1 else "png"
             img_calls.append(
                 ImageGenerationCall(
-                    id=img_id,
-                    result=b64,
-                    output_format=img_format,
-                    size=f"{w}x{h}" if w and h else None,
+                    id=img_id, result=b64, output_format=fmt, size=f"{w}x{h}" if w and h else None
                 )
             )
-        except Exception as e:
-            logger.warning(f"Image error: {e}")
-
-    if assistant_text:
-        contents.append(ResponseOutputText(type="output_text", text=assistant_text))
-    if not contents:
-        contents.append(ResponseOutputText(type="output_text", text=""))
-
-    image_markdown = ""
-    for img_call in img_calls:
-        fname = f"{img_call.id}.{img_call.output_format}"
-        img_url = f"![{fname}]({base_url}media/{fname}?token={get_media_token(fname)})"
-        image_markdown += f"\n\n{img_url}"
-
-    if image_markdown:
-        storage_output += image_markdown
-        contents.append(ResponseOutputText(type="output_text", text=image_markdown))
 
-    media_items: list[GeneratedVideo | GeneratedMedia] = (resp_or_stream.videos or []) + (
-        resp_or_stream.media or []
-    )
-    seen_media_hashes = {}
-    media_markdown = ""
-    last_thumb_url = None
-    for m_item in media_items:
-        try:
-            m_dict = await _media_to_local_file(m_item, media_store)
+        elif rtype == "media":
+            m_dict = media_data
+            if not m_dict:
+                continue
 
             m_urls = {}
             for mtype, (random_name, fhash) in m_dict.items():
@@ -2358,45 +2473,54 @@ async def create_response(
                     m_urls[mtype] = (
                         f"{base_url}media/{existing_name}?token={get_media_token(existing_name)}"
                     )
-                    continue
-                seen_media_hashes[fhash] = random_name
-                m_urls[mtype] = (
-                    f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
-                )
+                else:
+                    seen_media_hashes[fhash] = random_name
+                    m_urls[mtype] = (
+                        f"{base_url}media/{random_name}?token={get_media_token(random_name)}"
+                    )
 
-            title = getattr(m_item, "title", "Media")
+            title = getattr(original_item, "title", "Media")
             video_url = m_urls.get("video")
             audio_url = m_urls.get("audio")
-            item_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
-            if item_thumb:
-                last_thumb_url = item_thumb
+            current_thumb = m_urls.get("video_thumbnail") or m_urls.get("audio_thumbnail")
 
             md_parts = []
             if video_url:
                 md_parts.append(
-                    f"[![{title}]({last_thumb_url})]({video_url})"
-                    if last_thumb_url
+                    f"[![{title}]({current_thumb})]({video_url})"
+                    if current_thumb
                     else f"[{title}]({video_url})"
                 )
-
             if audio_url:
                 md_parts.append(
-                    f"[![{title} - Audio]({last_thumb_url})]({audio_url})"
-                    if last_thumb_url
+                    f"[![{title} - Audio]({current_thumb})]({audio_url})"
+                    if current_thumb
                     else f"[{title} - Audio]({audio_url})"
                 )
 
             if md_parts:
-                m_md = "\n\n".join(md_parts)
-                media_markdown += f"\n\n{m_md}"
-        except Exception as exc:
-            logger.warning(f"Failed to process media in OpenAI response: {exc}")
+                media_markdown += f"\n\n{'\n\n'.join(md_parts)}"
+
+    if assistant_text:
+        contents.append(ResponseOutputText(type="output_text", text=assistant_text))
+
+    image_markdown = ""
+    for ic in img_calls:
+        img_url = f"{base_url}media/{ic.id}.{ic.output_format}?token={get_media_token(f'{ic.id}.{ic.output_format}')}"
+        image_markdown += f"\n\n![{ic.id}]({img_url})"
+
+    if image_markdown:
+        storage_output += image_markdown
+        contents.append(ResponseOutputText(type="output_text", text=image_markdown))
 
     if media_markdown:
         storage_output += media_markdown
         contents.append(ResponseOutputText(type="output_text", text=media_markdown))
 
-    p_tok, c_tok, t_tok, r_tok = _calculate_usage(messages, assistant_text, tool_calls, thoughts)
+    if not contents:
+        contents.append(ResponseOutputText(type="output_text", text=""))
+
+    p_tok, c_tok, t_tok, r_tok = _calculate_usage(messages, storage_output, tool_calls, thoughts)
     usage = ResponseUsage(
         input_tokens=p_tok,
         output_tokens=c_tok,

From baf67b979417deae4edd962b51583aff364ccdd5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Mon, 9 Mar 2026 21:26:16 +0700
Subject: [PATCH 202/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index aba96fe..a16988f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post258"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#4fd2ee6c4e880f693768d32c73220e89661fcc12" }
+version = "0.0.post259"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#9d56d7c0aaf62c725734d39acc6113547a9aab6a" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From ddc225706b651f1604773fed57288662068a1984 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 10 Mar 2026 08:29:08 +0700
Subject: [PATCH 203/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index a16988f..e50d714 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post259"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#9d56d7c0aaf62c725734d39acc6113547a9aab6a" }
+version = "0.0.post260"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#8309ca686a2d3ffd5de00bd7dba3212a3f87c5c6" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From d05aa9d971561730805d0f3f3bdf51d8aa7ee53c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 10 Mar 2026 20:19:30 +0700
Subject: [PATCH 204/236] Introduced cookies parameter and resolved text drift
 during video generation.

---
 README.md            |  11 +++-
 README.zh.md         |  13 +++-
 app/server/chat.py   | 140 +++++++++++++++++++++++++++++++++++++++++--
 app/services/pool.py |   1 +
 app/utils/config.py  |  23 +++++--
 config/config.yaml   |   5 +-
 pyproject.toml       |   2 +-
 uv.lock              |   4 +-
 8 files changed, 178 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 6b6f485..5117a9a 100644
--- a/README.md
+++ b/README.md
@@ -54,8 +54,12 @@ Edit `config/config.yaml` and provide at least one credential pair:
 gemini:
   clients:
     - id: "client-a"
-      secure_1psid: "YOUR_SECURE_1PSID_HERE"
-      secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"
+      secure_1psid: "YOUR_SECURE_1PSID_HERE" # Optional if 'cookies' is provided
+      secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" # Optional if 'cookies' is provided
+      # OR use a cookies dictionary:
+      # cookies:
+      #   __Secure-1PSID: "..."
+      #   __Secure-1PSIDTS: "..."
       proxy: null # Optional proxy URL (null/empty keeps direct connection)
 ```
 
@@ -180,6 +184,9 @@ export CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS="your-secure-1psidts"
 # Override optional proxy settings for client 0
 export CONFIG_GEMINI__CLIENTS__0__PROXY="socks5://127.0.0.1:1080"
 
+# Override using a JSON cookies string for client 0
+export CONFIG_GEMINI__CLIENTS__0__COOKIES='{"__Secure-1PSID": "...", "__Secure-1PSIDTS": "..."}'
+
 # Override conversation storage size limit
 export CONFIG_STORAGE__MAX_SIZE=268435456  # 256 MB
 ```
diff --git a/README.zh.md b/README.zh.md
index d012d32..30678fe 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -54,9 +54,13 @@ pip install -e .
 gemini:
   clients:
     - id: "client-a"
-      secure_1psid: "YOUR_SECURE_1PSID_HERE"
-      secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"
-      proxy: null # Optional proxy URL (null/empty keeps direct connection)
+      secure_1psid: "YOUR_SECURE_1PSID_HERE" # 若已提供 'cookies'，则此项可选
+      secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" # 若已提供 'cookies'，则此项可选
+      # 或者使用 Cookies 字典：
+      # cookies:
+      #   __Secure-1PSID: "..."
+      #   __Secure-1PSIDTS: "..."
+      proxy: null # 可选代理 URL (null/空值则保持直连)
 ```
 
 > [!NOTE]
@@ -180,6 +184,9 @@ export CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS="your-secure-1psidts"
 # 覆盖 Client 0 的代理设置
 export CONFIG_GEMINI__CLIENTS__0__PROXY="socks5://127.0.0.1:1080"
 
+# 通过 JSON 字符串覆盖 Client 0 的 Cookie 配置
+export CONFIG_GEMINI__CLIENTS__0__COOKIES='{"__Secure-1PSID": "...", "__Secure-1PSIDTS": "..."}'
+
 # 覆盖对话存储大小限制
 export CONFIG_STORAGE__MAX_SIZE=268435456  # 256 MB
 ```
diff --git a/app/server/chat.py b/app/server/chat.py
index e0df9c6..3e7154b 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1241,10 +1241,32 @@ def make_chunk(delta_content: dict) -> str:
 
         if all_outputs:
             final_chunk = all_outputs[-1]
-            if final_chunk.text:
-                full_text = final_chunk.text
             if final_chunk.thoughts:
-                full_thoughts = final_chunk.thoughts
+                f_thoughts = final_chunk.thoughts
+                ft_len, ct_len = len(f_thoughts), len(full_thoughts)
+                if ft_len >= ct_len and f_thoughts.startswith(full_thoughts):
+                    if ft_len > ct_len:
+                        drift_t = f_thoughts[ct_len:]
+                        full_thoughts = f_thoughts
+                        yield make_chunk(
+                            {"delta": {"reasoning_content": drift_t}, "finish_reason": None}
+                        )
+                else:
+                    logger.debug("Significant thoughts drift detected, preferring accumulated.")
+
+            if final_chunk.text:
+                f_text = final_chunk.text
+                f_len, c_len = len(f_text), len(full_text)
+                if f_len >= c_len and f_text.startswith(full_text):
+                    if f_len > c_len:
+                        drift = f_text[c_len:]
+                        full_text = f_text
+                        if visible_drift := suppressor.process(drift):
+                            yield make_chunk(
+                                {"delta": {"content": visible_drift}, "finish_reason": None}
+                            )
+                else:
+                    logger.debug("Significant text drift detected, preferring accumulated state.")
 
         if remaining_text := suppressor.flush():
             yield make_chunk({"delta": {"content": remaining_text}, "finish_reason": None})
@@ -1648,10 +1670,116 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
 
         if all_outputs:
             last = all_outputs[-1]
-            if last.text:
-                full_text = last.text
             if last.thoughts:
-                full_thoughts = last.thoughts
+                l_thoughts = last.thoughts
+                lt_len, ct_len = len(l_thoughts), len(full_thoughts)
+                if lt_len >= ct_len and l_thoughts.startswith(full_thoughts):
+                    if lt_len > ct_len:
+                        drift_t = l_thoughts[ct_len:]
+                        full_thoughts = l_thoughts
+                        if not thought_open:
+                            thought_index = next_output_index
+                            next_output_index += 1
+                            yield make_event(
+                                "response.output_item.added",
+                                {
+                                    **base_event,
+                                    "type": "response.output_item.added",
+                                    "output_index": thought_index,
+                                    "item": ResponseReasoningItem(
+                                        id=thought_item_id,
+                                        type="reasoning",
+                                        status="in_progress",
+                                        summary=[],
+                                    ).model_dump(mode="json"),
+                                },
+                            )
+                            yield make_event(
+                                "response.reasoning_summary_part.added",
+                                {
+                                    **base_event,
+                                    "type": "response.reasoning_summary_part.added",
+                                    "item_id": thought_item_id,
+                                    "output_index": thought_index,
+                                    "summary_index": 0,
+                                    "part": SummaryTextContent(text="").model_dump(mode="json"),
+                                },
+                            )
+                            thought_open = True
+
+                        yield make_event(
+                            "response.reasoning_summary_text.delta",
+                            {
+                                **base_event,
+                                "type": "response.reasoning_summary_text.delta",
+                                "item_id": thought_item_id,
+                                "output_index": thought_index,
+                                "summary_index": 0,
+                                "delta": drift_t,
+                            },
+                        )
+                else:
+                    logger.debug(
+                        "Significant thoughts drift detected in Responses API, preferring accumulated."
+                    )
+
+            if last.text:
+                l_text = last.text
+                l_len, c_len = len(l_text), len(full_text)
+                if l_len >= c_len and l_text.startswith(full_text):
+                    if l_len > c_len:
+                        drift = l_text[c_len:]
+                        full_text = l_text
+                        if visible := suppressor.process(drift):
+                            if not message_open:
+                                message_index = next_output_index
+                                next_output_index += 1
+                                yield make_event(
+                                    "response.output_item.added",
+                                    {
+                                        **base_event,
+                                        "type": "response.output_item.added",
+                                        "output_index": message_index,
+                                        "item": ResponseOutputMessage(
+                                            id=message_item_id,
+                                            type="message",
+                                            status="in_progress",
+                                            role="assistant",
+                                            content=[],
+                                        ).model_dump(mode="json"),
+                                    },
+                                )
+                                yield make_event(
+                                    "response.content_part.added",
+                                    {
+                                        **base_event,
+                                        "type": "response.content_part.added",
+                                        "item_id": message_item_id,
+                                        "output_index": message_index,
+                                        "content_index": 0,
+                                        "part": ResponseOutputText(
+                                            type="output_text", text=""
+                                        ).model_dump(mode="json"),
+                                    },
+                                )
+                                message_open = True
+
+                            yield make_event(
+                                "response.output_text.delta",
+                                {
+                                    **base_event,
+                                    "type": "response.output_text.delta",
+                                    "item_id": message_item_id,
+                                    "output_index": message_index,
+                                    "content_index": 0,
+                                    "delta": visible,
+                                    "logprobs": [],
+                                },
+                            )
+                else:
+                    logger.debug(
+                        "Significant text drift detected in Responses API, preferring accumulated."
+                    )
 
         remaining = suppressor.flush()
         if remaining and message_open:
diff --git a/app/services/pool.py b/app/services/pool.py
index 3b4197c..86f0e15 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -26,6 +26,7 @@ def __init__(self) -> None:
                 client_id=c.id,
                 secure_1psid=c.secure_1psid,
                 secure_1psidts=c.secure_1psidts,
+                cookies=c.cookies,
                 proxy=c.proxy,
             )
             self._clients.append(client)
diff --git a/app/utils/config.py b/app/utils/config.py
index 03e6f6d..e96ac7b 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -39,10 +39,23 @@ class GeminiClientSettings(BaseModel):
     """Credential set for one Gemini client."""
 
     id: str = Field(..., description="Unique identifier for the client")
-    secure_1psid: str = Field(..., description="Gemini Secure 1PSID")
-    secure_1psidts: str = Field(..., description="Gemini Secure 1PSIDTS")
+    secure_1psid: str | None = Field(default=None, description="Gemini Secure 1PSID")
+    secure_1psidts: str | None = Field(default=None, description="Gemini Secure 1PSIDTS")
+    cookies: dict[str, str] | None = Field(
+        default=None, description="Gemini cookies as a dictionary"
+    )
     proxy: str | None = Field(default=None, description="Proxy URL for this Gemini client")
 
+    @field_validator("cookies", mode="before")
+    @classmethod
+    def _parse_cookies(cls, v: Any) -> Any:
+        if isinstance(v, str) and v.strip().startswith("{"):
+            try:
+                return orjson.loads(v)
+            except orjson.JSONDecodeError:
+                return v
+        return v
+
     @field_validator("proxy", mode="before")
     @classmethod
     def _blank_proxy_to_none(cls, value: str | None) -> str | None:
@@ -228,10 +241,10 @@ def settings_customise_sources(
         )
 
 
-def extract_gemini_clients_env() -> dict[int, dict[str, str]]:
+def extract_gemini_clients_env() -> dict[int, dict[str, Any]]:
     """Extract and remove all Gemini clients related environment variables, return a mapping from index to field dict."""
     prefix = "CONFIG_GEMINI__CLIENTS__"
-    env_overrides: dict[int, dict[str, str]] = {}
+    env_overrides: dict[int, dict[str, Any]] = {}
     to_delete = []
     for k, v in os.environ.items():
         if k.startswith(prefix):
@@ -252,7 +265,7 @@ def extract_gemini_clients_env() -> dict[int, dict[str, str]]:
 
 def _merge_clients_with_env(
     base_clients: list[GeminiClientSettings] | None,
-    env_overrides: dict[int, dict[str, str]],
+    env_overrides: dict[int, dict[str, Any]],
 ):
     """Override base_clients with env_overrides, return the new clients list."""
     if not env_overrides:
diff --git a/config/config.yaml b/config/config.yaml
index bfc9306..0218f14 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -19,8 +19,9 @@ cors:
 gemini:
   clients:
     - id: "example-id-1"   # Arbitrary client ID
-      secure_1psid: "YOUR_SECURE_1PSID_HERE"
-      secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"
+      secure_1psid: "YOUR_SECURE_1PSID_HERE"      # Optional: Gemini Secure 1PSID
+      secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"  # Optional: Gemini Secure 1PSIDTS
+      cookies: null                               # Optional: Cookies dictionary (e.g. {__Secure-1PSID: "...", __Secure-1PSIDTS: "..."})
       proxy: null          # Optional proxy URL (null/empty means direct connection)
   timeout: 450             # Init timeout in seconds (Not less than 30s)
   watchdog_timeout: 90     # Watchdog timeout in seconds (Not less than 30s)
diff --git a/pyproject.toml b/pyproject.toml
index 459b6f4..3afe92a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ requires-python = "==3.13.*"
 dependencies = [
     "curl-cffi>=0.14.0",
     "fastapi>=0.135.1",
-    "gemini-webapi>=1.20.0",
+    "gemini-webapi>=1.21.0",
     "httptools>=0.7.1",
     "lmdb>=1.7.5",
     "loguru>=0.7.3",
diff --git a/uv.lock b/uv.lock
index e50d714..c198b21 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post260"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#8309ca686a2d3ffd5de00bd7dba3212a3f87c5c6" }
+version = "0.0.post288"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#e5cfac8ccc0a56c3b4da07a23596033bf88082f0" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From cb987032767c0225cc858c6f7247ee466840f3b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 10 Mar 2026 21:04:35 +0700
Subject: [PATCH 205/236] Fixed configuration validation failure

---
 app/utils/config.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/app/utils/config.py b/app/utils/config.py
index e96ac7b..1f1cdde 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -53,7 +53,10 @@ def _parse_cookies(cls, v: Any) -> Any:
             try:
                 return orjson.loads(v)
             except orjson.JSONDecodeError:
-                return v
+                try:
+                    return ast.literal_eval(v)
+                except (ValueError, SyntaxError):
+                    return v
         return v
 
     @field_validator("proxy", mode="before")
@@ -80,7 +83,10 @@ def _parse_json_string(cls, v: Any) -> Any:
             try:
                 return orjson.loads(v)
             except orjson.JSONDecodeError:
-                return v
+                try:
+                    return ast.literal_eval(v)
+                except (ValueError, SyntaxError):
+                    return v
         return v
 
 
@@ -116,9 +122,12 @@ def _parse_models_json(cls, v: Any) -> Any:
         if isinstance(v, str) and v.strip().startswith("["):
             try:
                 return orjson.loads(v)
-            except orjson.JSONDecodeError as e:
-                logger.warning(f"Failed to parse models JSON string: {e}")
-                return v
+            except orjson.JSONDecodeError:
+                try:
+                    return ast.literal_eval(v)
+                except (ValueError, SyntaxError) as e:
+                    logger.warning(f"Failed to parse models JSON or Python literal: {e}")
+                    return v
         return v
 
     @field_validator("models")

From 219fc61ff3cca6bafe341cbb41accb40949a32a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 10 Mar 2026 22:10:30 +0700
Subject: [PATCH 206/236] Using default configuration values from global
 settings

---
 app/services/client.py | 35 ++++++++++-------------------------
 app/services/pool.py   | 21 +++------------------
 2 files changed, 13 insertions(+), 43 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index d2e5270..92f3737 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -1,6 +1,6 @@
 import io
 from pathlib import Path
-from typing import Any, cast
+from typing import Any
 
 import orjson
 from gemini_webapi import GeminiClient, ModelOutput
@@ -29,36 +29,21 @@ def __init__(self, client_id: str, **kwargs):
         super().__init__(**kwargs)
         self.id = client_id
 
-    async def init(  # type: ignore
-        self,
-        timeout: float = cast(float, _UNSET),
-        watchdog_timeout: float = cast(float, _UNSET),
-        auto_close: bool = False,
-        close_delay: float = cast(float, _UNSET),
-        auto_refresh: bool = cast(bool, _UNSET),
-        refresh_interval: float = cast(float, _UNSET),
-        verbose: bool = cast(bool, _UNSET),
-    ) -> None:
+    async def init(self, *args: Any, **kwargs: Any) -> None:
         """
-        Inject default configuration values.
+        Inject default configuration values from global settings.
         """
         config = g_config.gemini
-        timeout = cast(float, _resolve(timeout, config.timeout))
-        watchdog_timeout = cast(float, _resolve(watchdog_timeout, config.watchdog_timeout))
-        close_delay = timeout
-        auto_refresh = cast(bool, _resolve(auto_refresh, config.auto_refresh))
-        refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval))
-        verbose = cast(bool, _resolve(verbose, config.verbose))
-
+        auto_close = kwargs.get("auto_close", False)
         try:
             await super().init(
-                timeout=timeout,
-                watchdog_timeout=watchdog_timeout,
+                timeout=config.timeout,
+                watchdog_timeout=config.watchdog_timeout,
                 auto_close=auto_close,
-                close_delay=close_delay,
-                auto_refresh=auto_refresh,
-                refresh_interval=refresh_interval,
-                verbose=verbose,
+                close_delay=config.timeout,
+                auto_refresh=config.auto_refresh,
+                refresh_interval=config.refresh_interval,
+                verbose=config.verbose,
             )
         except Exception:
             logger.exception(f"Failed to initialize GeminiClient {self.id}")
diff --git a/app/services/pool.py b/app/services/pool.py
index 86f0e15..febe91d 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -24,10 +24,7 @@ def __init__(self) -> None:
         for c in g_config.gemini.clients:
             client = GeminiClientWrapper(
                 client_id=c.id,
-                secure_1psid=c.secure_1psid,
-                secure_1psidts=c.secure_1psidts,
-                cookies=c.cookies,
-                proxy=c.proxy,
+                **c.model_dump(exclude={"id"}),
             )
             self._clients.append(client)
             self._id_map[c.id] = client
@@ -40,13 +37,7 @@ async def init(self) -> None:
         for client in self._clients:
             if not client.running():
                 try:
-                    await client.init(
-                        timeout=g_config.gemini.timeout,
-                        watchdog_timeout=g_config.gemini.watchdog_timeout,
-                        auto_refresh=g_config.gemini.auto_refresh,
-                        verbose=g_config.gemini.verbose,
-                        refresh_interval=g_config.gemini.refresh_interval,
-                    )
+                    await client.init()
                 except Exception:
                     logger.exception(f"Failed to initialize client {client.id}")
 
@@ -93,13 +84,7 @@ async def _ensure_client_ready(self, client: GeminiClientWrapper) -> bool:
                 return True
 
             try:
-                await client.init(
-                    timeout=g_config.gemini.timeout,
-                    watchdog_timeout=g_config.gemini.watchdog_timeout,
-                    auto_refresh=g_config.gemini.auto_refresh,
-                    verbose=g_config.gemini.verbose,
-                    refresh_interval=g_config.gemini.refresh_interval,
-                )
+                await client.init()
                 logger.info(f"Restarted Gemini client {client.id} after it stopped.")
                 return True
             except Exception:

From 090bd0017462c9202ac81fcbb12d38f6fcc6549e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 10 Mar 2026 22:34:08 +0700
Subject: [PATCH 207/236] Using default configuration values from global
 settings

Update dependencies to latest versions
---
 app/services/client.py | 3 ---
 uv.lock                | 4 ++--
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/app/services/client.py b/app/services/client.py
index 92f3737..e6638ab 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -34,13 +34,10 @@ async def init(self, *args: Any, **kwargs: Any) -> None:
         Inject default configuration values from global settings.
         """
         config = g_config.gemini
-        auto_close = kwargs.get("auto_close", False)
         try:
             await super().init(
                 timeout=config.timeout,
                 watchdog_timeout=config.watchdog_timeout,
-                auto_close=auto_close,
-                close_delay=config.timeout,
                 auto_refresh=config.auto_refresh,
                 refresh_interval=config.refresh_interval,
                 verbose=config.verbose,
diff --git a/uv.lock b/uv.lock
index c198b21..250849f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post288"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#e5cfac8ccc0a56c3b4da07a23596033bf88082f0" }
+version = "0.0.post289"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#d7d034db881abde77b475a869aea7911423b6d21" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From 7828a4adcfa378f55191d6bc22e6fa0d685f2785 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 11 Mar 2026 09:13:53 +0700
Subject: [PATCH 208/236] Remove cookies parameter to avoid HTTP 401 error

---
 README.md           | 10 ++--------
 README.zh.md        | 10 ++--------
 app/utils/config.py | 20 ++------------------
 config/config.yaml  |  5 ++---
 uv.lock             |  4 ++--
 5 files changed, 10 insertions(+), 39 deletions(-)

diff --git a/README.md b/README.md
index 5117a9a..7d96646 100644
--- a/README.md
+++ b/README.md
@@ -54,12 +54,8 @@ Edit `config/config.yaml` and provide at least one credential pair:
 gemini:
   clients:
     - id: "client-a"
-      secure_1psid: "YOUR_SECURE_1PSID_HERE" # Optional if 'cookies' is provided
-      secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" # Optional if 'cookies' is provided
-      # OR use a cookies dictionary:
-      # cookies:
-      #   __Secure-1PSID: "..."
-      #   __Secure-1PSIDTS: "..."
+      secure_1psid: "YOUR_SECURE_1PSID_HERE"
+      secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"
       proxy: null # Optional proxy URL (null/empty keeps direct connection)
 ```
 
@@ -184,8 +180,6 @@ export CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS="your-secure-1psidts"
 # Override optional proxy settings for client 0
 export CONFIG_GEMINI__CLIENTS__0__PROXY="socks5://127.0.0.1:1080"
 
-# Override using a JSON cookies string for client 0
-export CONFIG_GEMINI__CLIENTS__0__COOKIES='{"__Secure-1PSID": "...", "__Secure-1PSIDTS": "..."}'
 
 # Override conversation storage size limit
 export CONFIG_STORAGE__MAX_SIZE=268435456  # 256 MB
diff --git a/README.zh.md b/README.zh.md
index 30678fe..e446667 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -54,12 +54,8 @@ pip install -e .
 gemini:
   clients:
     - id: "client-a"
-      secure_1psid: "YOUR_SECURE_1PSID_HERE" # 若已提供 'cookies'，则此项可选
-      secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE" # 若已提供 'cookies'，则此项可选
-      # 或者使用 Cookies 字典：
-      # cookies:
-      #   __Secure-1PSID: "..."
-      #   __Secure-1PSIDTS: "..."
+      secure_1psid: "YOUR_SECURE_1PSID_HERE"
+      secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"
       proxy: null # 可选代理 URL (null/空值则保持直连)
 ```
 
@@ -184,8 +180,6 @@ export CONFIG_GEMINI__CLIENTS__0__SECURE_1PSIDTS="your-secure-1psidts"
 # 覆盖 Client 0 的代理设置
 export CONFIG_GEMINI__CLIENTS__0__PROXY="socks5://127.0.0.1:1080"
 
-# 通过 JSON 字符串覆盖 Client 0 的 Cookie 配置
-export CONFIG_GEMINI__CLIENTS__0__COOKIES='{"__Secure-1PSID": "...", "__Secure-1PSIDTS": "..."}'
 
 # 覆盖对话存储大小限制
 export CONFIG_STORAGE__MAX_SIZE=268435456  # 256 MB
diff --git a/app/utils/config.py b/app/utils/config.py
index 1f1cdde..3a4ade9 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -41,24 +41,8 @@ class GeminiClientSettings(BaseModel):
     id: str = Field(..., description="Unique identifier for the client")
     secure_1psid: str | None = Field(default=None, description="Gemini Secure 1PSID")
     secure_1psidts: str | None = Field(default=None, description="Gemini Secure 1PSIDTS")
-    cookies: dict[str, str] | None = Field(
-        default=None, description="Gemini cookies as a dictionary"
-    )
     proxy: str | None = Field(default=None, description="Proxy URL for this Gemini client")
 
-    @field_validator("cookies", mode="before")
-    @classmethod
-    def _parse_cookies(cls, v: Any) -> Any:
-        if isinstance(v, str) and v.strip().startswith("{"):
-            try:
-                return orjson.loads(v)
-            except orjson.JSONDecodeError:
-                try:
-                    return ast.literal_eval(v)
-                except (ValueError, SyntaxError):
-                    return v
-        return v
-
     @field_validator("proxy", mode="before")
     @classmethod
     def _blank_proxy_to_none(cls, value: str | None) -> str | None:
@@ -103,11 +87,11 @@ class GeminiConfig(BaseModel):
     )
     timeout: int = Field(default=450, ge=30, description="Init timeout in seconds")
     watchdog_timeout: int = Field(default=90, ge=30, description="Watchdog timeout in seconds")
-    auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini cookies")
+    auto_refresh: bool = Field(True, description="Enable auto-refresh for Gemini sessions")
     refresh_interval: int = Field(
         default=600,
         ge=60,
-        description="Interval in seconds to refresh Gemini cookies (Not less than 60s)",
+        description="Interval in seconds to refresh Gemini sessions (Not less than 60s)",
     )
     verbose: bool = Field(False, description="Enable verbose logging for Gemini API requests")
     max_chars_per_request: int = Field(
diff --git a/config/config.yaml b/config/config.yaml
index 0218f14..9321c61 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -19,9 +19,8 @@ cors:
 gemini:
   clients:
     - id: "example-id-1"   # Arbitrary client ID
-      secure_1psid: "YOUR_SECURE_1PSID_HERE"      # Optional: Gemini Secure 1PSID
-      secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"  # Optional: Gemini Secure 1PSIDTS
-      cookies: null                               # Optional: Cookies dictionary (e.g. {__Secure-1PSID: "...", __Secure-1PSIDTS: "..."})
+      secure_1psid: "YOUR_SECURE_1PSID_HERE"      # Gemini Secure 1PSID
+      secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"  # Gemini Secure 1PSIDTS
       proxy: null          # Optional proxy URL (null/empty means direct connection)
   timeout: 450             # Init timeout in seconds (Not less than 30s)
   watchdog_timeout: 90     # Watchdog timeout in seconds (Not less than 30s)
diff --git a/uv.lock b/uv.lock
index 250849f..b987a84 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post289"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#d7d034db881abde77b475a869aea7911423b6d21" }
+version = "0.0.post290"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#ee035ca48c998997a605d028a16f9eb595e9254d" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From f4b319c4818c2fc0c81905657c76fb0bae920813 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 11 Mar 2026 10:07:29 +0700
Subject: [PATCH 209/236] Resolved content drift during streaming.

---
 app/server/chat.py | 194 +++++++++++++++++++++------------------------
 1 file changed, 89 insertions(+), 105 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 3e7154b..ee5d9cf 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1244,29 +1244,23 @@ def make_chunk(delta_content: dict) -> str:
             if final_chunk.thoughts:
                 f_thoughts = final_chunk.thoughts
                 ft_len, ct_len = len(f_thoughts), len(full_thoughts)
-                if ft_len >= ct_len and f_thoughts.startswith(full_thoughts):
-                    if ft_len > ct_len:
-                        drift_t = f_thoughts[ct_len:]
-                        full_thoughts = f_thoughts
-                        yield make_chunk(
-                            {"delta": {"reasoning_content": drift_t}, "finish_reason": None}
-                        )
-                else:
-                    logger.debug("Significant thoughts drift detected, preferring accumulated.")
+                if ft_len > ct_len and f_thoughts.startswith(full_thoughts):
+                    drift_t = f_thoughts[ct_len:]
+                    full_thoughts = f_thoughts
+                    yield make_chunk(
+                        {"delta": {"reasoning_content": drift_t}, "finish_reason": None}
+                    )
 
             if final_chunk.text:
                 f_text = final_chunk.text
                 f_len, c_len = len(f_text), len(full_text)
-                if f_len >= c_len and f_text.startswith(full_text):
-                    if f_len > c_len:
-                        drift = f_text[c_len:]
-                        full_text = f_text
-                        if visible_drift := suppressor.process(drift):
-                            yield make_chunk(
-                                {"delta": {"content": visible_drift}, "finish_reason": None}
-                            )
-                else:
-                    logger.debug("Significant text drift detected, preferring accumulated state.")
+                if f_len > c_len and f_text.startswith(full_text):
+                    drift = f_text[c_len:]
+                    full_text = f_text
+                    if visible_drift := suppressor.process(drift):
+                        yield make_chunk(
+                            {"delta": {"content": visible_drift}, "finish_reason": None}
+                        )
 
         if remaining_text := suppressor.flush():
             yield make_chunk({"delta": {"content": remaining_text}, "finish_reason": None})
@@ -1673,113 +1667,103 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
             if last.thoughts:
                 l_thoughts = last.thoughts
                 lt_len, ct_len = len(l_thoughts), len(full_thoughts)
-                if lt_len >= ct_len and l_thoughts.startswith(full_thoughts):
-                    if lt_len > ct_len:
-                        drift_t = l_thoughts[ct_len:]
-                        full_thoughts = l_thoughts
-                        if not thought_open:
-                            thought_index = next_output_index
-                            next_output_index += 1
-                            yield make_event(
-                                "response.output_item.added",
-                                {
-                                    **base_event,
-                                    "type": "response.output_item.added",
-                                    "output_index": thought_index,
-                                    "item": ResponseReasoningItem(
-                                        id=thought_item_id,
-                                        type="reasoning",
-                                        status="in_progress",
-                                        summary=[],
-                                    ).model_dump(mode="json"),
-                                },
-                            )
-                            yield make_event(
-                                "response.reasoning_summary_part.added",
-                                {
-                                    **base_event,
-                                    "type": "response.reasoning_summary_part.added",
-                                    "item_id": thought_item_id,
-                                    "output_index": thought_index,
-                                    "summary_index": 0,
-                                    "part": SummaryTextContent(text="").model_dump(mode="json"),
-                                },
-                            )
-                            thought_open = True
-
+                if lt_len > ct_len and l_thoughts.startswith(full_thoughts):
+                    drift_t = l_thoughts[ct_len:]
+                    full_thoughts = l_thoughts
+                    if not thought_open:
+                        thought_index = next_output_index
+                        next_output_index += 1
                         yield make_event(
-                            "response.reasoning_summary_text.delta",
+                            "response.output_item.added",
                             {
                                 **base_event,
-                                "type": "response.reasoning_summary_text.delta",
+                                "type": "response.output_item.added",
+                                "output_index": thought_index,
+                                "item": ResponseReasoningItem(
+                                    id=thought_item_id,
+                                    type="reasoning",
+                                    status="in_progress",
+                                    summary=[],
+                                ).model_dump(mode="json"),
+                            },
+                        )
+                        yield make_event(
+                            "response.reasoning_summary_part.added",
+                            {
+                                **base_event,
+                                "type": "response.reasoning_summary_part.added",
                                 "item_id": thought_item_id,
                                 "output_index": thought_index,
                                 "summary_index": 0,
-                                "delta": drift_t,
+                                "part": SummaryTextContent(text="").model_dump(mode="json"),
                             },
                         )
-                else:
-                    logger.debug(
-                        "Significant thoughts drift detected in Responses API, preferring accumulated."
+                        thought_open = True
+
+                    yield make_event(
+                        "response.reasoning_summary_text.delta",
+                        {
+                            **base_event,
+                            "type": "response.reasoning_summary_text.delta",
+                            "item_id": thought_item_id,
+                            "output_index": thought_index,
+                            "summary_index": 0,
+                            "delta": drift_t,
+                        },
                     )
 
             if last.text:
                 l_text = last.text
                 l_len, c_len = len(l_text), len(full_text)
-                if l_len >= c_len and l_text.startswith(full_text):
-                    if l_len > c_len:
-                        drift = l_text[c_len:]
-                        full_text = l_text
-                        if visible := suppressor.process(drift):
-                            if not message_open:
-                                message_index = next_output_index
-                                next_output_index += 1
-                                yield make_event(
-                                    "response.output_item.added",
-                                    {
-                                        **base_event,
-                                        "type": "response.output_item.added",
-                                        "output_index": message_index,
-                                        "item": ResponseOutputMessage(
-                                            id=message_item_id,
-                                            type="message",
-                                            status="in_progress",
-                                            role="assistant",
-                                            content=[],
-                                        ).model_dump(mode="json"),
-                                    },
-                                )
-                                yield make_event(
-                                    "response.content_part.added",
-                                    {
-                                        **base_event,
-                                        "type": "response.content_part.added",
-                                        "item_id": message_item_id,
-                                        "output_index": message_index,
-                                        "content_index": 0,
-                                        "part": ResponseOutputText(
-                                            type="output_text", text=""
-                                        ).model_dump(mode="json"),
-                                    },
-                                )
-                                message_open = True
-
+                if l_len > c_len and l_text.startswith(full_text):
+                    drift = l_text[c_len:]
+                    full_text = l_text
+                    if visible := suppressor.process(drift):
+                        if not message_open:
+                            message_index = next_output_index
+                            next_output_index += 1
                             yield make_event(
-                                "response.output_text.delta",
+                                "response.output_item.added",
                                 {
                                     **base_event,
-                                    "type": "response.output_text.delta",
+                                    "type": "response.output_item.added",
+                                    "output_index": message_index,
+                                    "item": ResponseOutputMessage(
+                                        id=message_item_id,
+                                        type="message",
+                                        status="in_progress",
+                                        role="assistant",
+                                        content=[],
+                                    ).model_dump(mode="json"),
+                                },
+                            )
+                            yield make_event(
+                                "response.content_part.added",
+                                {
+                                    **base_event,
+                                    "type": "response.content_part.added",
                                     "item_id": message_item_id,
                                     "output_index": message_index,
                                     "content_index": 0,
-                                    "delta": visible,
-                                    "logprobs": [],
+                                    "part": ResponseOutputText(
+                                        type="output_text", text=""
+                                    ).model_dump(mode="json"),
                                 },
                             )
-                else:
-                    logger.debug(
-                        "Significant text drift detected in Responses API, preferring accumulated."
-                    )
+                            message_open = True
+
+                        yield make_event(
+                            "response.output_text.delta",
+                            {
+                                **base_event,
+                                "type": "response.output_text.delta",
+                                "item_id": message_item_id,
+                                "output_index": message_index,
+                                "content_index": 0,
+                                "delta": visible,
+                                "logprobs": [],
+                            },
+                        )
 
         remaining = suppressor.flush()
         if remaining and message_open:

From 2d3cf205ff780fc66457afc58cf448542430faa2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 11 Mar 2026 12:09:19 +0700
Subject: [PATCH 210/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index b987a84..223d9b6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post290"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#ee035ca48c998997a605d028a16f9eb595e9254d" }
+version = "0.0.post291"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#00765f99cb95b0fbc8aaa668f84ebdf829fafe5a" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From 00895cbe5f689668410f00b1437a5427212cda2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 12 Mar 2026 14:35:14 +0700
Subject: [PATCH 211/236] Update regex patterns to prevent exposing system tags

---
 app/server/chat.py  | 14 +++++++++-----
 app/utils/helper.py | 44 +++++++++++++++++++-------------------------
 2 files changed, 28 insertions(+), 30 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index ee5d9cf..a07a931 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -1079,12 +1079,16 @@ def process(self, chunk: str) -> str:
             match = STREAM_MASTER_RE.search(self.buffer)
             if not match:
                 tail_match = STREAM_TAIL_RE.search(self.buffer)
-                keep_len = len(tail_match.group(0)) if tail_match else 0
-                yield_len = len(self.buffer) - keep_len
-                if yield_len > 0:
+                if tail_match:
+                    yield_len = len(self.buffer) - len(tail_match.group(0))
+                    if yield_len > 0:
+                        if self._is_outputting():
+                            output.append(self.buffer[:yield_len])
+                        self.buffer = self.buffer[yield_len:]
+                else:
                     if self._is_outputting():
-                        output.append(self.buffer[:yield_len])
-                    self.buffer = self.buffer[yield_len:]
+                        output.append(self.buffer)
+                    self.buffer = ""
                 break
 
             start, end = match.span()
diff --git a/app/utils/helper.py b/app/utils/helper.py
index 3b1d90f..e7c0816 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -36,36 +36,32 @@
     "CRITICAL: Do NOT mix natural language with protocol tags. Either respond naturally OR provide the protocol block alone. There is no middle ground.\n"
 )
 TOOL_BLOCK_RE = re.compile(
-    r"\\?\[\s*ToolCalls\s*\\?]\s*(.*?)\s*\\?\[\s*\\?/\s*ToolCalls\s*\\?]",
+    r"\\?\[ToolCalls\\?](.*?)\\?\[\\?/ToolCalls\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TOOL_CALL_RE = re.compile(
-    r"\\?\[\s*Call\s*\\?:\s*(?P<name>(?:[^]\\]|\\.)+)\s*\\?]\s*(?P<body>.*?)\s*\\?\[\s*\\?/\s*Call\s*\\?]",
+    r"\\?\[Call\\?:(?P<name>[^]]+)\\?](?P<body>.*?)\\?\[\\?/Call\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_BLOCK_RE = re.compile(
-    r"\\?\[\s*ToolResults\s*\\?]\s*(.*?)\s*\\?\[\s*\\?/\s*ToolResults\s*\\?]",
+    r"\\?\[ToolResults\\?](.*?)\\?\[\\?/ToolResults\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 RESPONSE_ITEM_RE = re.compile(
-    r"\\?\[\s*Result\s*\\?:\s*(?P<name>(?:[^]\\]|\\.)+)\s*\\?]\s*(?P<body>.*?)\s*\\?\[\s*\\?/\s*Result\s*\\?]",
+    r"\\?\[Result\\?:(?P<name>[^]]+)\\?](?P<body>.*?)\\?\[\\?/Result\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_ARG_RE = re.compile(
-    r"\\?\[\s*CallParameter\s*\\?:\s*(?P<name>(?:[^]\\]|\\.)+)\s*\\?]\s*(?P<body>.*?)\s*\\?\[\s*\\?/\s*CallParameter\s*\\?]",
+    r"\\?\[CallParameter\\?:(?P<name>[^]]+)\\?](?P<body>.*?)\\?\[\\?/CallParameter\\?]",
     re.DOTALL | re.IGNORECASE,
 )
 TAGGED_RESULT_RE = re.compile(
-    r"\\?\[\s*ToolResult\s*\\?]\s*(.*?)\s*\\?\[\s*\\?/\s*ToolResult\s*\\?]",
+    r"\\?\[ToolResult\\?](.*?)\\?\[\\?/ToolResult\\?]",
     re.DOTALL | re.IGNORECASE,
 )
-CONTROL_TOKEN_RE = re.compile(
-    r"\\?\s*<\s*\\?\|\s*im\s*\\?_(?:start|end)\s*\\?\|\s*>\s*", re.IGNORECASE
-)
-CHATML_START_RE = re.compile(
-    r"\\?\s*<\s*\\?\|\s*im\s*\\?_start\s*\\?\|\s*>\s*(\w+)\s*\n?", re.IGNORECASE
-)
-CHATML_END_RE = re.compile(r"\\?\s*<\s*\\?\|\s*im\s*\\?_end\s*\\?\|\s*>\s*", re.IGNORECASE)
+CONTROL_TOKEN_RE = re.compile(r"\\?<\\?\|im\\?_(?:start|end)\\?\|\\?>", re.IGNORECASE)
+CHATML_START_RE = re.compile(r"\\?<\\?\|im\\?_start\\?\|\\?>(\w+)\n?", re.IGNORECASE)
+CHATML_END_RE = re.compile(r"\\?<\\?\|im\\?_end\\?\|\\?>", re.IGNORECASE)
 COMMONMARK_UNESCAPE_RE = re.compile(r"\\([!\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~])")
 PARAM_FENCE_RE = re.compile(r"^(?P<fence>`{3,})")
 TOOL_HINT_STRIPPED = TOOL_WRAP_HINT.strip()
@@ -89,19 +85,17 @@
 
 # --- Streaming Specific Patterns ---
 _START_PATTERNS = {
-    "TOOL": r"\\?\[\s*ToolCalls\s*\\?\]",
-    "ORPHAN": r"\\?\[\s*Call\s*\\?:\s*(?:[^\]\\]|\\.)+\s*\\?\]",
-    "RESP": r"\\?\[\s*ToolResults\s*\\?\]",
-    "ARG": r"\\?\[\s*CallParameter\s*\\?:\s*(?:[^\]\\]|\\.)+\s*\\?\]",
-    "RESULT": r"\\?\[\s*ToolResult\s*\\?\]",
-    "ITEM": r"\\?\[\s*Result\s*\\?:\s*(?:[^\]\\]|\\.)+\s*\\?\]",
-    "TAG": r"\\?\s*<\s*\\?\|\s*im\s*\\?_start\s*\\?\|\s*>",
+    "TOOL": r"\\?\[ToolCalls\\?]",
+    "ORPHAN": r"\\?\[Call\\?:[^]]+\\?]",
+    "RESP": r"\\?\[ToolResults\\?]",
+    "ARG": r"\\?\[CallParameter\\?:[^]]+\\?]",
+    "RESULT": r"\\?\[ToolResult\\?]",
+    "ITEM": r"\\?\[Result\\?:[^]]+\\?]",
+    "TAG": r"\\?<\\?\|im\\?_start\\?\|\\?>",
 }
 
-_PROTOCOL_ENDS = (
-    r"\\?\[\s*\\?/\s*(?:ToolCalls|Call|ToolResults|CallParameter|ToolResult|Result)\s*\\?\]"
-)
-_TAG_END = r"\\?\s*<\s*\\?\|\s*im\s*\\?_end\s*\\?\|\s*>"
+_PROTOCOL_ENDS = r"\\?\[\\?/(?:ToolCalls|Call|ToolResults|CallParameter|ToolResult|Result)\\?]"
+_TAG_END = r"\\?<\\?\|im\\?_end\\?\|\\?>"
 
 if TOOL_HINT_START_ESC and TOOL_HINT_END_ESC:
     _START_PATTERNS["HINT"] = rf"\n?{TOOL_HINT_START_ESC}:?\s*"
@@ -115,7 +109,7 @@
 
 STREAM_MASTER_RE = re.compile("|".join(_master_parts), re.IGNORECASE)
 STREAM_TAIL_RE = re.compile(
-    r"(?:\\|\\?\[[TCRP/]?\s*[^]]*|\\?\s*<\s*\\?\|?\s*i?\s*m?\s*\\?_?(?:s?t?a?r?t?|e?n?d?)\s*\\?\|?\s*>?|)$",
+    r"(?:\\|\\?\[[^]]*|\\?<\\?\|?i?m?\\?_?(?:s?t?a?r?t?|e?n?d?)\\?\|?\\?>?)$",
     re.IGNORECASE,
 )
 

From f1de210d0a7143bc3d711b6ceb87c9bfe20b8f86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 12 Mar 2026 22:38:20 +0700
Subject: [PATCH 212/236] Remove the unused <think> tag and clean up the
 redundant `extract_output` helper.

---
 app/server/chat.py     | 41 ++++++++++++++++-------------------------
 app/services/client.py | 15 +--------------
 uv.lock                |  4 ++--
 3 files changed, 19 insertions(+), 41 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index a07a931..0722c49 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -72,6 +72,7 @@
     estimate_tokens,
     extract_image_dimensions,
     extract_tool_calls,
+    normalize_llm_text,
     remove_tool_call_blocks,
     strip_system_hints,
     text_from_message,
@@ -1270,7 +1271,9 @@ def make_chunk(delta_content: dict) -> str:
             yield make_chunk({"delta": {"content": remaining_text}, "finish_reason": None})
 
         _, _, storage_output, detected_tool_calls = _process_llm_output(
-            full_thoughts, full_text, structured_requirement
+            normalize_llm_text(full_thoughts or ""),
+            normalize_llm_text(full_text or ""),
+            structured_requirement,
         )
 
         seen_hashes = {}
@@ -1823,7 +1826,9 @@ async def _make_async_gen(item: ModelOutput) -> AsyncGenerator[ModelOutput]:
             )
 
         _, assistant_text, storage_output, detected_tool_calls = _process_llm_output(
-            full_thoughts, full_text, structured_requirement
+            normalize_llm_text(full_thoughts or ""),
+            normalize_llm_text(full_text or ""),
+            structured_requirement,
         )
 
         image_items = []
@@ -2262,17 +2267,10 @@ async def create_chat_completion(
 
     assert isinstance(resp_or_stream, ModelOutput)
 
-    try:
-        thoughts = resp_or_stream.thoughts
-        raw_clean = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False)
-    except Exception as exc:
-        logger.error(f"Gemini output parsing failed: {exc}")
-        raise HTTPException(
-            status_code=status.HTTP_502_BAD_GATEWAY, detail="Malformed response."
-        ) from exc
-
     thoughts, visible_output, storage_output, tool_calls = _process_llm_output(
-        thoughts, raw_clean, structured_requirement
+        normalize_llm_text(resp_or_stream.thoughts or ""),
+        normalize_llm_text(resp_or_stream.text or ""),
+        structured_requirement,
     )
 
     images = resp_or_stream.images or []
@@ -2412,8 +2410,8 @@ async def create_response(
 ):
     base_url = str(raw_request.base_url)
     base_messages = _convert_responses_to_app_messages(request.input)
-    struct_req = _build_structured_requirement(request.response_format)
-    extra_instr = [struct_req.instruction] if struct_req else []
+    structured_requirement = _build_structured_requirement(request.response_format)
+    extra_instr = [structured_requirement.instruction] if structured_requirement else []
 
     standard_tools, image_tools = [], []
     if request.tools:
@@ -2509,22 +2507,15 @@ async def create_response(
             session,
             request,
             base_url,
-            struct_req,
+            structured_requirement,
         )
 
     assert isinstance(resp_or_stream, ModelOutput)
 
-    try:
-        thoughts = resp_or_stream.thoughts
-        raw_clean = GeminiClientWrapper.extract_output(resp_or_stream, include_thoughts=False)
-    except Exception as exc:
-        logger.error(f"Gemini parsing failed: {exc}")
-        raise HTTPException(
-            status_code=status.HTTP_502_BAD_GATEWAY, detail="Malformed response."
-        ) from exc
-
     thoughts, assistant_text, storage_output, tool_calls = _process_llm_output(
-        thoughts, raw_clean, struct_req
+        normalize_llm_text(resp_or_stream.thoughts or ""),
+        normalize_llm_text(resp_or_stream.text or ""),
+        structured_requirement,
     )
     images = resp_or_stream.images or []
     if (
diff --git a/app/services/client.py b/app/services/client.py
index e6638ab..b9ba25e 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -3,14 +3,13 @@
 from typing import Any
 
 import orjson
-from gemini_webapi import GeminiClient, ModelOutput
+from gemini_webapi import GeminiClient
 from loguru import logger
 
 from app.models import AppMessage
 from app.utils import g_config
 from app.utils.helper import (
     add_tag,
-    normalize_llm_text,
     save_file_to_tempfile,
     save_url_to_tempfile,
 )
@@ -173,15 +172,3 @@ async def process_conversation(
 
         conversation.append(add_tag("assistant", "", unclose=True))
         return "\n".join(conversation), files
-
-    @staticmethod
-    def extract_output(response: ModelOutput, include_thoughts: bool = True) -> str:
-        text = ""
-        if include_thoughts and response.thoughts:
-            text += f"<think>{response.thoughts}</think>\n"
-        if response.text:
-            text += response.text
-        else:
-            text += str(response)
-
-        return normalize_llm_text(text)
diff --git a/uv.lock b/uv.lock
index 223d9b6..d62dfe3 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post291"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#00765f99cb95b0fbc8aaa668f84ebdf829fafe5a" }
+version = "0.0.post292"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#ea90f9e903ca4f624ff6e94b200735922dfbc08f" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From 370f104d3ce70e3484599912d8aeb879e1b51c2c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Mar 2026 09:50:40 +0700
Subject: [PATCH 213/236] Clean up the redundant code and update dependencies
 to latest versions

---
 app/server/chat.py  |   5 +-
 app/utils/config.py |  16 ++-----
 app/utils/helper.py |   2 +-
 pyproject.toml      |   6 +--
 uv.lock             | 109 ++++++++++++++++++++++----------------------
 5 files changed, 62 insertions(+), 76 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 0722c49..3190b19 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -73,7 +73,6 @@
     extract_image_dimensions,
     extract_tool_calls,
     normalize_llm_text,
-    remove_tool_call_blocks,
     strip_system_hints,
     text_from_message,
 )
@@ -357,9 +356,7 @@ def _process_llm_output(
         logger.debug(f"Detected {len(tool_calls)} tool call(s) in model output.")
 
     visible_output = visible_output.strip()
-
-    storage_output = remove_tool_call_blocks(raw_text)
-    storage_output = storage_output.strip()
+    storage_output = visible_output
 
     if structured_requirement and visible_output:
         try:
diff --git a/app/utils/config.py b/app/utils/config.py
index 3a4ade9..e569fbf 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -1,7 +1,7 @@
 import ast
 import os
 import sys
-from typing import Any, Literal
+from typing import Any, Literal, cast
 
 import orjson
 from loguru import logger
@@ -250,7 +250,7 @@ def extract_gemini_clients_env() -> dict[int, dict[str, Any]]:
             idx = int(index_str)
             env_overrides.setdefault(idx, {})[field] = v
             to_delete.append(k)
-    # Remove these environment variables to avoid Pydantic parsing errors
+
     for k in to_delete:
         del os.environ[k]
     return env_overrides
@@ -306,9 +306,8 @@ def extract_gemini_models_env() -> dict[int, dict[str, Any]]:
         if parsed_successfully and isinstance(models_list, list):
             for idx, model_data in enumerate(models_list):
                 if isinstance(model_data, dict):
-                    env_overrides[idx] = model_data
+                    env_overrides[idx] = cast(dict[str, Any], model_data)
 
-            # Remove the environment variable to avoid Pydantic parsing errors
             del os.environ[root_key]
 
     return env_overrides
@@ -328,12 +327,10 @@ def _merge_models_with_env(
     for idx in sorted(env_overrides):
         overrides = env_overrides[idx]
         if idx < len(result_models):
-            # Update existing model: overwrite fields found in env
             model_dict = result_models[idx].model_dump()
             model_dict.update(overrides)
             result_models[idx] = GeminiModelConfig(**model_dict)
         elif idx == len(result_models):
-            # Append new models
             new_model = GeminiModelConfig(**overrides)
             result_models.append(new_model)
         else:
@@ -352,20 +349,13 @@ def initialize_config() -> Config:
         Config: Configuration object
     """
     try:
-        # First, extract and remove Gemini clients related environment variables
         env_clients_overrides = extract_gemini_clients_env()
-        # Extract and remove Gemini models related environment variables
         env_models_overrides = extract_gemini_models_env()
-
-        # Then, initialize Config with pydantic_settings
         config = Config()
 
-        # Synthesize clients
         config.gemini.clients = _merge_clients_with_env(
             config.gemini.clients, env_clients_overrides
         )
-
-        # Synthesize models
         config.gemini.models = _merge_models_with_env(config.gemini.models, env_models_overrides)
 
         return config
diff --git a/app/utils/helper.py b/app/utils/helper.py
index e7c0816..a2d8471 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -33,7 +33,7 @@
     "[/CallParameter]\n"
     "[/Call]\n"
     "[/ToolCalls]\n\n"
-    "CRITICAL: Do NOT mix natural language with protocol tags. Either respond naturally OR provide the protocol block alone. There is no middle ground.\n"
+    "CRITICAL: Do NOT mix natural language with protocol tags. Either respond naturally OR provide the protocol block alone. There is no middle ground."
 )
 TOOL_BLOCK_RE = re.compile(
     r"\\?\[ToolCalls\\?](.*?)\\?\[\\?/ToolCalls\\?]",
diff --git a/pyproject.toml b/pyproject.toml
index 3afe92a..d5b96ff 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ dependencies = [
     "fastapi>=0.135.1",
     "gemini-webapi>=1.21.0",
     "httptools>=0.7.1",
-    "lmdb>=1.7.5",
+    "lmdb>=1.8.1",
     "loguru>=0.7.3",
     "orjson>=3.11.7",
     "pydantic-settings[yaml]>=2.13.1",
@@ -23,8 +23,8 @@ Repository = "https://github.com/Nativu5/Gemini-FastAPI"
 [project.optional-dependencies]
 dev = [
     "pytest>=9.0.2",
-    "ruff>=0.15.5",
-    "ty>=0.0.21",
+    "ruff>=0.15.6",
+    "ty>=0.0.22",
 ]
 
 [dependency-groups]
diff --git a/uv.lock b/uv.lock
index d62dfe3..d1f1f66 100644
--- a/uv.lock
+++ b/uv.lock
@@ -159,13 +159,13 @@ requires-dist = [
     { name = "fastapi", specifier = ">=0.135.1" },
     { name = "gemini-webapi", git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi" },
     { name = "httptools", specifier = ">=0.7.1" },
-    { name = "lmdb", specifier = ">=1.7.5" },
+    { name = "lmdb", specifier = ">=1.8.1" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "orjson", specifier = ">=3.11.7" },
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.1" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" },
-    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.5" },
-    { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.21" },
+    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.6" },
+    { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.22" },
     { name = "uvicorn", specifier = ">=0.41.0" },
     { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" },
 ]
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post292"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#ea90f9e903ca4f624ff6e94b200735922dfbc08f" }
+version = "0.0.post293"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#be8c783bbf89c94fbcebb76a3f556d1ad830bb12" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },
@@ -229,17 +229,16 @@ wheels = [
 
 [[package]]
 name = "lmdb"
-version = "1.7.5"
+version = "1.8.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/c7/a3/3756f2c6adba4a1413dba55e6c81a20b38a868656517308533e33cb59e1c/lmdb-1.7.5.tar.gz", hash = "sha256:f0604751762cb097059d5412444c4057b95f386c7ed958363cf63f453e5108da", size = 883490, upload-time = "2025-10-15T03:39:44.038Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/23/19/392f028e7ebcc1cc8212fe8a315a909b7a556278456f0bab9234d3a3b665/lmdb-1.8.1.tar.gz", hash = "sha256:44ef24033929e9cc227a7e17287473c452b462d716f118db885c667c80f57429", size = 886349, upload-time = "2026-03-12T23:21:48.42Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/38/f8/03275084218eacdbdf7e185d693e1db4cb79c35d18fac47fa0d388522a0d/lmdb-1.7.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:66ae02fa6179e46bb69fe446b7e956afe8706ae17ec1d4cd9f7056e161019156", size = 101508, upload-time = "2025-10-15T03:39:07.228Z" },
-    { url = "https://files.pythonhosted.org/packages/20/b9/bc33ae2e4940359ba2fc412e6a755a2f126bc5062b4aaf35edd3a791f9a5/lmdb-1.7.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bf65c573311ac8330c7908257f76b28ae3576020123400a81a6b650990dc028c", size = 100105, upload-time = "2025-10-15T03:39:08.491Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/f6/22f84b776a64d3992f052ecb637c35f1764a39df4f2190ecc5a3a1295bd7/lmdb-1.7.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97bcb3fc12841a8828db918e494fe0fd016a73d2680ad830d75719bb3bf4e76a", size = 301500, upload-time = "2025-10-15T03:39:09.463Z" },
-    { url = "https://files.pythonhosted.org/packages/2a/4d/8e6be8d7d5a30d47fa0ce4b55e3a8050ad689556e6e979d206b4ac67b733/lmdb-1.7.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:865f374f6206ab4aacb92ffb1dc612ee1a31a421db7c89733abe06b81ac87cb0", size = 302285, upload-time = "2025-10-15T03:39:10.856Z" },
-    { url = "https://files.pythonhosted.org/packages/5e/dc/7e04fb31a8f88951db81ac677e3ccb3e09248eda40e6ad52f74fd9370c32/lmdb-1.7.5-cp313-cp313-win_amd64.whl", hash = "sha256:82a04d5ca2a6a799c8db7f209354c48aebb49ff338530f5813721fc4c68e4450", size = 99447, upload-time = "2025-10-15T03:39:12.151Z" },
-    { url = "https://files.pythonhosted.org/packages/5b/50/e3f97efab17b3fad4afde99b3c957ecac4ffbefada6874a57ad0c695660a/lmdb-1.7.5-cp313-cp313-win_arm64.whl", hash = "sha256:0ad85a15acbfe8a42fdef92ee5e869610286d38507e976755f211be0fc905ca7", size = 94145, upload-time = "2025-10-15T03:39:13.461Z" },
-    { url = "https://files.pythonhosted.org/packages/bd/2c/982cb5afed533d0cb8038232b40c19b5b85a2d887dec74dfd39e8351ef4b/lmdb-1.7.5-py3-none-any.whl", hash = "sha256:fc344bb8bc0786c87c4ccb19b31f09a38c08bd159ada6f037d669426fea06f03", size = 148539, upload-time = "2025-10-15T03:39:42.982Z" },
+    { url = "https://files.pythonhosted.org/packages/75/5f/ec2fe6bb0986fae28db80292ffe5146ed7cc4d478d683d67050d2691a538/lmdb-1.8.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8b197007a5762411b7484e533ed1d03dca1b8ba1eee390233ac6e62ff45bd417", size = 101865, upload-time = "2026-03-12T23:21:12.902Z" },
+    { url = "https://files.pythonhosted.org/packages/75/35/1e43bba9658292c2ac787f5d003baa5ed37cad1b52666526edf4c908fb7d/lmdb-1.8.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a9454649e62bc6f4a45f9aed175fcdd6fc2e91922bf970fd561053c616281d0a", size = 100462, upload-time = "2026-03-12T23:21:14.369Z" },
+    { url = "https://files.pythonhosted.org/packages/64/8e/2b1a0caa42b6f980a8b8663a272b9a52d4fd51ef0ca36cdec768cea02978/lmdb-1.8.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a51881d284116d82ead233b20ae7f6dbec8624dd7b0593a755c84e0d0bc4cc29", size = 303313, upload-time = "2026-03-12T23:21:15.636Z" },
+    { url = "https://files.pythonhosted.org/packages/40/51/8061694cf7b883d2a166965cbaa961ea1ce692ce1782ac58091b5aa0fdb5/lmdb-1.8.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:04c5472bfc38377a2b32ae3b494d82d9c8db7c64e9053ca1b7c86aa862ebaaf9", size = 303895, upload-time = "2026-03-12T23:21:16.769Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/0f/c189c40d833ecd64f3e375a0bb02378110fc958916053ee687ec2c7d5079/lmdb-1.8.1-cp313-cp313-win_amd64.whl", hash = "sha256:fe0e34b2b20f47a108c3e04b397d1e27f080a7b0256c33efb5aef7bd1bccb923", size = 99707, upload-time = "2026-03-12T23:21:17.862Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/c3/3c87bede5b62163b768e6a4bca893f59d3996cb6fc4052bfd67847c0efd7/lmdb-1.8.1-cp313-cp313-win_arm64.whl", hash = "sha256:c3550849cdbaf0ead6265cb5b10134b223c2cede7ce7a1f3390a55975e3a06d4", size = 94605, upload-time = "2026-03-12T23:21:19.102Z" },
 ]
 
 [[package]]
@@ -418,27 +417,27 @@ wheels = [
 
 [[package]]
 name = "ruff"
-version = "0.15.5"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/77/9b/840e0039e65fcf12758adf684d2289024d6140cde9268cc59887dc55189c/ruff-0.15.5.tar.gz", hash = "sha256:7c3601d3b6d76dce18c5c824fc8d06f4eef33d6df0c21ec7799510cde0f159a2", size = 4574214, upload-time = "2026-03-05T20:06:34.946Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/47/20/5369c3ce21588c708bcbe517a8fbe1a8dfdb5dfd5137e14790b1da71612c/ruff-0.15.5-py3-none-linux_armv6l.whl", hash = "sha256:4ae44c42281f42e3b06b988e442d344a5b9b72450ff3c892e30d11b29a96a57c", size = 10478185, upload-time = "2026-03-05T20:06:29.093Z" },
-    { url = "https://files.pythonhosted.org/packages/44/ed/e81dd668547da281e5dce710cf0bc60193f8d3d43833e8241d006720e42b/ruff-0.15.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6edd3792d408ebcf61adabc01822da687579a1a023f297618ac27a5b51ef0080", size = 10859201, upload-time = "2026-03-05T20:06:32.632Z" },
-    { url = "https://files.pythonhosted.org/packages/c4/8f/533075f00aaf19b07c5cd6aa6e5d89424b06b3b3f4583bfa9c640a079059/ruff-0.15.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:89f463f7c8205a9f8dea9d658d59eff49db05f88f89cc3047fb1a02d9f344010", size = 10184752, upload-time = "2026-03-05T20:06:40.312Z" },
-    { url = "https://files.pythonhosted.org/packages/66/0e/ba49e2c3fa0395b3152bad634c7432f7edfc509c133b8f4529053ff024fb/ruff-0.15.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba786a8295c6574c1116704cf0b9e6563de3432ac888d8f83685654fe528fd65", size = 10534857, upload-time = "2026-03-05T20:06:19.581Z" },
-    { url = "https://files.pythonhosted.org/packages/59/71/39234440f27a226475a0659561adb0d784b4d247dfe7f43ffc12dd02e288/ruff-0.15.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd4b801e57955fe9f02b31d20375ab3a5c4415f2e5105b79fb94cf2642c91440", size = 10309120, upload-time = "2026-03-05T20:06:00.435Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/87/4140aa86a93df032156982b726f4952aaec4a883bb98cb6ef73c347da253/ruff-0.15.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391f7c73388f3d8c11b794dbbc2959a5b5afe66642c142a6effa90b45f6f5204", size = 11047428, upload-time = "2026-03-05T20:05:51.867Z" },
-    { url = "https://files.pythonhosted.org/packages/5a/f7/4953e7e3287676f78fbe85e3a0ca414c5ca81237b7575bdadc00229ac240/ruff-0.15.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dc18f30302e379fe1e998548b0f5e9f4dff907f52f73ad6da419ea9c19d66c8", size = 11914251, upload-time = "2026-03-05T20:06:22.887Z" },
-    { url = "https://files.pythonhosted.org/packages/77/46/0f7c865c10cf896ccf5a939c3e84e1cfaeed608ff5249584799a74d33835/ruff-0.15.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc6e7f90087e2d27f98dc34ed1b3ab7c8f0d273cc5431415454e22c0bd2a681", size = 11333801, upload-time = "2026-03-05T20:05:57.168Z" },
-    { url = "https://files.pythonhosted.org/packages/d3/01/a10fe54b653061585e655f5286c2662ebddb68831ed3eaebfb0eb08c0a16/ruff-0.15.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1cb7169f53c1ddb06e71a9aebd7e98fc0fea936b39afb36d8e86d36ecc2636a", size = 11206821, upload-time = "2026-03-05T20:06:03.441Z" },
-    { url = "https://files.pythonhosted.org/packages/7a/0d/2132ceaf20c5e8699aa83da2706ecb5c5dcdf78b453f77edca7fb70f8a93/ruff-0.15.5-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9b037924500a31ee17389b5c8c4d88874cc6ea8e42f12e9c61a3d754ff72f1ca", size = 11133326, upload-time = "2026-03-05T20:06:25.655Z" },
-    { url = "https://files.pythonhosted.org/packages/72/cb/2e5259a7eb2a0f87c08c0fe5bf5825a1e4b90883a52685524596bfc93072/ruff-0.15.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:65bb414e5b4eadd95a8c1e4804f6772bbe8995889f203a01f77ddf2d790929dd", size = 10510820, upload-time = "2026-03-05T20:06:37.79Z" },
-    { url = "https://files.pythonhosted.org/packages/ff/20/b67ce78f9e6c59ffbdb5b4503d0090e749b5f2d31b599b554698a80d861c/ruff-0.15.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d20aa469ae3b57033519c559e9bc9cd9e782842e39be05b50e852c7c981fa01d", size = 10302395, upload-time = "2026-03-05T20:05:54.504Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/e5/719f1acccd31b720d477751558ed74e9c88134adcc377e5e886af89d3072/ruff-0.15.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:15388dd28c9161cdb8eda68993533acc870aa4e646a0a277aa166de9ad5a8752", size = 10754069, upload-time = "2026-03-05T20:06:06.422Z" },
-    { url = "https://files.pythonhosted.org/packages/c3/9c/d1db14469e32d98f3ca27079dbd30b7b44dbb5317d06ab36718dee3baf03/ruff-0.15.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b30da330cbd03bed0c21420b6b953158f60c74c54c5f4c1dabbdf3a57bf355d2", size = 11304315, upload-time = "2026-03-05T20:06:10.867Z" },
-    { url = "https://files.pythonhosted.org/packages/28/3a/950367aee7c69027f4f422059227b290ed780366b6aecee5de5039d50fa8/ruff-0.15.5-py3-none-win32.whl", hash = "sha256:732e5ee1f98ba5b3679029989a06ca39a950cced52143a0ea82a2102cb592b74", size = 10551676, upload-time = "2026-03-05T20:06:13.705Z" },
-    { url = "https://files.pythonhosted.org/packages/b8/00/bf077a505b4e649bdd3c47ff8ec967735ce2544c8e4a43aba42ee9bf935d/ruff-0.15.5-py3-none-win_amd64.whl", hash = "sha256:821d41c5fa9e19117616c35eaa3f4b75046ec76c65e7ae20a333e9a8696bc7fe", size = 11678972, upload-time = "2026-03-05T20:06:45.379Z" },
-    { url = "https://files.pythonhosted.org/packages/fe/4e/cd76eca6db6115604b7626668e891c9dd03330384082e33662fb0f113614/ruff-0.15.5-py3-none-win_arm64.whl", hash = "sha256:b498d1c60d2fe5c10c45ec3f698901065772730b411f164ae270bb6bfcc4740b", size = 10965572, upload-time = "2026-03-05T20:06:16.984Z" },
+version = "0.15.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/51/df/f8629c19c5318601d3121e230f74cbee7a3732339c52b21daa2b82ef9c7d/ruff-0.15.6.tar.gz", hash = "sha256:8394c7bb153a4e3811a4ecdacd4a8e6a4fa8097028119160dffecdcdf9b56ae4", size = 4597916, upload-time = "2026-03-12T23:05:47.51Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9e/2f/4e03a7e5ce99b517e98d3b4951f411de2b0fa8348d39cf446671adcce9a2/ruff-0.15.6-py3-none-linux_armv6l.whl", hash = "sha256:7c98c3b16407b2cf3d0f2b80c80187384bc92c6774d85fefa913ecd941256fff", size = 10508953, upload-time = "2026-03-12T23:05:17.246Z" },
+    { url = "https://files.pythonhosted.org/packages/70/60/55bcdc3e9f80bcf39edf0cd272da6fa511a3d94d5a0dd9e0adf76ceebdb4/ruff-0.15.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ee7dcfaad8b282a284df4aa6ddc2741b3f4a18b0555d626805555a820ea181c3", size = 10942257, upload-time = "2026-03-12T23:05:23.076Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/f9/005c29bd1726c0f492bfa215e95154cf480574140cb5f867c797c18c790b/ruff-0.15.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:3bd9967851a25f038fc8b9ae88a7fbd1b609f30349231dffaa37b6804923c4bb", size = 10322683, upload-time = "2026-03-12T23:05:33.738Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/74/2f861f5fd7cbb2146bddb5501450300ce41562da36d21868c69b7a828169/ruff-0.15.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13f4594b04e42cd24a41da653886b04d2ff87adbf57497ed4f728b0e8a4866f8", size = 10660986, upload-time = "2026-03-12T23:05:53.245Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/a1/309f2364a424eccb763cdafc49df843c282609f47fe53aa83f38272389e0/ruff-0.15.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e2ed8aea2f3fe57886d3f00ea5b8aae5bf68d5e195f487f037a955ff9fbaac9e", size = 10332177, upload-time = "2026-03-12T23:05:56.145Z" },
+    { url = "https://files.pythonhosted.org/packages/30/41/7ebf1d32658b4bab20f8ac80972fb19cd4e2c6b78552be263a680edc55ac/ruff-0.15.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70789d3e7830b848b548aae96766431c0dc01a6c78c13381f423bf7076c66d15", size = 11170783, upload-time = "2026-03-12T23:06:01.742Z" },
+    { url = "https://files.pythonhosted.org/packages/76/be/6d488f6adca047df82cd62c304638bcb00821c36bd4881cfca221561fdfc/ruff-0.15.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:542aaf1de3154cea088ced5a819ce872611256ffe2498e750bbae5247a8114e9", size = 12044201, upload-time = "2026-03-12T23:05:28.697Z" },
+    { url = "https://files.pythonhosted.org/packages/71/68/e6f125df4af7e6d0b498f8d373274794bc5156b324e8ab4bf5c1b4fc0ec7/ruff-0.15.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c22e6f02c16cfac3888aa636e9eba857254d15bbacc9906c9689fdecb1953ab", size = 11421561, upload-time = "2026-03-12T23:05:31.236Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/9f/f85ef5fd01a52e0b472b26dc1b4bd228b8f6f0435975442ffa4741278703/ruff-0.15.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98893c4c0aadc8e448cfa315bd0cc343a5323d740fe5f28ef8a3f9e21b381f7e", size = 11310928, upload-time = "2026-03-12T23:05:45.288Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/26/b75f8c421f5654304b89471ed384ae8c7f42b4dff58fa6ce1626d7f2b59a/ruff-0.15.6-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:70d263770d234912374493e8cc1e7385c5d49376e41dfa51c5c3453169dc581c", size = 11235186, upload-time = "2026-03-12T23:05:50.677Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/d4/d5a6d065962ff7a68a86c9b4f5500f7d101a0792078de636526c0edd40da/ruff-0.15.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:55a1ad63c5a6e54b1f21b7514dfadc0c7fb40093fa22e95143cf3f64ebdcd512", size = 10635231, upload-time = "2026-03-12T23:05:37.044Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/56/7c3acf3d50910375349016cf33de24be021532042afbed87942858992491/ruff-0.15.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8dc473ba093c5ec238bb1e7429ee676dca24643c471e11fbaa8a857925b061c0", size = 10340357, upload-time = "2026-03-12T23:06:04.748Z" },
+    { url = "https://files.pythonhosted.org/packages/06/54/6faa39e9c1033ff6a3b6e76b5df536931cd30caf64988e112bbf91ef5ce5/ruff-0.15.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:85b042377c2a5561131767974617006f99f7e13c63c111b998f29fc1e58a4cfb", size = 10860583, upload-time = "2026-03-12T23:05:58.978Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/1e/509a201b843b4dfb0b32acdedf68d951d3377988cae43949ba4c4133a96a/ruff-0.15.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cef49e30bc5a86a6a92098a7fbf6e467a234d90b63305d6f3ec01225a9d092e0", size = 11410976, upload-time = "2026-03-12T23:05:39.955Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/25/3fc9114abf979a41673ce877c08016f8e660ad6cf508c3957f537d2e9fa9/ruff-0.15.6-py3-none-win32.whl", hash = "sha256:bbf67d39832404812a2d23020dda68fee7f18ce15654e96fb1d3ad21a5fe436c", size = 10616872, upload-time = "2026-03-12T23:05:42.451Z" },
+    { url = "https://files.pythonhosted.org/packages/89/7a/09ece68445ceac348df06e08bf75db72d0e8427765b96c9c0ffabc1be1d9/ruff-0.15.6-py3-none-win_amd64.whl", hash = "sha256:aee25bc84c2f1007ecb5037dff75cef00414fdf17c23f07dc13e577883dca406", size = 11787271, upload-time = "2026-03-12T23:05:20.168Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/d0/578c47dd68152ddddddf31cd7fc67dc30b7cdf639a86275fda821b0d9d98/ruff-0.15.6-py3-none-win_arm64.whl", hash = "sha256:c34de3dd0b0ba203be50ae70f5910b17188556630e2178fd7d79fc030eb0d837", size = 11060497, upload-time = "2026-03-12T23:05:25.968Z" },
 ]
 
 [[package]]
@@ -455,26 +454,26 @@ wheels = [
 
 [[package]]
 name = "ty"
-version = "0.0.21"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ee/20/2ba8fd9493c89c41dfe9dbb73bc70a28b28028463bc0d2897ba8be36230a/ty-0.0.21.tar.gz", hash = "sha256:a4c2ba5d67d64df8fcdefd8b280ac1149d24a73dbda82fa953a0dff9d21400ed", size = 5297967, upload-time = "2026-03-06T01:57:13.809Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/36/70/edf38bb37517531681d1c37f5df64744e5ad02673c02eb48447eae4bea08/ty-0.0.21-py3-none-linux_armv6l.whl", hash = "sha256:7bdf2f572378de78e1f388d24691c89db51b7caf07cf90f2bfcc1d6b18b70a76", size = 10299222, upload-time = "2026-03-06T01:57:16.64Z" },
-    { url = "https://files.pythonhosted.org/packages/72/62/0047b0bd19afeefbc7286f20a5f78a2aa39f92b4d89853f0d7185ab89edc/ty-0.0.21-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7e9613994610431ab8625025bd2880dbcb77c5c9fabdd21134cda12d840a529d", size = 10130513, upload-time = "2026-03-06T01:57:29.93Z" },
-    { url = "https://files.pythonhosted.org/packages/a2/20/0b93a9e91aaed23155780258cdfdb4726ef68b6985378ac069bc427291a0/ty-0.0.21-py3-none-macosx_11_0_arm64.whl", hash = "sha256:56d3b198b64dd0a19b2b66e257deaed2ecea568e722ae5352f3c6fb62027f89d", size = 9605425, upload-time = "2026-03-06T01:57:27.115Z" },
-    { url = "https://files.pythonhosted.org/packages/ea/fd/9945e2fa2996a1287b1e1d7ce050e97e1f420233b271e770934bfa0880a0/ty-0.0.21-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d23d2c34f7a77d974bb08f0860ef700addc8a683d81a0319f71c08f87506cfd0", size = 10108298, upload-time = "2026-03-06T01:57:35.429Z" },
-    { url = "https://files.pythonhosted.org/packages/52/e7/4ec52fcb15f3200826c9f048472c062549a05b0d1ef0b51f32d527b513c4/ty-0.0.21-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:56b01fd2519637a4ca88344f61c96225f540c98ff18bca321d4eaa7bb0f7aa2f", size = 10121556, upload-time = "2026-03-06T01:57:03.242Z" },
-    { url = "https://files.pythonhosted.org/packages/ee/c0/ad457be2a8abea0f25549598bd098554540ced66229488daa0d558dad3c8/ty-0.0.21-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9de7e11c63c6afc40f3e9ba716374add171aee7fabc70b5146a510705c6d41b", size = 10603264, upload-time = "2026-03-06T01:56:52.134Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/5b/2ecc7a2175243a4bcb72f5298ae41feabbb93b764bb0dc45722f3752c2c2/ty-0.0.21-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:62f7f5b235c4f7876db305c36997aea07b7af29b1a068f373d0e2547e25f32ff", size = 11196428, upload-time = "2026-03-06T01:57:32.94Z" },
-    { url = "https://files.pythonhosted.org/packages/37/f5/aff507d6a901f328ef96a298032b0c11aaaf950a146ed7dd3b5bf2cd3acf/ty-0.0.21-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ee8399f7c453a425291e6688efe430cfae7ab0ac4ffd50eba9f872bf878b54f6", size = 10866355, upload-time = "2026-03-06T01:56:57.831Z" },
-    { url = "https://files.pythonhosted.org/packages/be/30/822bbcb92d55b65989aa7ed06d9585f28ade9c9447369194ed4b0fb3b5b9/ty-0.0.21-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:210e7568c9f886c4d01308d751949ee714ad7ad9d7d928d2ba90d329dd880367", size = 10738177, upload-time = "2026-03-06T01:57:11.256Z" },
-    { url = "https://files.pythonhosted.org/packages/57/cc/46e7991b6469e93ac2c7e533a028983e402485580150ac864c56352a3a82/ty-0.0.21-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:53508e345b11569f78b21ba8e2b4e61df38a9754947fb3cd9f2ef574367338fb", size = 10079158, upload-time = "2026-03-06T01:57:00.516Z" },
-    { url = "https://files.pythonhosted.org/packages/15/c2/0bbdadfbd008240f8f1a87dc877433cb3884436097926107ccf06e618199/ty-0.0.21-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:553e43571f4a35604c36cfd07d8b61a5eb7a714e3c67f8c4ff2cf674fefbaef9", size = 10150535, upload-time = "2026-03-06T01:57:08.815Z" },
-    { url = "https://files.pythonhosted.org/packages/c5/b5/2dbdb7b57b5362200ef0a39738ebd31331726328336def0143ac097ee59d/ty-0.0.21-py3-none-musllinux_1_2_i686.whl", hash = "sha256:666f6822e3b9200abfa7e95eb0ddd576460adb8d66b550c0ad2c70abc84a2048", size = 10319803, upload-time = "2026-03-06T01:57:19.106Z" },
-    { url = "https://files.pythonhosted.org/packages/72/84/70e52c0b7abc7c2086f9876ef454a73b161d3125315536d8d7e911c94ca4/ty-0.0.21-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:a0854d008347ce4a5fb351af132f660a390ab2a1163444d075251d43e6f74b9b", size = 10826239, upload-time = "2026-03-06T01:57:21.727Z" },
-    { url = "https://files.pythonhosted.org/packages/a1/8a/1f72480fd013bbc6cd1929002abbbcde9a0b08ead6a15154de9d7f7fa37e/ty-0.0.21-py3-none-win32.whl", hash = "sha256:bef3ab4c7b966bcc276a8ac6c11b63ba222d21355b48d471ea782c4104eee4e0", size = 9693196, upload-time = "2026-03-06T01:57:24.126Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/f8/1104808b875c26c640e536945753a78562d606bef4e241d9dbf3d92477f6/ty-0.0.21-py3-none-win_amd64.whl", hash = "sha256:a709d576e5bea84b745d43058d8b9cd4f27f74a0b24acb4b0cbb7d3d41e0d050", size = 10668660, upload-time = "2026-03-06T01:56:55.06Z" },
-    { url = "https://files.pythonhosted.org/packages/1b/b8/25e0adc404bbf986977657b25318991f93097b49f8aea640d93c0b0db68e/ty-0.0.21-py3-none-win_arm64.whl", hash = "sha256:f72047996598ac20553fb7e21ba5741e3c82dee4e9eadf10d954551a5fe09391", size = 10104161, upload-time = "2026-03-06T01:57:06.072Z" },
+version = "0.0.22"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f5/ee/b73c99daf598ae66a2d5d3ba6de7729d2152ab732dee7ccb8ab9446cc6d7/ty-0.0.22.tar.gz", hash = "sha256:391fc4d3a543950341b750d7f4aa94866a73e7cdbf3e9e4e4e8cfc8b7bef4f10", size = 5333861, upload-time = "2026-03-12T17:40:30.052Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d7/f7/078f554f612723597f76cc6af70da4daed63ed721241a3f60788259c9adf/ty-0.0.22-py3-none-linux_armv6l.whl", hash = "sha256:03d37220d81016cb9d2a9c9ec11704d84f2df838f1dbf1296d91ea7fba57f8b5", size = 10328232, upload-time = "2026-03-12T17:40:19.402Z" },
+    { url = "https://files.pythonhosted.org/packages/90/0b/4cfe84485d1b20bb50cdbc990f6e66b8c50cff569c7544adf0805b57ddb9/ty-0.0.22-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:3249c65b24829a312cd5cbf722ff5551ffe17b0a9781a8a372ca037d23aa1c71", size = 10148554, upload-time = "2026-03-12T17:40:25.586Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/7e/df31baf70d63880c9719d2cc8403b0b99c3c0d0f68f390a1109d9b231933/ty-0.0.22-py3-none-macosx_11_0_arm64.whl", hash = "sha256:470778f4335f1660f017fe2970afb7e4ce4f8b608795b19406976b8902b221a5", size = 9627910, upload-time = "2026-03-12T17:40:17.447Z" },
+    { url = "https://files.pythonhosted.org/packages/99/0f/a418bcca9c87083533d6c73b65e56c6ade26b8d76a7558b3d3cc0f0eb52a/ty-0.0.22-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75649b04b84ace92cb5c6e27013247f220f58a9a30b30eb2301992814deea0c4", size = 10155025, upload-time = "2026-03-12T17:40:21.344Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/3d/1974c567a58f369602065409d9109c0a81f5abbf1ae552433a89d07141a9/ty-0.0.22-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bc270f2344210cbed7d965ddeade61ffa81d93dffcdc0fded3540dccb860a9e1", size = 10133614, upload-time = "2026-03-12T17:40:23.549Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/c1/2da9e27c79a1fe9209589a73c989e416a7380bd77dcdf22960b3d30252bf/ty-0.0.22-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:548215b226f9726ea4d9beb77055363a8a398eb42809f042895f7a285afcb538", size = 10647101, upload-time = "2026-03-12T17:40:15.569Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/93/4e12c2f0ec792fd4ab9c9f70e59465d09345a453ebedb67d3bf99fd75a71/ty-0.0.22-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed0bd1d34eba800b82ebee65269a85a9bbb2a325237e4baaf1413223f69e1899", size = 11231886, upload-time = "2026-03-12T17:40:06.875Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/9f/c255a078e4f2ce135497fffa4a5d3a122e4c49a00416fb78d72d7b79e119/ty-0.0.22-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbd429a31507da9a1b0873a21215113c42cc683aa5fba96c978794485db5560a", size = 10901527, upload-time = "2026-03-12T17:40:34.429Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/0d/d1bdee7e16d978ea929837fb03463efc116ee8ad05d215a5efd5d80e56d3/ty-0.0.22-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7eb85a437b3be817796e7c0f84243611de53c7d4ea102a0dca179debfe7cec0", size = 10726505, upload-time = "2026-03-12T17:40:36.342Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/d4/6548d2a353f794582ec94d886b310589c70316fe43476a558e53073ea911/ty-0.0.22-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b8e32e362e0666cc0769d2862a273def6b61117b8fbb9df493274d536afcd02e", size = 10128777, upload-time = "2026-03-12T17:40:38.517Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/d2/eb9185d3fe1fa12decb1c0a045416063bc40122187769b3dfb324da9e51c/ty-0.0.22-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:667deb1aaf802f396a626cc5a52cd55d935e8d0b46d1be068cf874f7d6f4bdb5", size = 10164992, upload-time = "2026-03-12T17:40:27.833Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/ec/067bb6d78cc6f5c4f55f0c3f760eb792b144697b454938fb9d10652caeb2/ty-0.0.22-py3-none-musllinux_1_2_i686.whl", hash = "sha256:e160681dbf602160e091d94a68207d1393733aedd95e3dc0b2d010bb39a70d78", size = 10342871, upload-time = "2026-03-12T17:40:13.447Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/04/dd3a87f54f78ceef5e6ab2add2f3bb85d45829318740f459886654b71a5d/ty-0.0.22-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:255598763079b80513d98084c4897df688e42666d7e4371349f97d258166389d", size = 10823909, upload-time = "2026-03-12T17:40:11.444Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/29/4b12e8ff99dec65487ec5342bd5b51fae1482e93a669d098777d55ca5eda/ty-0.0.22-py3-none-win32.whl", hash = "sha256:de0d88d9f788defddfec5507bf356bfc8b90ee301b7d6204f7609e7ac270276f", size = 9746013, upload-time = "2026-03-12T17:40:32.272Z" },
+    { url = "https://files.pythonhosted.org/packages/84/16/e246795ed66ff8ee1a47497019f86ea1b4fb238bfca3068f2e08c52ef03b/ty-0.0.22-py3-none-win_amd64.whl", hash = "sha256:c216f750769ac9f3e9e61feabf3fd44c0697dce762bdcd105443d47e1a81c2b9", size = 10709350, upload-time = "2026-03-12T17:40:40.82Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/a4/5aafcebc4f597164381b0a82e7a8780d8f9f52df3884b16909a76282a0da/ty-0.0.22-py3-none-win_arm64.whl", hash = "sha256:49795260b9b9e3d6f04424f8ddb34907fac88c33a91b83478a74cead5dde567f", size = 10137248, upload-time = "2026-03-12T17:40:09.244Z" },
 ]
 
 [[package]]

From a5960098551ddedec086486113a72baed1138eed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 13 Mar 2026 13:23:47 +0700
Subject: [PATCH 214/236] Include random delays to stagger the start times for
 clients in the pool.

---
 app/server/health.py | 12 +++---------
 app/services/pool.py | 23 +++++++++++++----------
 2 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/app/server/health.py b/app/server/health.py
index 444c938..449081f 100644
--- a/app/server/health.py
+++ b/app/server/health.py
@@ -11,19 +11,13 @@
 async def health_check():
     pool = GeminiClientPool()
     db = LMDBConversationStore()
-
-    try:
-        await pool.init()
-    except Exception as e:
-        logger.error(f"Failed to initialize Gemini clients: {e}")
-        return HealthCheckResponse(ok=False, error=str(e))
-
     client_status = pool.status()
+    stat = db.stats()
 
     if not all(client_status.values()):
-        logger.warning("One or more Gemini clients not running")
+        down_clients = [client_id for client_id, status in client_status.items() if not status]
+        logger.warning(f"One or more Gemini clients not running: {', '.join(down_clients)}")
 
-    stat = db.stats()
     if not stat:
         logger.error("Failed to retrieve LMDB conversation store stats")
         return HealthCheckResponse(
diff --git a/app/services/pool.py b/app/services/pool.py
index febe91d..60ce5ee 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -1,4 +1,5 @@
 import asyncio
+import random
 from collections import deque
 
 from loguru import logger
@@ -32,18 +33,20 @@ def __init__(self) -> None:
             self._restart_locks[c.id] = asyncio.Lock()
 
     async def init(self) -> None:
-        """Initialize all clients in the pool."""
-        success_count = 0
-        for client in self._clients:
-            if not client.running():
-                try:
-                    await client.init()
-                except Exception:
-                    logger.exception(f"Failed to initialize client {client.id}")
+        """Initialize all clients in the pool with staggered start times."""
+        clients_to_init = [c for c in self._clients if not c.running()]
+        for i, client in enumerate(clients_to_init):
+            try:
+                await client.init()
+            except Exception:
+                logger.error(f"Failed to initialize client {client.id}")
 
-            if client.running():
-                success_count += 1
+            if i < len(clients_to_init) - 1:
+                delay = random.uniform(5, 30)
+                logger.info(f"Staggering next initialization by {delay:.2f}s")
+                await asyncio.sleep(delay)
 
+        success_count = sum(1 for client in self._clients if client.running())
         if success_count == 0:
             raise RuntimeError("Failed to initialize any Gemini clients")
 

From d038d65d733c1924c2010d6a41e3503910034075 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 14 Mar 2026 11:07:05 +0700
Subject: [PATCH 215/236] Allow remembering and recovering conversation
 metadata during retention days, but update the README to specify that users
 need to enable Gemini Apps activity for it to work as expected.

---
 README.md          | 23 ++++++++++++++---------
 README.zh.md       | 28 +++++++++++++++-------------
 app/server/chat.py | 28 ++++++++--------------------
 pyproject.toml     |  2 +-
 uv.lock            | 40 ++++++++++++++++++++--------------------
 5 files changed, 58 insertions(+), 63 deletions(-)

diff --git a/README.md b/README.md
index 7d96646..9bc463e 100644
--- a/README.md
+++ b/README.md
@@ -1,22 +1,22 @@
 # Gemini-FastAPI
 
 [![Python 3.13](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/downloads/)
-[![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/)
+[![FastAPI](https://img.shields.io/badge/FastAPI-green.svg)](https://fastapi.tiangolo.com/)
 [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
 
 [ English | [中文](README.zh.md) ]
 
 Web-based Gemini models wrapped into an OpenAI-compatible API. Powered by [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API).
 
-**✅ Call Gemini's web-based models via API without an API Key, completely free!**
+**Call Gemini's web-based models via API without an API Key, completely free!**
 
 ## Features
 
-- **🔐 No Google API Key Required**: Use web cookies to freely access Gemini's models via API.
-- **🔍 Google Search Included**: Get up-to-date answers using web-based Gemini's search capabilities.
-- **💾 Conversation Persistence**: LMDB-based storage supporting multi-turn conversations.
-- **🖼️ Multi-modal Support**: Support for handling text, images, and file uploads.
-- **⚖️ Multi-account Load Balancing**: Distribute requests across multiple accounts with per-account proxy settings.
+- **No Google API Key Required**: Use web cookies to freely access Gemini's models via API.
+- **Google Search Included**: Get up-to-date answers using web-based Gemini's search capabilities.
+- **Conversation Persistence**: LMDB-based storage supporting multi-turn conversations.
+- **Multi-modal Support**: Support for handling text, images, and file uploads.
+- **Multi-account Load Balancing**: Distribute requests across multiple accounts with per-account proxy settings.
 
 ## Quick Start
 
@@ -25,7 +25,7 @@ Web-based Gemini models wrapped into an OpenAI-compatible API. Powered by [Hanao
 ### Prerequisites
 
 - Python 3.13
-- Google account with Gemini access on web
+- Google account with Gemini access on web (Enable **[Gemini Apps activity](https://myactivity.google.com/product/gemini)** for best conversation persistence)
 - `secure_1psid` and `secure_1psidts` cookies from Gemini web interface
 
 ### Installation
@@ -96,7 +96,7 @@ These endpoints are designed to be compatible with OpenAI's API structure, allow
 ### Utility Endpoints
 
 - **`GET /health`**: Health check endpoint. Returns the status of the server, configured Gemini clients, and conversation storage.
-- **`GET /images/{filename}`**: Internal endpoint to serve generated images. Requires a valid token (automatically included in image URLs returned by the API).
+- **`GET /media/{filename}`**: Internal endpoint to serve generated media. Requires a valid token (automatically included in image URLs returned by the API).
 
 ## Docker Deployment
 
@@ -205,6 +205,11 @@ To use Gemini-FastAPI, you need to extract your Gemini session cookies:
    - `__Secure-1PSID`
    - `__Secure-1PSIDTS`
 
+> [!IMPORTANT]
+> **Enable [Gemini Apps activity](https://myactivity.google.com/product/gemini)** to ensure stable conversation persistence.
+>
+> While active chat turns may work temporarily without it, any transient error, TLS session restart, or server reboot can cause Google to expire the conversation metadata. If this setting is disabled, the model will **completely lose the context of your multi-turn conversation**, making old threads unreachable even if they are stored in your local LMDB.
+
 > [!TIP]
 > For detailed instructions, refer to the [HanaokaYuzu/Gemini-API authentication guide](https://github.com/HanaokaYuzu/Gemini-API?tab=readme-ov-file#authentication).
 
diff --git a/README.zh.md b/README.zh.md
index e446667..fb85a48 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -1,22 +1,22 @@
 # Gemini-FastAPI
 
 [![Python 3.13](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/downloads/)
-[![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/)
+[![FastAPI](https://img.shields.io/badge/FastAPI-green.svg)](https://fastapi.tiangolo.com/)
 [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
 
 [ [English](README.md) | 中文 ]
 
 将 Gemini 网页端模型封装为兼容 OpenAI API 的 API Server。基于 [HanaokaYuzu/Gemini-API](https://github.com/HanaokaYuzu/Gemini-API) 实现。
 
-**✅ 无需 API Key，免费通过 API 调用 Gemini 网页端模型！**
+**无需 API Key，免费通过 API 调用 Gemini 网页端模型！**
 
 ## 功能特性
 
-- 🔐 **无需 Google API Key**：只需网页 Cookie，即可免费通过 API 调用 Gemini 模型。
-- 🔍 **内置 Google 搜索**：API 已内置 Gemini 网页端的搜索能力，模型响应更加准确。
-- 💾 **会话持久化**：基于 LMDB 存储，支持多轮对话历史记录。
-- 🖼️ **多模态支持**：可处理文本、图片及文件上传。
-- ⚖️ **多账户负载均衡**：支持多账户分发请求，可为每个账户单独配置代理。
+- **无需 Google API Key**：只需网页 Cookie，即可免费通过 API 调用 Gemini 模型。
+- **内置 Google 搜索**：API 已内置 Gemini 网页端的搜索能力，模型响应更加准确。
+- **会话持久化**：基于 LMDB 存储，支持多轮对话历史记录。
+- **多模态支持**：可处理文本、图片及文件上传。
+- **多账户负载均衡**：支持多账户分发请求，可为每个账户单独配置代理。
 
 ## 快速开始
 
@@ -25,7 +25,7 @@
 ### 前置条件
 
 - Python 3.13
-- 拥有网页版 Gemini 访问权限的 Google 账号
+- 拥有网页版 Gemini 访问权限的 Google 账号 (开启 **[Gemini Apps 应用活动](https://myactivity.google.com/product/gemini)** 以获得最佳会话持久化体验)
 - 从 Gemini 网页获取的 `secure_1psid` 和 `secure_1psidts` Cookie
 
 ### 安装
@@ -93,10 +93,10 @@ python run.py
 
 - **`POST /v1/responses`**: 用于复杂交互模式的专用接口，支持分步输出、生成图片及工具调用等更丰富的响应项。
 
-### 辅助与系统接口
+### 实用工具接口
 
-- **`GET /health`**: 健康检查接口。返回服务器运行状态、已配置的 Gemini 客户端健康度以及对话存储统计信息。
-- **`GET /images/{filename}`**: 用于访问生成的图片的内部接口。需携带有效 Token（API 返回的图片 URL 中已自动包含该 Token）。
+- **`GET /health`**: 健康检查接口。返回服务器、已配置的 Gemini 客户端以及对话存储的状态。
+- **`GET /media/{filename}`**: 用于分发生成的媒体内容的内部接口。需要有效的 Token（API 返回的图片 URL 中已自动包含该 Token）。
 
 ## Docker 部署
 
@@ -204,8 +204,10 @@ export CONFIG_STORAGE__MAX_SIZE=268435456  # 256 MB
    - `__Secure-1PSID`
    - `__Secure-1PSIDTS`
 
-> [!TIP]
-> 详细操作请参考 [HanaokaYuzu/Gemini-API 认证指南](https://github.com/HanaokaYuzu/Gemini-API?tab=readme-ov-file#authentication)。
+> [!IMPORTANT]
+> **请开启 [Gemini Apps 应用活动](https://myactivity.google.com/product/gemini)** 以确保稳定的会话持久化。
+>
+> 虽然在没有开启该设置的情况下，连续的聊天过程可能暂时正常，但任何瞬时错误、TLS 会话重启或服务器重启都可能导致 Google 端的会话元数据过期。如果该设置被禁用，模型将 **完全丢失多轮对话的上下文**，导致即使本地 LMDB 中存有历史记录，旧对话也将无法继续。
 
 ### 代理设置
 
diff --git a/app/server/chat.py b/app/server/chat.py
index 3190b19..e492c07 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -78,7 +78,6 @@
 )
 
 MAX_CHARS_PER_REQUEST = int(g_config.gemini.max_chars_per_request * 0.9)
-METADATA_TTL_MINUTES = 60
 
 router = APIRouter()
 
@@ -972,24 +971,13 @@ async def _find_reusable_session(
         if search_history[-1].role in {"assistant", "system", "tool"}:
             try:
                 if conv := db.find(model.model_name, search_history):
-                    now = datetime.now()
-                    updated_at = conv.updated_at or conv.created_at or now
-                    age_minutes = (now - updated_at).total_seconds() / 60
-                    if age_minutes <= METADATA_TTL_MINUTES:
-                        client = await pool.acquire(conv.client_id)
-                        session = client.start_chat(metadata=conv.metadata, model=model)
-                        remain = messages[search_end:]
-                        logger.debug(
-                            f"Match found at prefix length {search_end}/{len(messages)}. Client: {conv.client_id}"
-                        )
-                        return session, client, remain
-                    else:
-                        logger.debug(
-                            f"Matched conversation at length {search_end} is too old ({age_minutes:.1f}m), skipping reuse."
-                        )
-                else:
-                    # Log that we tried this prefix but failed
-                    pass
+                    client = await pool.acquire(conv.client_id)
+                    session = client.start_chat(metadata=conv.metadata, model=model)
+                    remain = messages[search_end:]
+                    logger.debug(
+                        f"Match found at prefix length {search_end}/{len(messages)}. Client: {conv.client_id}"
+                    )
+                    return session, client, remain
             except Exception as e:
                 logger.warning(
                     f"Error checking LMDB for reusable session at length {search_end}: {e}"
@@ -1024,7 +1012,7 @@ async def _send_with_split(
     file_obj.name = "message.txt"
     try:
         final_files: list[Any] = list(files) if files else []
-        final_files.append(file_obj)
+        final_files.insert(0, file_obj)
         instruction = (
             "The user's input exceeds the character limit and is provided in the attached file `message.txt`.\n\n"
             "**System Instruction:**\n"
diff --git a/pyproject.toml b/pyproject.toml
index d5b96ff..35d3097 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ Repository = "https://github.com/Nativu5/Gemini-FastAPI"
 dev = [
     "pytest>=9.0.2",
     "ruff>=0.15.6",
-    "ty>=0.0.22",
+    "ty>=0.0.23",
 ]
 
 [dependency-groups]
diff --git a/uv.lock b/uv.lock
index d1f1f66..820072e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -454,26 +454,26 @@ wheels = [
 
 [[package]]
 name = "ty"
-version = "0.0.22"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f5/ee/b73c99daf598ae66a2d5d3ba6de7729d2152ab732dee7ccb8ab9446cc6d7/ty-0.0.22.tar.gz", hash = "sha256:391fc4d3a543950341b750d7f4aa94866a73e7cdbf3e9e4e4e8cfc8b7bef4f10", size = 5333861, upload-time = "2026-03-12T17:40:30.052Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/d7/f7/078f554f612723597f76cc6af70da4daed63ed721241a3f60788259c9adf/ty-0.0.22-py3-none-linux_armv6l.whl", hash = "sha256:03d37220d81016cb9d2a9c9ec11704d84f2df838f1dbf1296d91ea7fba57f8b5", size = 10328232, upload-time = "2026-03-12T17:40:19.402Z" },
-    { url = "https://files.pythonhosted.org/packages/90/0b/4cfe84485d1b20bb50cdbc990f6e66b8c50cff569c7544adf0805b57ddb9/ty-0.0.22-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:3249c65b24829a312cd5cbf722ff5551ffe17b0a9781a8a372ca037d23aa1c71", size = 10148554, upload-time = "2026-03-12T17:40:25.586Z" },
-    { url = "https://files.pythonhosted.org/packages/1b/7e/df31baf70d63880c9719d2cc8403b0b99c3c0d0f68f390a1109d9b231933/ty-0.0.22-py3-none-macosx_11_0_arm64.whl", hash = "sha256:470778f4335f1660f017fe2970afb7e4ce4f8b608795b19406976b8902b221a5", size = 9627910, upload-time = "2026-03-12T17:40:17.447Z" },
-    { url = "https://files.pythonhosted.org/packages/99/0f/a418bcca9c87083533d6c73b65e56c6ade26b8d76a7558b3d3cc0f0eb52a/ty-0.0.22-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75649b04b84ace92cb5c6e27013247f220f58a9a30b30eb2301992814deea0c4", size = 10155025, upload-time = "2026-03-12T17:40:21.344Z" },
-    { url = "https://files.pythonhosted.org/packages/2d/3d/1974c567a58f369602065409d9109c0a81f5abbf1ae552433a89d07141a9/ty-0.0.22-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bc270f2344210cbed7d965ddeade61ffa81d93dffcdc0fded3540dccb860a9e1", size = 10133614, upload-time = "2026-03-12T17:40:23.549Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/c1/2da9e27c79a1fe9209589a73c989e416a7380bd77dcdf22960b3d30252bf/ty-0.0.22-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:548215b226f9726ea4d9beb77055363a8a398eb42809f042895f7a285afcb538", size = 10647101, upload-time = "2026-03-12T17:40:15.569Z" },
-    { url = "https://files.pythonhosted.org/packages/c2/93/4e12c2f0ec792fd4ab9c9f70e59465d09345a453ebedb67d3bf99fd75a71/ty-0.0.22-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed0bd1d34eba800b82ebee65269a85a9bbb2a325237e4baaf1413223f69e1899", size = 11231886, upload-time = "2026-03-12T17:40:06.875Z" },
-    { url = "https://files.pythonhosted.org/packages/2a/9f/c255a078e4f2ce135497fffa4a5d3a122e4c49a00416fb78d72d7b79e119/ty-0.0.22-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbd429a31507da9a1b0873a21215113c42cc683aa5fba96c978794485db5560a", size = 10901527, upload-time = "2026-03-12T17:40:34.429Z" },
-    { url = "https://files.pythonhosted.org/packages/f2/0d/d1bdee7e16d978ea929837fb03463efc116ee8ad05d215a5efd5d80e56d3/ty-0.0.22-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7eb85a437b3be817796e7c0f84243611de53c7d4ea102a0dca179debfe7cec0", size = 10726505, upload-time = "2026-03-12T17:40:36.342Z" },
-    { url = "https://files.pythonhosted.org/packages/a5/d4/6548d2a353f794582ec94d886b310589c70316fe43476a558e53073ea911/ty-0.0.22-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b8e32e362e0666cc0769d2862a273def6b61117b8fbb9df493274d536afcd02e", size = 10128777, upload-time = "2026-03-12T17:40:38.517Z" },
-    { url = "https://files.pythonhosted.org/packages/9f/d2/eb9185d3fe1fa12decb1c0a045416063bc40122187769b3dfb324da9e51c/ty-0.0.22-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:667deb1aaf802f396a626cc5a52cd55d935e8d0b46d1be068cf874f7d6f4bdb5", size = 10164992, upload-time = "2026-03-12T17:40:27.833Z" },
-    { url = "https://files.pythonhosted.org/packages/7b/ec/067bb6d78cc6f5c4f55f0c3f760eb792b144697b454938fb9d10652caeb2/ty-0.0.22-py3-none-musllinux_1_2_i686.whl", hash = "sha256:e160681dbf602160e091d94a68207d1393733aedd95e3dc0b2d010bb39a70d78", size = 10342871, upload-time = "2026-03-12T17:40:13.447Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/04/dd3a87f54f78ceef5e6ab2add2f3bb85d45829318740f459886654b71a5d/ty-0.0.22-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:255598763079b80513d98084c4897df688e42666d7e4371349f97d258166389d", size = 10823909, upload-time = "2026-03-12T17:40:11.444Z" },
-    { url = "https://files.pythonhosted.org/packages/d7/29/4b12e8ff99dec65487ec5342bd5b51fae1482e93a669d098777d55ca5eda/ty-0.0.22-py3-none-win32.whl", hash = "sha256:de0d88d9f788defddfec5507bf356bfc8b90ee301b7d6204f7609e7ac270276f", size = 9746013, upload-time = "2026-03-12T17:40:32.272Z" },
-    { url = "https://files.pythonhosted.org/packages/84/16/e246795ed66ff8ee1a47497019f86ea1b4fb238bfca3068f2e08c52ef03b/ty-0.0.22-py3-none-win_amd64.whl", hash = "sha256:c216f750769ac9f3e9e61feabf3fd44c0697dce762bdcd105443d47e1a81c2b9", size = 10709350, upload-time = "2026-03-12T17:40:40.82Z" },
-    { url = "https://files.pythonhosted.org/packages/3b/a4/5aafcebc4f597164381b0a82e7a8780d8f9f52df3884b16909a76282a0da/ty-0.0.22-py3-none-win_arm64.whl", hash = "sha256:49795260b9b9e3d6f04424f8ddb34907fac88c33a91b83478a74cead5dde567f", size = 10137248, upload-time = "2026-03-12T17:40:09.244Z" },
+version = "0.0.23"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/75/ba/d3c998ff4cf6b5d75b39356db55fe1b7caceecc522b9586174e6a5dee6f7/ty-0.0.23.tar.gz", hash = "sha256:5fb05db58f202af366f80ef70f806e48f5237807fe424ec787c9f289e3f3a4ef", size = 5341461, upload-time = "2026-03-13T12:34:23.125Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f4/21/aab32603dfdfacd4819e52fa8c6074e7bd578218a5142729452fc6a62db6/ty-0.0.23-py3-none-linux_armv6l.whl", hash = "sha256:e810eef1a5f1cfc0731a58af8d2f334906a96835829767aed00026f1334a8dd7", size = 10329096, upload-time = "2026-03-13T12:34:09.432Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/a9/dd3287a82dce3df546ec560296208d4905dcf06346b6e18c2f3c63523bd1/ty-0.0.23-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:e43d36bd89a151ddcad01acaeff7dcc507cb73ff164c1878d2d11549d39a061c", size = 10156631, upload-time = "2026-03-13T12:34:53.122Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/01/3f25909b02fac29bb0a62b2251f8d62e65d697781ffa4cf6b47a4c075c85/ty-0.0.23-py3-none-macosx_11_0_arm64.whl", hash = "sha256:bd6a340969577b4645f231572c4e46012acba2d10d4c0c6570fe1ab74e76ae00", size = 9653211, upload-time = "2026-03-13T12:34:15.049Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/60/bfc0479572a6f4b90501c869635faf8d84c8c68ffc5dd87d04f049affabc/ty-0.0.23-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:341441783e626eeb7b1ec2160432956aed5734932ab2d1c26f94d0c98b229937", size = 10156143, upload-time = "2026-03-13T12:34:34.468Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/81/8a93e923535a340f54bea20ff196f6b2787782b2f2f399bd191c4bc132d6/ty-0.0.23-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8ce1dc66c26d4167e2c78d12fa870ef5a7ec9cc344d2baaa6243297cfa88bd52", size = 10136632, upload-time = "2026-03-13T12:34:28.832Z" },
+    { url = "https://files.pythonhosted.org/packages/da/cb/2ac81c850c58acc9f976814404d28389c9c1c939676e32287b9cff61381e/ty-0.0.23-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bae1e7a294bf8528836f7617dc5c360ea2dddb63789fc9471ae6753534adca05", size = 10655025, upload-time = "2026-03-13T12:34:37.105Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/9b/bac771774c198c318ae699fc013d8cd99ed9caf993f661fba11238759244/ty-0.0.23-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d2b162768764d9dc177c83fb497a51532bb67cbebe57b8fa0f2668436bf53f3c", size = 11230107, upload-time = "2026-03-13T12:34:20.751Z" },
+    { url = "https://files.pythonhosted.org/packages/14/09/7644fb0e297265e18243f878aca343593323b9bb19ed5278dcbc63781be0/ty-0.0.23-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d28384e48ca03b34e4e2beee0e230c39bbfb68994bb44927fec61ef3642900da", size = 10934177, upload-time = "2026-03-13T12:34:17.904Z" },
+    { url = "https://files.pythonhosted.org/packages/18/14/69a25a0cad493fb6a947302471b579a03516a3b00e7bece77fdc6b4afb9b/ty-0.0.23-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:559d9a299df793cb7a7902caed5eda8a720ff69164c31c979673e928f02251ee", size = 10752487, upload-time = "2026-03-13T12:34:31.785Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/2a/42fc3cbccf95af0a62308ebed67e084798ab7a85ef073c9986ef18032743/ty-0.0.23-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:32a7b8a14a98e1d20a9d8d2af23637ed7efdb297ac1fa2450b8e465d05b94482", size = 10133007, upload-time = "2026-03-13T12:34:42.838Z" },
+    { url = "https://files.pythonhosted.org/packages/e1/69/307833f1b52fa3670e0a1d496e43ef7df556ecde838192d3fcb9b35e360d/ty-0.0.23-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:6f803b9b9cca87af793467973b9abdd4b83e6b96d9b5e749d662cff7ead70b6d", size = 10169698, upload-time = "2026-03-13T12:34:12.351Z" },
+    { url = "https://files.pythonhosted.org/packages/89/ae/5dd379ec22d0b1cba410d7af31c366fcedff191d5b867145913a64889f66/ty-0.0.23-py3-none-musllinux_1_2_i686.whl", hash = "sha256:4a0bf086ec8e2197b7ea7ebfcf4be36cb6a52b235f8be61647ef1b2d99d6ffd3", size = 10346080, upload-time = "2026-03-13T12:34:40.012Z" },
+    { url = "https://files.pythonhosted.org/packages/98/c7/dfc83203d37998620bba9c4873a080c8850a784a8a46f56f8163c5b4e320/ty-0.0.23-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:252539c3fcd7aeb9b8d5c14e2040682c3e1d7ff640906d63fd2c4ce35865a4ba", size = 10848162, upload-time = "2026-03-13T12:34:45.421Z" },
+    { url = "https://files.pythonhosted.org/packages/89/08/05481511cfbcc1fd834b6c67aaae090cb609a079189ddf2032139ccfc490/ty-0.0.23-py3-none-win32.whl", hash = "sha256:51b591d19eef23bbc3807aef77d38fa1f003c354e1da908aa80ea2dca0993f77", size = 9748283, upload-time = "2026-03-13T12:34:50.607Z" },
+    { url = "https://files.pythonhosted.org/packages/31/2e/eaed4ff5c85e857a02415084c394e02c30476b65e158eec1938fdaa9a205/ty-0.0.23-py3-none-win_amd64.whl", hash = "sha256:1e137e955f05c501cfbb81dd2190c8fb7d01ec037c7e287024129c722a83c9ad", size = 10698355, upload-time = "2026-03-13T12:34:26.134Z" },
+    { url = "https://files.pythonhosted.org/packages/91/29/b32cb7b4c7d56b9ed50117f8ad6e45834aec293e4cb14749daab4e9236d5/ty-0.0.23-py3-none-win_arm64.whl", hash = "sha256:a0399bd13fd2cd6683fd0a2d59b9355155d46546d8203e152c556ddbdeb20842", size = 10155890, upload-time = "2026-03-13T12:34:48.082Z" },
 ]
 
 [[package]]

From b929c69bc20ae48774ed2b8afa8f0c010447cd5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 14 Mar 2026 20:38:44 +0700
Subject: [PATCH 216/236] Implement a graceful shutdown to prevent losing
 cookies.

---
 app/main.py          |  5 +++++
 app/services/pool.py | 12 ++++++++++++
 uv.lock              |  6 +++---
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/app/main.py b/app/main.py
index 9782726..47e3956 100644
--- a/app/main.py
+++ b/app/main.py
@@ -76,6 +76,11 @@ async def lifespan(app: FastAPI):
         yield
     finally:
         cleanup_stop_event.set()
+        try:
+            await pool.close()
+        except Exception:
+            logger.exception("Failed to close Gemini client pool gracefully.")
+
         try:
             await cleanup_task
         except asyncio.CancelledError:
diff --git a/app/services/pool.py b/app/services/pool.py
index 60ce5ee..847e79a 100644
--- a/app/services/pool.py
+++ b/app/services/pool.py
@@ -99,6 +99,18 @@ def clients(self) -> list[GeminiClientWrapper]:
         """Return managed clients."""
         return self._clients
 
+    async def close(self) -> None:
+        """Close all clients in the pool."""
+        if not self._clients:
+            return
+
+        logger.info(f"Closing {len(self._clients)} Gemini clients...")
+        await asyncio.gather(
+            *(client.close() for client in self._clients if client.running()),
+            return_exceptions=True,
+        )
+        logger.info("All Gemini clients closed.")
+
     def status(self) -> dict[str, bool]:
         """Return running status for each client."""
         return {client.id: client.running() for client in self._clients}
diff --git a/uv.lock b/uv.lock
index 820072e..421c419 100644
--- a/uv.lock
+++ b/uv.lock
@@ -165,7 +165,7 @@ requires-dist = [
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.1" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.6" },
-    { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.22" },
+    { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.23" },
     { name = "uvicorn", specifier = ">=0.41.0" },
     { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" },
 ]
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post293"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#be8c783bbf89c94fbcebb76a3f556d1ad830bb12" }
+version = "0.0.post295"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#52b60b6b44b4b1386b9f8977afd916ce782384f1" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From 07811acf5ccd1556bc18650aca59a829a73e6d52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 17 Mar 2026 16:42:21 +0700
Subject: [PATCH 217/236] Add healthcheck to Dockerfile

---
 Dockerfile | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 5c669c2..2499479 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,10 +3,14 @@ FROM ghcr.io/astral-sh/uv:python3.13-trixie-slim
 LABEL org.opencontainers.image.title="Gemini-FastAPI" \
       org.opencontainers.image.description="Web-based Gemini models wrapped into an OpenAI-compatible API."
 
+USER root
+
 WORKDIR /app
 
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    tini git \
+    tini curl ca-certificates git \
     && rm -rf /var/lib/apt/lists/*
 
 ENV UV_COMPILE_BYTECODE=1 \
@@ -22,6 +26,9 @@ COPY run.py .
 
 EXPOSE 8000
 
+HEALTHCHECK --interval=30s --timeout=10s --start-period=300s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+
 ENTRYPOINT ["/usr/bin/tini", "--"]
 
 CMD ["uv", "run", "--no-dev", "run.py"]

From 24fdb6325da9f469e9c2f01953abdffaaff54dce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 17 Mar 2026 22:08:42 +0700
Subject: [PATCH 218/236] Fix a rare issue where whitespace leaks occur during
 a tool call while streaming.

---
 app/server/chat.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index e492c07..9ef558a 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -947,7 +947,7 @@ async def _get_available_models(pool: GeminiClientPool) -> list[ModelData]:
                             ModelData(
                                 id=am.id,
                                 created=now,
-                                owned_by="gemini-web",
+                                owned_by="google",
                             )
                         )
                         seen_model_ids.add(am.id)
@@ -1045,6 +1045,8 @@ def state(self):
 
     def _is_outputting(self) -> bool:
         """Determines if the current state allows yielding text to the stream."""
+        if self.state == "POST_BLOCK":
+            return False
         return self.state == "NORMAL" or (self.state == "IN_BLOCK" and self.current_role != "tool")
 
     def process(self, chunk: str) -> str:
@@ -1062,6 +1064,13 @@ def process(self, chunk: str) -> str:
                 else:
                     break
 
+            if self.state == "POST_BLOCK":
+                stripped = self.buffer.lstrip()
+                if not stripped:
+                    break
+                self.buffer = stripped
+                self.stack[-1] = "NORMAL"
+
             match = STREAM_MASTER_RE.search(self.buffer)
             if not match:
                 tail_match = STREAM_TAIL_RE.search(self.buffer)
@@ -1096,7 +1105,13 @@ def process(self, chunk: str) -> str:
                 else:
                     self.stack = ["NORMAL"]
 
-                if self.state == "NORMAL":
+                if self.state == "NORMAL" and matched_group in (
+                    "PROTOCOL_EXIT",
+                    "HINT_EXIT",
+                ):
+                    self.stack[-1] = "POST_BLOCK"
+
+                if self.state in ("NORMAL", "POST_BLOCK"):
                     self.current_role = ""
 
             self.buffer = self.buffer[end:]

From aa4569e069cc411d17764e5510667475e2d8787a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 18 Mar 2026 15:47:07 +0700
Subject: [PATCH 219/236] Update dependencies to latest versions

---
 pyproject.toml |  6 +++---
 uv.lock        | 32 ++++++++++++++++----------------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 35d3097..84e6b22 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,11 +9,11 @@ dependencies = [
     "fastapi>=0.135.1",
     "gemini-webapi>=1.21.0",
     "httptools>=0.7.1",
-    "lmdb>=1.8.1",
+    "lmdb>=2.0.0",
     "loguru>=0.7.3",
     "orjson>=3.11.7",
     "pydantic-settings[yaml]>=2.13.1",
-    "uvicorn>=0.41.0",
+    "uvicorn>=0.42.0",
     "uvloop>=0.22.1; sys_platform != 'win32'",
 ]
 
@@ -65,4 +65,4 @@ quote-style = "double"
 indent-style = "space"
 
 [tool.uv.sources]
-gemini-webapi = { git = "https://github.com/luuquangvu/Gemini-API.git", rev = "move-httpx-to-curl_cffi" }
+gemini-webapi = { git = "https://github.com/HanaokaYuzu/Gemini-API.git" }
diff --git a/uv.lock b/uv.lock
index 421c419..e30b63d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -157,16 +157,16 @@ dev = [
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
     { name = "fastapi", specifier = ">=0.135.1" },
-    { name = "gemini-webapi", git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi" },
+    { name = "gemini-webapi", git = "https://github.com/HanaokaYuzu/Gemini-API.git" },
     { name = "httptools", specifier = ">=0.7.1" },
-    { name = "lmdb", specifier = ">=1.8.1" },
+    { name = "lmdb", specifier = ">=2.0.0" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "orjson", specifier = ">=3.11.7" },
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.1" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.6" },
     { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.23" },
-    { name = "uvicorn", specifier = ">=0.41.0" },
+    { name = "uvicorn", specifier = ">=0.42.0" },
     { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" },
 ]
 provides-extras = ["dev"]
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post295"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=move-httpx-to-curl_cffi#52b60b6b44b4b1386b9f8977afd916ce782384f1" }
+version = "1.21.0.post4"
+source = { git = "https://github.com/HanaokaYuzu/Gemini-API.git#37f3149d4d985af5100f20cecded0c257e8bbb36" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },
@@ -229,16 +229,16 @@ wheels = [
 
 [[package]]
 name = "lmdb"
-version = "1.8.1"
+version = "2.0.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/23/19/392f028e7ebcc1cc8212fe8a315a909b7a556278456f0bab9234d3a3b665/lmdb-1.8.1.tar.gz", hash = "sha256:44ef24033929e9cc227a7e17287473c452b462d716f118db885c667c80f57429", size = 886349, upload-time = "2026-03-12T23:21:48.42Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/14/d7/c8b34be72e94a18512eccb8fb4611ba84621d57c4579cb7431454f0f9b4a/lmdb-2.0.0.tar.gz", hash = "sha256:3130b625004f6aed2cbda7ea70c41375bfe57b3b2389051eaa8fcef2ff18411c", size = 899492, upload-time = "2026-03-18T03:56:06.816Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/75/5f/ec2fe6bb0986fae28db80292ffe5146ed7cc4d478d683d67050d2691a538/lmdb-1.8.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8b197007a5762411b7484e533ed1d03dca1b8ba1eee390233ac6e62ff45bd417", size = 101865, upload-time = "2026-03-12T23:21:12.902Z" },
-    { url = "https://files.pythonhosted.org/packages/75/35/1e43bba9658292c2ac787f5d003baa5ed37cad1b52666526edf4c908fb7d/lmdb-1.8.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a9454649e62bc6f4a45f9aed175fcdd6fc2e91922bf970fd561053c616281d0a", size = 100462, upload-time = "2026-03-12T23:21:14.369Z" },
-    { url = "https://files.pythonhosted.org/packages/64/8e/2b1a0caa42b6f980a8b8663a272b9a52d4fd51ef0ca36cdec768cea02978/lmdb-1.8.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a51881d284116d82ead233b20ae7f6dbec8624dd7b0593a755c84e0d0bc4cc29", size = 303313, upload-time = "2026-03-12T23:21:15.636Z" },
-    { url = "https://files.pythonhosted.org/packages/40/51/8061694cf7b883d2a166965cbaa961ea1ce692ce1782ac58091b5aa0fdb5/lmdb-1.8.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:04c5472bfc38377a2b32ae3b494d82d9c8db7c64e9053ca1b7c86aa862ebaaf9", size = 303895, upload-time = "2026-03-12T23:21:16.769Z" },
-    { url = "https://files.pythonhosted.org/packages/7f/0f/c189c40d833ecd64f3e375a0bb02378110fc958916053ee687ec2c7d5079/lmdb-1.8.1-cp313-cp313-win_amd64.whl", hash = "sha256:fe0e34b2b20f47a108c3e04b397d1e27f080a7b0256c33efb5aef7bd1bccb923", size = 99707, upload-time = "2026-03-12T23:21:17.862Z" },
-    { url = "https://files.pythonhosted.org/packages/6c/c3/3c87bede5b62163b768e6a4bca893f59d3996cb6fc4052bfd67847c0efd7/lmdb-1.8.1-cp313-cp313-win_arm64.whl", hash = "sha256:c3550849cdbaf0ead6265cb5b10134b223c2cede7ce7a1f3390a55975e3a06d4", size = 94605, upload-time = "2026-03-12T23:21:19.102Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/d8/69a1b30ef2ea79f46c9fb6b2658d2a86fb738e42213fb536f3086b4d8a5c/lmdb-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35f05c02e3629a7003398cf657278b2e93e74d2a4c77d3ee2485b65f4b2ada9a", size = 108272, upload-time = "2026-03-18T03:55:39.851Z" },
+    { url = "https://files.pythonhosted.org/packages/30/3d/f67c003ad3fc292c6f2a9215edb7f852b5ba643deeced7c377feecb5d764/lmdb-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b5589e4d0c64be988129338299e3c1236838d056009c8ee215967f595ac32ac8", size = 107106, upload-time = "2026-03-18T03:55:41.125Z" },
+    { url = "https://files.pythonhosted.org/packages/53/9a/974d36bcebee3309522748a5eb6d80dc1895ea1a7cb69f047c7ac041d77d/lmdb-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47a6ad68d763de441377f0d20036c60d7a88ad10c5303408f77412225cddc4b3", size = 324018, upload-time = "2026-03-18T03:55:42.547Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/88/d3b959f44012a877d30f0d9a40bc93c17605fc53c7b10d8f64b97b0c959c/lmdb-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8dc7156fc503000f9f76b1393cecac48a2d9d95f945a677c47d340aefad871a4", size = 327333, upload-time = "2026-03-18T03:55:44.13Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/e1/c659167677a46b86a013bc625418ccbd3a9e88cb1804d8c3c3b316acf504/lmdb-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:88670fb86764d93454cb39ca1e8c08cb6dc2ef9b9d71f3f6ff9ef416d4202265", size = 104562, upload-time = "2026-03-18T03:55:45.758Z" },
+    { url = "https://files.pythonhosted.org/packages/07/2f/8de426d45a42ef8c8cae4dd1953166f8539f7e7406970adea3cecae00649/lmdb-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:85a57eb3a85d231912819bcc8c75ed2baddba8f6d07ea79db62105a513ec2254", size = 98913, upload-time = "2026-03-18T03:55:47.013Z" },
 ]
 
 [[package]]
@@ -499,15 +499,15 @@ wheels = [
 
 [[package]]
 name = "uvicorn"
-version = "0.41.0"
+version = "0.42.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "click" },
     { name = "h11" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/32/ce/eeb58ae4ac36fe09e3842eb02e0eb676bf2c53ae062b98f1b2531673efdd/uvicorn-0.41.0.tar.gz", hash = "sha256:09d11cf7008da33113824ee5a1c6422d89fbc2ff476540d69a34c87fab8b571a", size = 82633, upload-time = "2026-02-16T23:07:24.1Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e3/ad/4a96c425be6fb67e0621e62d86c402b4a17ab2be7f7c055d9bd2f638b9e2/uvicorn-0.42.0.tar.gz", hash = "sha256:9b1f190ce15a2dd22e7758651d9b6d12df09a13d51ba5bf4fc33c383a48e1775", size = 85393, upload-time = "2026-03-16T06:19:50.077Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/83/e4/d04a086285c20886c0daad0e026f250869201013d18f81d9ff5eada73a88/uvicorn-0.41.0-py3-none-any.whl", hash = "sha256:29e35b1d2c36a04b9e180d4007ede3bcb32a85fbdfd6c6aeb3f26839de088187", size = 68783, upload-time = "2026-02-16T23:07:22.357Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/89/f8827ccff89c1586027a105e5630ff6139a64da2515e24dafe860bd9ae4d/uvicorn-0.42.0-py3-none-any.whl", hash = "sha256:96c30f5c7abe6f74ae8900a70e92b85ad6613b745d4879eb9b16ccad15645359", size = 68830, upload-time = "2026-03-16T06:19:48.325Z" },
 ]
 
 [[package]]

From 3a68e96aa546ac3c14873428f215619f66b8c56e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 18 Mar 2026 16:06:27 +0700
Subject: [PATCH 220/236] Remove unused code

---
 app/server/chat.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 9ef558a..ec31626 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -2365,11 +2365,6 @@ async def create_chat_completion(
         visible_output += media_markdown
         storage_output += media_markdown
 
-    if tool_calls:
-        logger.debug(
-            f"Detected tool calls: {reprlib.repr([tc.model_dump(mode='json') for tc in tool_calls])}"
-        )
-
     p_tok, c_tok, t_tok, r_tok = _calculate_usage(
         app_messages, storage_output, tool_calls, thoughts
     )

From 4c9f4bfb4a3b71272444d0d80ba0ffa82d72b102 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 19 Mar 2026 15:52:56 +0700
Subject: [PATCH 221/236] Update dependencies to latest versions

---
 pyproject.toml | 2 +-
 uv.lock        | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 84e6b22..fff8e63 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,4 +65,4 @@ quote-style = "double"
 indent-style = "space"
 
 [tool.uv.sources]
-gemini-webapi = { git = "https://github.com/HanaokaYuzu/Gemini-API.git" }
+gemini-webapi = { git = "https://github.com/luuquangvu/Gemini-API.git", rev = "retrieve-dynamic-models" }
diff --git a/uv.lock b/uv.lock
index e30b63d..b63e813 100644
--- a/uv.lock
+++ b/uv.lock
@@ -157,7 +157,7 @@ dev = [
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
     { name = "fastapi", specifier = ">=0.135.1" },
-    { name = "gemini-webapi", git = "https://github.com/HanaokaYuzu/Gemini-API.git" },
+    { name = "gemini-webapi", git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=2.0.0" },
     { name = "loguru", specifier = ">=0.7.3" },
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "1.21.0.post4"
-source = { git = "https://github.com/HanaokaYuzu/Gemini-API.git#37f3149d4d985af5100f20cecded0c257e8bbb36" }
+version = "0.0.post219"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#1440fc7e2b5079daaecf3b37c9880f4f1bde6619" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From 49f11445e7b1167396f0a907e7de2728eea41e38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 19 Mar 2026 15:56:57 +0700
Subject: [PATCH 222/236] Update dependencies to latest versions

---
 pyproject.toml |  2 +-
 uv.lock        | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index fff8e63..a5a4ee9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ dependencies = [
     "fastapi>=0.135.1",
     "gemini-webapi>=1.21.0",
     "httptools>=0.7.1",
-    "lmdb>=2.0.0",
+    "lmdb>=2.1.0",
     "loguru>=0.7.3",
     "orjson>=3.11.7",
     "pydantic-settings[yaml]>=2.13.1",
diff --git a/uv.lock b/uv.lock
index b63e813..bfa5668 100644
--- a/uv.lock
+++ b/uv.lock
@@ -159,7 +159,7 @@ requires-dist = [
     { name = "fastapi", specifier = ">=0.135.1" },
     { name = "gemini-webapi", git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models" },
     { name = "httptools", specifier = ">=0.7.1" },
-    { name = "lmdb", specifier = ">=2.0.0" },
+    { name = "lmdb", specifier = ">=2.1.0" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "orjson", specifier = ">=3.11.7" },
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.1" },
@@ -229,16 +229,16 @@ wheels = [
 
 [[package]]
 name = "lmdb"
-version = "2.0.0"
+version = "2.1.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/14/d7/c8b34be72e94a18512eccb8fb4611ba84621d57c4579cb7431454f0f9b4a/lmdb-2.0.0.tar.gz", hash = "sha256:3130b625004f6aed2cbda7ea70c41375bfe57b3b2389051eaa8fcef2ff18411c", size = 899492, upload-time = "2026-03-18T03:56:06.816Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d8/a9/b06257089086ceb1d10ba8c65c605c9063e34e7b65fa456d2f66bfb2fc1d/lmdb-2.1.0.tar.gz", hash = "sha256:812be2c49aeb191565425c4e010956017d7290bc9bc3f3c7e4950336b4ddb764", size = 912813, upload-time = "2026-03-19T04:58:51.399Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e2/d8/69a1b30ef2ea79f46c9fb6b2658d2a86fb738e42213fb536f3086b4d8a5c/lmdb-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35f05c02e3629a7003398cf657278b2e93e74d2a4c77d3ee2485b65f4b2ada9a", size = 108272, upload-time = "2026-03-18T03:55:39.851Z" },
-    { url = "https://files.pythonhosted.org/packages/30/3d/f67c003ad3fc292c6f2a9215edb7f852b5ba643deeced7c377feecb5d764/lmdb-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b5589e4d0c64be988129338299e3c1236838d056009c8ee215967f595ac32ac8", size = 107106, upload-time = "2026-03-18T03:55:41.125Z" },
-    { url = "https://files.pythonhosted.org/packages/53/9a/974d36bcebee3309522748a5eb6d80dc1895ea1a7cb69f047c7ac041d77d/lmdb-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47a6ad68d763de441377f0d20036c60d7a88ad10c5303408f77412225cddc4b3", size = 324018, upload-time = "2026-03-18T03:55:42.547Z" },
-    { url = "https://files.pythonhosted.org/packages/1d/88/d3b959f44012a877d30f0d9a40bc93c17605fc53c7b10d8f64b97b0c959c/lmdb-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8dc7156fc503000f9f76b1393cecac48a2d9d95f945a677c47d340aefad871a4", size = 327333, upload-time = "2026-03-18T03:55:44.13Z" },
-    { url = "https://files.pythonhosted.org/packages/cd/e1/c659167677a46b86a013bc625418ccbd3a9e88cb1804d8c3c3b316acf504/lmdb-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:88670fb86764d93454cb39ca1e8c08cb6dc2ef9b9d71f3f6ff9ef416d4202265", size = 104562, upload-time = "2026-03-18T03:55:45.758Z" },
-    { url = "https://files.pythonhosted.org/packages/07/2f/8de426d45a42ef8c8cae4dd1953166f8539f7e7406970adea3cecae00649/lmdb-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:85a57eb3a85d231912819bcc8c75ed2baddba8f6d07ea79db62105a513ec2254", size = 98913, upload-time = "2026-03-18T03:55:47.013Z" },
+    { url = "https://files.pythonhosted.org/packages/91/92/8357de991821250e75ddafe32476c6c8e66fe773cda073ed020dbd1797e4/lmdb-2.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d3284f7b6f98ddecc4d126218b0509134cb36c167c0c104d0458521ca954c342", size = 108997, upload-time = "2026-03-19T04:58:26.566Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/21/aa5f034479bc844cd1d3db606c01177b69f64ff6436061bcde2bcb88111a/lmdb-2.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:90a7d1caffd5cbdbecac88b598dace8dd038ac2c2b805c9faa0984a0b17bf997", size = 107790, upload-time = "2026-03-19T04:58:27.729Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/d9/94977566909c81c5e2407f546e4e179a63ef6417a749095d3c7682f71d76/lmdb-2.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6fb2c908b30a4f63e5f15c158904ad6e8f474b5cf90bc08789599a5aa2d01b6b", size = 324669, upload-time = "2026-03-19T04:58:28.92Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/42/08f14c83529a42456b26ab5be1ab332688f9bc36d38ccaf231c989ea41da/lmdb-2.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d48538aae010686a9dfabd1c0a976a0836c5044b8869249d05869ebd14c32639", size = 327653, upload-time = "2026-03-19T04:58:30.395Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/18/42a90551d15bc3435e0258bb65b32a4d75f03a017cf1ac164c614f811664/lmdb-2.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:52df6033da9a5e74cb407460d1a122958c437d0d142f1286bb238c2f550f03fb", size = 104803, upload-time = "2026-03-19T04:58:32.067Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/f7/60db1dbdf56e8e8c56674682d460d1e64a797c5c24abd2de9e5ebeb7c544/lmdb-2.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:170e40658d26730ccea9735cb0f73b9255b1a5c0f83d68e5f3f2f205d8a9e238", size = 99084, upload-time = "2026-03-19T04:58:33.437Z" },
 ]
 
 [[package]]

From db9065943c9ace5c7be7e1096ec3aad5a8d38bee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 19 Mar 2026 16:26:28 +0700
Subject: [PATCH 223/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index bfa5668..f8f9021 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post219"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#1440fc7e2b5079daaecf3b37c9880f4f1bde6619" }
+version = "0.0.post220"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#0477c51f5cc5ecc4aac9e5c11943d15974f4f24a" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From d32ee022bb98395d65f40693510747b795530bee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 19 Mar 2026 17:25:51 +0700
Subject: [PATCH 224/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index f8f9021..edfdd23 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post220"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#0477c51f5cc5ecc4aac9e5c11943d15974f4f24a" }
+version = "0.0.post221"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#00b1b72b3b43cb0fad1be9be7316d63cc78bfc12" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From 08d5e1d7d602d11a786e7696f32c88ac31e50603 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 19 Mar 2026 17:35:50 +0700
Subject: [PATCH 225/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index edfdd23..21798c1 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post221"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#00b1b72b3b43cb0fad1be9be7316d63cc78bfc12" }
+version = "0.0.post222"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#ac1f04e7da2281b988f6fbef1aa73252e28c4b23" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From 5041132ff08e064daaffaedfab2b094eabcbb5d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 19 Mar 2026 18:06:40 +0700
Subject: [PATCH 226/236] Temporarily downgrade lmdb to version 1.x

https://github.com/jnwatson/py-lmdb/issues/431
---
 pyproject.toml |  2 +-
 uv.lock        | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index a5a4ee9..c81b70b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ dependencies = [
     "fastapi>=0.135.1",
     "gemini-webapi>=1.21.0",
     "httptools>=0.7.1",
-    "lmdb>=2.1.0",
+    "lmdb~=1.8.1",
     "loguru>=0.7.3",
     "orjson>=3.11.7",
     "pydantic-settings[yaml]>=2.13.1",
diff --git a/uv.lock b/uv.lock
index 21798c1..50bac58 100644
--- a/uv.lock
+++ b/uv.lock
@@ -159,7 +159,7 @@ requires-dist = [
     { name = "fastapi", specifier = ">=0.135.1" },
     { name = "gemini-webapi", git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models" },
     { name = "httptools", specifier = ">=0.7.1" },
-    { name = "lmdb", specifier = ">=2.1.0" },
+    { name = "lmdb", specifier = "~=1.8.1" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "orjson", specifier = ">=3.11.7" },
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.1" },
@@ -229,16 +229,16 @@ wheels = [
 
 [[package]]
 name = "lmdb"
-version = "2.1.0"
+version = "1.8.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/d8/a9/b06257089086ceb1d10ba8c65c605c9063e34e7b65fa456d2f66bfb2fc1d/lmdb-2.1.0.tar.gz", hash = "sha256:812be2c49aeb191565425c4e010956017d7290bc9bc3f3c7e4950336b4ddb764", size = 912813, upload-time = "2026-03-19T04:58:51.399Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/23/19/392f028e7ebcc1cc8212fe8a315a909b7a556278456f0bab9234d3a3b665/lmdb-1.8.1.tar.gz", hash = "sha256:44ef24033929e9cc227a7e17287473c452b462d716f118db885c667c80f57429", size = 886349, upload-time = "2026-03-12T23:21:48.42Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/91/92/8357de991821250e75ddafe32476c6c8e66fe773cda073ed020dbd1797e4/lmdb-2.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d3284f7b6f98ddecc4d126218b0509134cb36c167c0c104d0458521ca954c342", size = 108997, upload-time = "2026-03-19T04:58:26.566Z" },
-    { url = "https://files.pythonhosted.org/packages/9a/21/aa5f034479bc844cd1d3db606c01177b69f64ff6436061bcde2bcb88111a/lmdb-2.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:90a7d1caffd5cbdbecac88b598dace8dd038ac2c2b805c9faa0984a0b17bf997", size = 107790, upload-time = "2026-03-19T04:58:27.729Z" },
-    { url = "https://files.pythonhosted.org/packages/f5/d9/94977566909c81c5e2407f546e4e179a63ef6417a749095d3c7682f71d76/lmdb-2.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6fb2c908b30a4f63e5f15c158904ad6e8f474b5cf90bc08789599a5aa2d01b6b", size = 324669, upload-time = "2026-03-19T04:58:28.92Z" },
-    { url = "https://files.pythonhosted.org/packages/b8/42/08f14c83529a42456b26ab5be1ab332688f9bc36d38ccaf231c989ea41da/lmdb-2.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d48538aae010686a9dfabd1c0a976a0836c5044b8869249d05869ebd14c32639", size = 327653, upload-time = "2026-03-19T04:58:30.395Z" },
-    { url = "https://files.pythonhosted.org/packages/cc/18/42a90551d15bc3435e0258bb65b32a4d75f03a017cf1ac164c614f811664/lmdb-2.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:52df6033da9a5e74cb407460d1a122958c437d0d142f1286bb238c2f550f03fb", size = 104803, upload-time = "2026-03-19T04:58:32.067Z" },
-    { url = "https://files.pythonhosted.org/packages/ad/f7/60db1dbdf56e8e8c56674682d460d1e64a797c5c24abd2de9e5ebeb7c544/lmdb-2.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:170e40658d26730ccea9735cb0f73b9255b1a5c0f83d68e5f3f2f205d8a9e238", size = 99084, upload-time = "2026-03-19T04:58:33.437Z" },
+    { url = "https://files.pythonhosted.org/packages/75/5f/ec2fe6bb0986fae28db80292ffe5146ed7cc4d478d683d67050d2691a538/lmdb-1.8.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8b197007a5762411b7484e533ed1d03dca1b8ba1eee390233ac6e62ff45bd417", size = 101865, upload-time = "2026-03-12T23:21:12.902Z" },
+    { url = "https://files.pythonhosted.org/packages/75/35/1e43bba9658292c2ac787f5d003baa5ed37cad1b52666526edf4c908fb7d/lmdb-1.8.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a9454649e62bc6f4a45f9aed175fcdd6fc2e91922bf970fd561053c616281d0a", size = 100462, upload-time = "2026-03-12T23:21:14.369Z" },
+    { url = "https://files.pythonhosted.org/packages/64/8e/2b1a0caa42b6f980a8b8663a272b9a52d4fd51ef0ca36cdec768cea02978/lmdb-1.8.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a51881d284116d82ead233b20ae7f6dbec8624dd7b0593a755c84e0d0bc4cc29", size = 303313, upload-time = "2026-03-12T23:21:15.636Z" },
+    { url = "https://files.pythonhosted.org/packages/40/51/8061694cf7b883d2a166965cbaa961ea1ce692ce1782ac58091b5aa0fdb5/lmdb-1.8.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:04c5472bfc38377a2b32ae3b494d82d9c8db7c64e9053ca1b7c86aa862ebaaf9", size = 303895, upload-time = "2026-03-12T23:21:16.769Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/0f/c189c40d833ecd64f3e375a0bb02378110fc958916053ee687ec2c7d5079/lmdb-1.8.1-cp313-cp313-win_amd64.whl", hash = "sha256:fe0e34b2b20f47a108c3e04b397d1e27f080a7b0256c33efb5aef7bd1bccb923", size = 99707, upload-time = "2026-03-12T23:21:17.862Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/c3/3c87bede5b62163b768e6a4bca893f59d3996cb6fc4052bfd67847c0efd7/lmdb-1.8.1-cp313-cp313-win_arm64.whl", hash = "sha256:c3550849cdbaf0ead6265cb5b10134b223c2cede7ce7a1f3390a55975e3a06d4", size = 94605, upload-time = "2026-03-12T23:21:19.102Z" },
 ]
 
 [[package]]

From 5829ac01145ae39138541e70d5eaea1d228ee02a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 19 Mar 2026 18:28:22 +0700
Subject: [PATCH 227/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index 50bac58..2c09798 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post222"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#ac1f04e7da2281b988f6fbef1aa73252e28c4b23" }
+version = "0.0.post223"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#201b6562e8d8e4049c2c9abe13ad2790ca72420a" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From 6a0f99b0ea6c444914c2f7a117c8bc39d23e47e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 19 Mar 2026 19:24:27 +0700
Subject: [PATCH 228/236] Update dependencies to latest versions

---
 uv.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/uv.lock b/uv.lock
index 2c09798..8568ac2 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post223"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#201b6562e8d8e4049c2c9abe13ad2790ca72420a" }
+version = "0.0.post224"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#40d2cb47ee8586c9ba5afb1a12a3be78454b3bbb" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From 7abc979e64530a73b1ad3df259e6c2e2c3081b0f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 20 Mar 2026 09:41:21 +0700
Subject: [PATCH 229/236] Update dependencies to latest versions

---
 app/server/chat.py |   9 ++--
 pyproject.toml     |   6 +--
 uv.lock            | 108 ++++++++++++++++++++++-----------------------
 3 files changed, 62 insertions(+), 61 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index ec31626..9df91ca 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -941,16 +941,17 @@ async def _get_available_models(pool: GeminiClientPool) -> list[ModelData]:
 
             client_models = client.list_models()
             if client_models:
-                for am in client_models:
-                    if am.id and am.id not in seen_model_ids:
+                for model in client_models:
+                    model_id = model.model_name if model.model_name else model.model_id
+                    if model_id and model_id not in seen_model_ids:
                         models_data.append(
                             ModelData(
-                                id=am.id,
+                                id=model_id,
                                 created=now,
                                 owned_by="google",
                             )
                         )
-                        seen_model_ids.add(am.id)
+                        seen_model_ids.add(model_id)
 
     return models_data
 
diff --git a/pyproject.toml b/pyproject.toml
index c81b70b..206eddc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ dependencies = [
     "fastapi>=0.135.1",
     "gemini-webapi>=1.21.0",
     "httptools>=0.7.1",
-    "lmdb~=1.8.1",
+    "lmdb>=2.1.1",
     "loguru>=0.7.3",
     "orjson>=3.11.7",
     "pydantic-settings[yaml]>=2.13.1",
@@ -23,8 +23,8 @@ Repository = "https://github.com/Nativu5/Gemini-FastAPI"
 [project.optional-dependencies]
 dev = [
     "pytest>=9.0.2",
-    "ruff>=0.15.6",
-    "ty>=0.0.23",
+    "ruff>=0.15.7",
+    "ty>=0.0.24",
 ]
 
 [dependency-groups]
diff --git a/uv.lock b/uv.lock
index 8568ac2..00eba05 100644
--- a/uv.lock
+++ b/uv.lock
@@ -159,13 +159,13 @@ requires-dist = [
     { name = "fastapi", specifier = ">=0.135.1" },
     { name = "gemini-webapi", git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models" },
     { name = "httptools", specifier = ">=0.7.1" },
-    { name = "lmdb", specifier = "~=1.8.1" },
+    { name = "lmdb", specifier = ">=2.1.1" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "orjson", specifier = ">=3.11.7" },
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.1" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" },
-    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.6" },
-    { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.23" },
+    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.7" },
+    { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.24" },
     { name = "uvicorn", specifier = ">=0.42.0" },
     { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" },
 ]
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post224"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#40d2cb47ee8586c9ba5afb1a12a3be78454b3bbb" }
+version = "0.0.post227"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#e53ea40be2896a882355bafae5f8d20cd373bcf9" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },
@@ -229,16 +229,16 @@ wheels = [
 
 [[package]]
 name = "lmdb"
-version = "1.8.1"
+version = "2.1.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/23/19/392f028e7ebcc1cc8212fe8a315a909b7a556278456f0bab9234d3a3b665/lmdb-1.8.1.tar.gz", hash = "sha256:44ef24033929e9cc227a7e17287473c452b462d716f118db885c667c80f57429", size = 886349, upload-time = "2026-03-12T23:21:48.42Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/5a/4e/d78a06af228216102fcb4b703f4b6a2f565978224d5623e20e02d90aeed3/lmdb-2.1.1.tar.gz", hash = "sha256:0317062326ae2f66e3bbde6400907651ea58c27a250097511a28d13c467fa5f1", size = 913160, upload-time = "2026-03-19T13:56:26.139Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/75/5f/ec2fe6bb0986fae28db80292ffe5146ed7cc4d478d683d67050d2691a538/lmdb-1.8.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8b197007a5762411b7484e533ed1d03dca1b8ba1eee390233ac6e62ff45bd417", size = 101865, upload-time = "2026-03-12T23:21:12.902Z" },
-    { url = "https://files.pythonhosted.org/packages/75/35/1e43bba9658292c2ac787f5d003baa5ed37cad1b52666526edf4c908fb7d/lmdb-1.8.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a9454649e62bc6f4a45f9aed175fcdd6fc2e91922bf970fd561053c616281d0a", size = 100462, upload-time = "2026-03-12T23:21:14.369Z" },
-    { url = "https://files.pythonhosted.org/packages/64/8e/2b1a0caa42b6f980a8b8663a272b9a52d4fd51ef0ca36cdec768cea02978/lmdb-1.8.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a51881d284116d82ead233b20ae7f6dbec8624dd7b0593a755c84e0d0bc4cc29", size = 303313, upload-time = "2026-03-12T23:21:15.636Z" },
-    { url = "https://files.pythonhosted.org/packages/40/51/8061694cf7b883d2a166965cbaa961ea1ce692ce1782ac58091b5aa0fdb5/lmdb-1.8.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:04c5472bfc38377a2b32ae3b494d82d9c8db7c64e9053ca1b7c86aa862ebaaf9", size = 303895, upload-time = "2026-03-12T23:21:16.769Z" },
-    { url = "https://files.pythonhosted.org/packages/7f/0f/c189c40d833ecd64f3e375a0bb02378110fc958916053ee687ec2c7d5079/lmdb-1.8.1-cp313-cp313-win_amd64.whl", hash = "sha256:fe0e34b2b20f47a108c3e04b397d1e27f080a7b0256c33efb5aef7bd1bccb923", size = 99707, upload-time = "2026-03-12T23:21:17.862Z" },
-    { url = "https://files.pythonhosted.org/packages/6c/c3/3c87bede5b62163b768e6a4bca893f59d3996cb6fc4052bfd67847c0efd7/lmdb-1.8.1-cp313-cp313-win_arm64.whl", hash = "sha256:c3550849cdbaf0ead6265cb5b10134b223c2cede7ce7a1f3390a55975e3a06d4", size = 94605, upload-time = "2026-03-12T23:21:19.102Z" },
+    { url = "https://files.pythonhosted.org/packages/af/64/0ad40c3d06f89d7e5b6894537a0bd2829b592ef191f61b704157ad641f33/lmdb-2.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f43f7a7907fa726597d235dd83febe7ec47b45b78952b848f0bdf50b58fe1bf1", size = 109123, upload-time = "2026-03-19T13:55:58.522Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/62/86257b169acc61282110166581167f099c08bbb76aea7bc4da0c0b64c8b2/lmdb-2.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:24382db5cd139866d222471f4fb14fb511b16bf1ed0686bdf7fc808bf07ed8a4", size = 107803, upload-time = "2026-03-19T13:55:59.907Z" },
+    { url = "https://files.pythonhosted.org/packages/26/2d/8889fa81eb232dd5fec10f3178e22f3ae4f385c46be6124e29709f3bfdbb/lmdb-2.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:76e8c14a76bb7695c1778181dce9c352fb565b5e113c74981ec692d5d6820efb", size = 324703, upload-time = "2026-03-19T13:56:01.309Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/d9/ec2e2370d35214e12abd1c9dada369c460e694f0c6fe385a200a2a25eaf3/lmdb-2.1.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7700999c4fa7762577d4b3deedd48f6c25ce396dfb17f61dd48f50dcf99f78d6", size = 328101, upload-time = "2026-03-19T13:56:02.806Z" },
+    { url = "https://files.pythonhosted.org/packages/24/c7/e65ca28f46479e92dfc7250dab5259ae6eaa0e5075db47f52a4a1462adb1/lmdb-2.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:74e4442102423e185347108cc67933411ec13e41866f57f6e9868c6ef5642b88", size = 104800, upload-time = "2026-03-19T13:56:04.173Z" },
+    { url = "https://files.pythonhosted.org/packages/33/51/e8f12e4b7a0ef82b42d8a37201db99f8dd7d26113a6b0cbf5c441692e2ad/lmdb-2.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:26b0412a38fffd8587becde3c2b0ee606b8906ae0ee5060b0ed6d44610703dec", size = 99048, upload-time = "2026-03-19T13:56:05.499Z" },
 ]
 
 [[package]]
@@ -417,27 +417,27 @@ wheels = [
 
 [[package]]
 name = "ruff"
-version = "0.15.6"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/51/df/f8629c19c5318601d3121e230f74cbee7a3732339c52b21daa2b82ef9c7d/ruff-0.15.6.tar.gz", hash = "sha256:8394c7bb153a4e3811a4ecdacd4a8e6a4fa8097028119160dffecdcdf9b56ae4", size = 4597916, upload-time = "2026-03-12T23:05:47.51Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/9e/2f/4e03a7e5ce99b517e98d3b4951f411de2b0fa8348d39cf446671adcce9a2/ruff-0.15.6-py3-none-linux_armv6l.whl", hash = "sha256:7c98c3b16407b2cf3d0f2b80c80187384bc92c6774d85fefa913ecd941256fff", size = 10508953, upload-time = "2026-03-12T23:05:17.246Z" },
-    { url = "https://files.pythonhosted.org/packages/70/60/55bcdc3e9f80bcf39edf0cd272da6fa511a3d94d5a0dd9e0adf76ceebdb4/ruff-0.15.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ee7dcfaad8b282a284df4aa6ddc2741b3f4a18b0555d626805555a820ea181c3", size = 10942257, upload-time = "2026-03-12T23:05:23.076Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/f9/005c29bd1726c0f492bfa215e95154cf480574140cb5f867c797c18c790b/ruff-0.15.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:3bd9967851a25f038fc8b9ae88a7fbd1b609f30349231dffaa37b6804923c4bb", size = 10322683, upload-time = "2026-03-12T23:05:33.738Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/74/2f861f5fd7cbb2146bddb5501450300ce41562da36d21868c69b7a828169/ruff-0.15.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13f4594b04e42cd24a41da653886b04d2ff87adbf57497ed4f728b0e8a4866f8", size = 10660986, upload-time = "2026-03-12T23:05:53.245Z" },
-    { url = "https://files.pythonhosted.org/packages/c1/a1/309f2364a424eccb763cdafc49df843c282609f47fe53aa83f38272389e0/ruff-0.15.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e2ed8aea2f3fe57886d3f00ea5b8aae5bf68d5e195f487f037a955ff9fbaac9e", size = 10332177, upload-time = "2026-03-12T23:05:56.145Z" },
-    { url = "https://files.pythonhosted.org/packages/30/41/7ebf1d32658b4bab20f8ac80972fb19cd4e2c6b78552be263a680edc55ac/ruff-0.15.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70789d3e7830b848b548aae96766431c0dc01a6c78c13381f423bf7076c66d15", size = 11170783, upload-time = "2026-03-12T23:06:01.742Z" },
-    { url = "https://files.pythonhosted.org/packages/76/be/6d488f6adca047df82cd62c304638bcb00821c36bd4881cfca221561fdfc/ruff-0.15.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:542aaf1de3154cea088ced5a819ce872611256ffe2498e750bbae5247a8114e9", size = 12044201, upload-time = "2026-03-12T23:05:28.697Z" },
-    { url = "https://files.pythonhosted.org/packages/71/68/e6f125df4af7e6d0b498f8d373274794bc5156b324e8ab4bf5c1b4fc0ec7/ruff-0.15.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c22e6f02c16cfac3888aa636e9eba857254d15bbacc9906c9689fdecb1953ab", size = 11421561, upload-time = "2026-03-12T23:05:31.236Z" },
-    { url = "https://files.pythonhosted.org/packages/f1/9f/f85ef5fd01a52e0b472b26dc1b4bd228b8f6f0435975442ffa4741278703/ruff-0.15.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98893c4c0aadc8e448cfa315bd0cc343a5323d740fe5f28ef8a3f9e21b381f7e", size = 11310928, upload-time = "2026-03-12T23:05:45.288Z" },
-    { url = "https://files.pythonhosted.org/packages/8c/26/b75f8c421f5654304b89471ed384ae8c7f42b4dff58fa6ce1626d7f2b59a/ruff-0.15.6-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:70d263770d234912374493e8cc1e7385c5d49376e41dfa51c5c3453169dc581c", size = 11235186, upload-time = "2026-03-12T23:05:50.677Z" },
-    { url = "https://files.pythonhosted.org/packages/fc/d4/d5a6d065962ff7a68a86c9b4f5500f7d101a0792078de636526c0edd40da/ruff-0.15.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:55a1ad63c5a6e54b1f21b7514dfadc0c7fb40093fa22e95143cf3f64ebdcd512", size = 10635231, upload-time = "2026-03-12T23:05:37.044Z" },
-    { url = "https://files.pythonhosted.org/packages/d6/56/7c3acf3d50910375349016cf33de24be021532042afbed87942858992491/ruff-0.15.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8dc473ba093c5ec238bb1e7429ee676dca24643c471e11fbaa8a857925b061c0", size = 10340357, upload-time = "2026-03-12T23:06:04.748Z" },
-    { url = "https://files.pythonhosted.org/packages/06/54/6faa39e9c1033ff6a3b6e76b5df536931cd30caf64988e112bbf91ef5ce5/ruff-0.15.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:85b042377c2a5561131767974617006f99f7e13c63c111b998f29fc1e58a4cfb", size = 10860583, upload-time = "2026-03-12T23:05:58.978Z" },
-    { url = "https://files.pythonhosted.org/packages/cb/1e/509a201b843b4dfb0b32acdedf68d951d3377988cae43949ba4c4133a96a/ruff-0.15.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cef49e30bc5a86a6a92098a7fbf6e467a234d90b63305d6f3ec01225a9d092e0", size = 11410976, upload-time = "2026-03-12T23:05:39.955Z" },
-    { url = "https://files.pythonhosted.org/packages/6c/25/3fc9114abf979a41673ce877c08016f8e660ad6cf508c3957f537d2e9fa9/ruff-0.15.6-py3-none-win32.whl", hash = "sha256:bbf67d39832404812a2d23020dda68fee7f18ce15654e96fb1d3ad21a5fe436c", size = 10616872, upload-time = "2026-03-12T23:05:42.451Z" },
-    { url = "https://files.pythonhosted.org/packages/89/7a/09ece68445ceac348df06e08bf75db72d0e8427765b96c9c0ffabc1be1d9/ruff-0.15.6-py3-none-win_amd64.whl", hash = "sha256:aee25bc84c2f1007ecb5037dff75cef00414fdf17c23f07dc13e577883dca406", size = 11787271, upload-time = "2026-03-12T23:05:20.168Z" },
-    { url = "https://files.pythonhosted.org/packages/7f/d0/578c47dd68152ddddddf31cd7fc67dc30b7cdf639a86275fda821b0d9d98/ruff-0.15.6-py3-none-win_arm64.whl", hash = "sha256:c34de3dd0b0ba203be50ae70f5910b17188556630e2178fd7d79fc030eb0d837", size = 11060497, upload-time = "2026-03-12T23:05:25.968Z" },
+version = "0.15.7"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a1/22/9e4f66ee588588dc6c9af6a994e12d26e19efbe874d1a909d09a6dac7a59/ruff-0.15.7.tar.gz", hash = "sha256:04f1ae61fc20fe0b148617c324d9d009b5f63412c0b16474f3d5f1a1a665f7ac", size = 4601277, upload-time = "2026-03-19T16:26:22.605Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/41/2f/0b08ced94412af091807b6119ca03755d651d3d93a242682bf020189db94/ruff-0.15.7-py3-none-linux_armv6l.whl", hash = "sha256:a81cc5b6910fb7dfc7c32d20652e50fa05963f6e13ead3c5915c41ac5d16668e", size = 10489037, upload-time = "2026-03-19T16:26:32.47Z" },
+    { url = "https://files.pythonhosted.org/packages/91/4a/82e0fa632e5c8b1eba5ee86ecd929e8ff327bbdbfb3c6ac5d81631bef605/ruff-0.15.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:722d165bd52403f3bdabc0ce9e41fc47070ac56d7a91b4e0d097b516a53a3477", size = 10955433, upload-time = "2026-03-19T16:27:00.205Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/10/12586735d0ff42526ad78c049bf51d7428618c8b5c467e72508c694119df/ruff-0.15.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7fbc2448094262552146cbe1b9643a92f66559d3761f1ad0656d4991491af49e", size = 10269302, upload-time = "2026-03-19T16:26:26.183Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/5d/32b5c44ccf149a26623671df49cbfbd0a0ae511ff3df9d9d2426966a8d57/ruff-0.15.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b39329b60eba44156d138275323cc726bbfbddcec3063da57caa8a8b1d50adf", size = 10607625, upload-time = "2026-03-19T16:27:03.263Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/f1/f0001cabe86173aaacb6eb9bb734aa0605f9a6aa6fa7d43cb49cbc4af9c9/ruff-0.15.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87768c151808505f2bfc93ae44e5f9e7c8518943e5074f76ac21558ef5627c85", size = 10324743, upload-time = "2026-03-19T16:27:09.791Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/87/b8a8f3d56b8d848008559e7c9d8bf367934d5367f6d932ba779456e2f73b/ruff-0.15.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb0511670002c6c529ec66c0e30641c976c8963de26a113f3a30456b702468b0", size = 11138536, upload-time = "2026-03-19T16:27:06.101Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/f2/4fd0d05aab0c5934b2e1464784f85ba2eab9d54bffc53fb5430d1ed8b829/ruff-0.15.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0d19644f801849229db8345180a71bee5407b429dd217f853ec515e968a6912", size = 11994292, upload-time = "2026-03-19T16:26:48.718Z" },
+    { url = "https://files.pythonhosted.org/packages/64/22/fc4483871e767e5e95d1622ad83dad5ebb830f762ed0420fde7dfa9d9b08/ruff-0.15.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4806d8e09ef5e84eb19ba833d0442f7e300b23fe3f0981cae159a248a10f0036", size = 11398981, upload-time = "2026-03-19T16:26:54.513Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/99/66f0343176d5eab02c3f7fcd2de7a8e0dd7a41f0d982bee56cd1c24db62b/ruff-0.15.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dce0896488562f09a27b9c91b1f58a097457143931f3c4d519690dea54e624c5", size = 11242422, upload-time = "2026-03-19T16:26:29.277Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/3a/a7060f145bfdcce4c987ea27788b30c60e2c81d6e9a65157ca8afe646328/ruff-0.15.7-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:1852ce241d2bc89e5dc823e03cff4ce73d816b5c6cdadd27dbfe7b03217d2a12", size = 11232158, upload-time = "2026-03-19T16:26:42.321Z" },
+    { url = "https://files.pythonhosted.org/packages/a7/53/90fbb9e08b29c048c403558d3cdd0adf2668b02ce9d50602452e187cd4af/ruff-0.15.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5f3e4b221fb4bd293f79912fc5e93a9063ebd6d0dcbd528f91b89172a9b8436c", size = 10577861, upload-time = "2026-03-19T16:26:57.459Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/aa/5f486226538fe4d0f0439e2da1716e1acf895e2a232b26f2459c55f8ddad/ruff-0.15.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:b15e48602c9c1d9bdc504b472e90b90c97dc7d46c7028011ae67f3861ceba7b4", size = 10327310, upload-time = "2026-03-19T16:26:35.909Z" },
+    { url = "https://files.pythonhosted.org/packages/99/9e/271afdffb81fe7bfc8c43ba079e9d96238f674380099457a74ccb3863857/ruff-0.15.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b4705e0e85cedc74b0a23cf6a179dbb3df184cb227761979cc76c0440b5ab0d", size = 10840752, upload-time = "2026-03-19T16:26:45.723Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/29/a4ae78394f76c7759953c47884eb44de271b03a66634148d9f7d11e721bd/ruff-0.15.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:112c1fa316a558bb34319282c1200a8bf0495f1b735aeb78bfcb2991e6087580", size = 11336961, upload-time = "2026-03-19T16:26:39.076Z" },
+    { url = "https://files.pythonhosted.org/packages/26/6b/8786ba5736562220d588a2f6653e6c17e90c59ced34a2d7b512ef8956103/ruff-0.15.7-py3-none-win32.whl", hash = "sha256:6d39e2d3505b082323352f733599f28169d12e891f7dd407f2d4f54b4c2886de", size = 10582538, upload-time = "2026-03-19T16:26:15.992Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/e9/346d4d3fffc6871125e877dae8d9a1966b254fbd92a50f8561078b88b099/ruff-0.15.7-py3-none-win_amd64.whl", hash = "sha256:4d53d712ddebcd7dace1bc395367aec12c057aacfe9adbb6d832302575f4d3a1", size = 11755839, upload-time = "2026-03-19T16:26:19.897Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/e8/726643a3ea68c727da31570bde48c7a10f1aa60eddd628d94078fec586ff/ruff-0.15.7-py3-none-win_arm64.whl", hash = "sha256:18e8d73f1c3fdf27931497972250340f92e8c861722161a9caeb89a58ead6ed2", size = 11023304, upload-time = "2026-03-19T16:26:51.669Z" },
 ]
 
 [[package]]
@@ -454,26 +454,26 @@ wheels = [
 
 [[package]]
 name = "ty"
-version = "0.0.23"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/75/ba/d3c998ff4cf6b5d75b39356db55fe1b7caceecc522b9586174e6a5dee6f7/ty-0.0.23.tar.gz", hash = "sha256:5fb05db58f202af366f80ef70f806e48f5237807fe424ec787c9f289e3f3a4ef", size = 5341461, upload-time = "2026-03-13T12:34:23.125Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/f4/21/aab32603dfdfacd4819e52fa8c6074e7bd578218a5142729452fc6a62db6/ty-0.0.23-py3-none-linux_armv6l.whl", hash = "sha256:e810eef1a5f1cfc0731a58af8d2f334906a96835829767aed00026f1334a8dd7", size = 10329096, upload-time = "2026-03-13T12:34:09.432Z" },
-    { url = "https://files.pythonhosted.org/packages/9f/a9/dd3287a82dce3df546ec560296208d4905dcf06346b6e18c2f3c63523bd1/ty-0.0.23-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:e43d36bd89a151ddcad01acaeff7dcc507cb73ff164c1878d2d11549d39a061c", size = 10156631, upload-time = "2026-03-13T12:34:53.122Z" },
-    { url = "https://files.pythonhosted.org/packages/0f/01/3f25909b02fac29bb0a62b2251f8d62e65d697781ffa4cf6b47a4c075c85/ty-0.0.23-py3-none-macosx_11_0_arm64.whl", hash = "sha256:bd6a340969577b4645f231572c4e46012acba2d10d4c0c6570fe1ab74e76ae00", size = 9653211, upload-time = "2026-03-13T12:34:15.049Z" },
-    { url = "https://files.pythonhosted.org/packages/d5/60/bfc0479572a6f4b90501c869635faf8d84c8c68ffc5dd87d04f049affabc/ty-0.0.23-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:341441783e626eeb7b1ec2160432956aed5734932ab2d1c26f94d0c98b229937", size = 10156143, upload-time = "2026-03-13T12:34:34.468Z" },
-    { url = "https://files.pythonhosted.org/packages/3a/81/8a93e923535a340f54bea20ff196f6b2787782b2f2f399bd191c4bc132d6/ty-0.0.23-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8ce1dc66c26d4167e2c78d12fa870ef5a7ec9cc344d2baaa6243297cfa88bd52", size = 10136632, upload-time = "2026-03-13T12:34:28.832Z" },
-    { url = "https://files.pythonhosted.org/packages/da/cb/2ac81c850c58acc9f976814404d28389c9c1c939676e32287b9cff61381e/ty-0.0.23-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bae1e7a294bf8528836f7617dc5c360ea2dddb63789fc9471ae6753534adca05", size = 10655025, upload-time = "2026-03-13T12:34:37.105Z" },
-    { url = "https://files.pythonhosted.org/packages/b5/9b/bac771774c198c318ae699fc013d8cd99ed9caf993f661fba11238759244/ty-0.0.23-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d2b162768764d9dc177c83fb497a51532bb67cbebe57b8fa0f2668436bf53f3c", size = 11230107, upload-time = "2026-03-13T12:34:20.751Z" },
-    { url = "https://files.pythonhosted.org/packages/14/09/7644fb0e297265e18243f878aca343593323b9bb19ed5278dcbc63781be0/ty-0.0.23-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d28384e48ca03b34e4e2beee0e230c39bbfb68994bb44927fec61ef3642900da", size = 10934177, upload-time = "2026-03-13T12:34:17.904Z" },
-    { url = "https://files.pythonhosted.org/packages/18/14/69a25a0cad493fb6a947302471b579a03516a3b00e7bece77fdc6b4afb9b/ty-0.0.23-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:559d9a299df793cb7a7902caed5eda8a720ff69164c31c979673e928f02251ee", size = 10752487, upload-time = "2026-03-13T12:34:31.785Z" },
-    { url = "https://files.pythonhosted.org/packages/9d/2a/42fc3cbccf95af0a62308ebed67e084798ab7a85ef073c9986ef18032743/ty-0.0.23-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:32a7b8a14a98e1d20a9d8d2af23637ed7efdb297ac1fa2450b8e465d05b94482", size = 10133007, upload-time = "2026-03-13T12:34:42.838Z" },
-    { url = "https://files.pythonhosted.org/packages/e1/69/307833f1b52fa3670e0a1d496e43ef7df556ecde838192d3fcb9b35e360d/ty-0.0.23-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:6f803b9b9cca87af793467973b9abdd4b83e6b96d9b5e749d662cff7ead70b6d", size = 10169698, upload-time = "2026-03-13T12:34:12.351Z" },
-    { url = "https://files.pythonhosted.org/packages/89/ae/5dd379ec22d0b1cba410d7af31c366fcedff191d5b867145913a64889f66/ty-0.0.23-py3-none-musllinux_1_2_i686.whl", hash = "sha256:4a0bf086ec8e2197b7ea7ebfcf4be36cb6a52b235f8be61647ef1b2d99d6ffd3", size = 10346080, upload-time = "2026-03-13T12:34:40.012Z" },
-    { url = "https://files.pythonhosted.org/packages/98/c7/dfc83203d37998620bba9c4873a080c8850a784a8a46f56f8163c5b4e320/ty-0.0.23-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:252539c3fcd7aeb9b8d5c14e2040682c3e1d7ff640906d63fd2c4ce35865a4ba", size = 10848162, upload-time = "2026-03-13T12:34:45.421Z" },
-    { url = "https://files.pythonhosted.org/packages/89/08/05481511cfbcc1fd834b6c67aaae090cb609a079189ddf2032139ccfc490/ty-0.0.23-py3-none-win32.whl", hash = "sha256:51b591d19eef23bbc3807aef77d38fa1f003c354e1da908aa80ea2dca0993f77", size = 9748283, upload-time = "2026-03-13T12:34:50.607Z" },
-    { url = "https://files.pythonhosted.org/packages/31/2e/eaed4ff5c85e857a02415084c394e02c30476b65e158eec1938fdaa9a205/ty-0.0.23-py3-none-win_amd64.whl", hash = "sha256:1e137e955f05c501cfbb81dd2190c8fb7d01ec037c7e287024129c722a83c9ad", size = 10698355, upload-time = "2026-03-13T12:34:26.134Z" },
-    { url = "https://files.pythonhosted.org/packages/91/29/b32cb7b4c7d56b9ed50117f8ad6e45834aec293e4cb14749daab4e9236d5/ty-0.0.23-py3-none-win_arm64.whl", hash = "sha256:a0399bd13fd2cd6683fd0a2d59b9355155d46546d8203e152c556ddbdeb20842", size = 10155890, upload-time = "2026-03-13T12:34:48.082Z" },
+version = "0.0.24"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7a/96/652a425030f95dc2c9548d9019e52502e17079e1daeefbc4036f1c0905b4/ty-0.0.24.tar.gz", hash = "sha256:9fe42f6b98207bdaef51f71487d6d087f2cb02555ee3939884d779b2b3cc8bfc", size = 5354286, upload-time = "2026-03-19T16:55:57.035Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/da/e5/34457ee11708e734ba81ad65723af83030e484f961e281d57d1eecf08951/ty-0.0.24-py3-none-linux_armv6l.whl", hash = "sha256:1ab4f1f61334d533a3fdf5d9772b51b1300ac5da4f3cdb0be9657a3ccb2ce3e7", size = 10394877, upload-time = "2026-03-19T16:55:54.246Z" },
+    { url = "https://files.pythonhosted.org/packages/44/81/bc9a1b1a87f43db15ab64ad781a4f999734ec3b470ad042624fa875b20e6/ty-0.0.24-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:facbf2c4aaa6985229e08f8f9bf152215eb078212f22b5c2411f35386688ab42", size = 10211109, upload-time = "2026-03-19T16:55:28.554Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/63/cfc805adeaa61d63ba3ea71127efa7d97c40ba36d97ee7bd957341d05107/ty-0.0.24-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b6d2a3b6d4470c483552a31e9b368c86f154dcc964bccb5406159dc9cd362246", size = 9694769, upload-time = "2026-03-19T16:55:34.309Z" },
+    { url = "https://files.pythonhosted.org/packages/33/09/edc220726b6ec44a58900401f6b27140997ef15026b791e26b69a6e69eb5/ty-0.0.24-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c94c25d0500939fd5f8f16ce41cbed5b20528702c1d649bf80300253813f0a2", size = 10176287, upload-time = "2026-03-19T16:55:37.17Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/bf/cbe2227be711e65017655d8ee4d050f4c92b113fb4dc4c3bd6a19d3a86d8/ty-0.0.24-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:89cbe7bc7df0fab02dbd8cda79b737df83f1ef7fb573b08c0ee043dc68cffb08", size = 10214832, upload-time = "2026-03-19T16:56:08.518Z" },
+    { url = "https://files.pythonhosted.org/packages/af/1d/d15803ee47e9143d10e10bd81ccc14761d08758082bda402950685f0ddfe/ty-0.0.24-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db2c5d269bcc9b764850c99f457b5018a79b3ef40ecfbc03344e65effd6cf743", size = 10709892, upload-time = "2026-03-19T16:56:05.727Z" },
+    { url = "https://files.pythonhosted.org/packages/36/12/6db0d86c477147f67b9052de209421d76c3e855197b000c25fcbbe86b3a2/ty-0.0.24-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba44512db5b97c3bbd59d93e11296e8548d0c9a3bdd1280de36d7ff22d351896", size = 11280872, upload-time = "2026-03-19T16:56:02.899Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/fc/155fe83a97c06d33ccc9e0f428258b32df2e08a428300c715d34757f0111/ty-0.0.24-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a52b7f589c3205512a9c50ba5b2b1e8c0698b72e51b8b9285c90420c06f1cae8", size = 11060520, upload-time = "2026-03-19T16:55:59.956Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/f1/32c05a1c4c3c2a95c5b7361dee03a9bf1231d4ad096b161c838b45bce5a0/ty-0.0.24-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7981df5c709c054da4ac5d7c93f8feb8f45e69e829e4461df4d5f0988fe67d04", size = 10791455, upload-time = "2026-03-19T16:55:25.728Z" },
+    { url = "https://files.pythonhosted.org/packages/17/2c/53c1ea6bedfa4d4ab64d4de262d8f5e405ecbffefd364459c628c0310d33/ty-0.0.24-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b2860151ad95a00d0f0280b8fef79900d08dcd63276b57e6e5774f2c055979c5", size = 10156708, upload-time = "2026-03-19T16:55:45.563Z" },
+    { url = "https://files.pythonhosted.org/packages/45/39/7d2919cf194707169474d80720a5f3d793e983416f25e7ffcf80504c9df2/ty-0.0.24-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:5674a1146d927ab77ff198a88e0c4505134ced342a0e7d1beb4a076a728b7496", size = 10236263, upload-time = "2026-03-19T16:55:31.474Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/7f/48eac722f2fd12a5b7aae0effdcb75c46053f94b783d989e3ef0d7380082/ty-0.0.24-py3-none-musllinux_1_2_i686.whl", hash = "sha256:438ecbf1608a9b16dd84502f3f1b23ef2ef32bbd0ab3e0ca5a82f0e0d1cd41ea", size = 10402559, upload-time = "2026-03-19T16:55:39.602Z" },
+    { url = "https://files.pythonhosted.org/packages/75/e0/8cf868b9749ce1e5166462759545964e95b02353243594062b927d8bff2a/ty-0.0.24-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ddeed3098dd92a83964e7aa7b41e509ba3530eb539fc4cd8322ff64a09daf1f5", size = 10893684, upload-time = "2026-03-19T16:55:51.439Z" },
+    { url = "https://files.pythonhosted.org/packages/17/9f/f54bf3be01d2c2ed731d10a5afa3324dc66f987a6ae0a4a6cbfa2323d080/ty-0.0.24-py3-none-win32.whl", hash = "sha256:83013fb3a4764a8f8bcc6ca11ff8bdfd8c5f719fc249241cb2b8916e80778eb1", size = 9781542, upload-time = "2026-03-19T16:56:11.588Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/49/c004c5cc258b10b3a145666e9a9c28ae7678bc958c8926e8078d5d769081/ty-0.0.24-py3-none-win_amd64.whl", hash = "sha256:748a60eb6912d1cf27aaab105ffadb6f4d2e458a3fcadfbd3cf26db0d8062eeb", size = 10764801, upload-time = "2026-03-19T16:55:42.752Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/59/006a074e185bfccf5e4c026015245ab4fcd2362b13a8d24cf37a277909a9/ty-0.0.24-py3-none-win_arm64.whl", hash = "sha256:280a3d31e86d0721947238f17030c33f0911cae851d108ea9f4e3ab12a5ed01f", size = 10194093, upload-time = "2026-03-19T16:55:48.303Z" },
 ]
 
 [[package]]

From c3730cadfcd77b6e12edee1f49fc0f57b1e3fe87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Fri, 20 Mar 2026 21:33:49 +0700
Subject: [PATCH 230/236] Update dependencies to latest versions

---
 app/server/chat.py     | 8 ++++----
 app/services/client.py | 6 ------
 uv.lock                | 4 ++--
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/app/server/chat.py b/app/server/chat.py
index 9df91ca..8b3094d 100644
--- a/app/server/chat.py
+++ b/app/server/chat.py
@@ -923,16 +923,16 @@ async def _get_available_models(pool: GeminiClientPool) -> list[ModelData]:
     models_data = []
     seen_model_ids = set()
 
-    for m in g_config.gemini.models:
-        if m.model_name and m.model_name not in seen_model_ids:
+    for model in g_config.gemini.models:
+        if model.model_name and model.model_name not in seen_model_ids:
             models_data.append(
                 ModelData(
-                    id=m.model_name,
+                    id=model.model_name,
                     created=now,
                     owned_by="custom",
                 )
             )
-            seen_model_ids.add(m.model_name)
+            seen_model_ids.add(model.model_name)
 
     if strategy == "append":
         for client in pool.clients:
diff --git a/app/services/client.py b/app/services/client.py
index b9ba25e..a83e1e6 100644
--- a/app/services/client.py
+++ b/app/services/client.py
@@ -14,12 +14,6 @@
     save_url_to_tempfile,
 )
 
-_UNSET = object()
-
-
-def _resolve(value: Any, fallback: Any):
-    return fallback if value is _UNSET else value
-
 
 class GeminiClientWrapper(GeminiClient):
     """Gemini client with helper methods."""
diff --git a/uv.lock b/uv.lock
index 00eba05..7a61027 100644
--- a/uv.lock
+++ b/uv.lock
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post227"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#e53ea40be2896a882355bafae5f8d20cd373bcf9" }
+version = "0.0.post231"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#f894820435530a9dbccca7de1080e7899c015b64" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },

From ccbd7bb3b461e4f9d119662654d099b2c85e480c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Sat, 21 Mar 2026 20:55:01 +0700
Subject: [PATCH 231/236] Enable the debug logging level to let users see
 warnings about cookie issues.

---
 app/utils/config.py  | 2 +-
 app/utils/logging.py | 2 +-
 config/config.yaml   | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/utils/config.py b/app/utils/config.py
index e569fbf..f84385a 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -93,7 +93,7 @@ class GeminiConfig(BaseModel):
         ge=60,
         description="Interval in seconds to refresh Gemini sessions (Not less than 60s)",
     )
-    verbose: bool = Field(False, description="Enable verbose logging for Gemini API requests")
+    verbose: bool = Field(True, description="Enable verbose logging for Gemini API requests")
     max_chars_per_request: int = Field(
         default=1_000_000,
         ge=1,
diff --git a/app/utils/logging.py b/app/utils/logging.py
index 87fcc7f..1a5f9fe 100644
--- a/app/utils/logging.py
+++ b/app/utils/logging.py
@@ -65,4 +65,4 @@ def emit(self, record: logging.LogRecord) -> None:
             logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage())
 
     # Remove all existing handlers and add our interceptor
-    logging.basicConfig(handlers=[InterceptHandler()], level="INFO", force=True)
+    logging.basicConfig(handlers=[InterceptHandler()], level="DEBUG", force=True)
diff --git a/config/config.yaml b/config/config.yaml
index 9321c61..fbce483 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -18,7 +18,7 @@ cors:
 
 gemini:
   clients:
-    - id: "example-id-1"   # Arbitrary client ID
+    - id: "client-id-1"    # Arbitrary client ID
       secure_1psid: "YOUR_SECURE_1PSID_HERE"      # Gemini Secure 1PSID
       secure_1psidts: "YOUR_SECURE_1PSIDTS_HERE"  # Gemini Secure 1PSIDTS
       proxy: null          # Optional proxy URL (null/empty means direct connection)
@@ -26,7 +26,7 @@ gemini:
   watchdog_timeout: 90     # Watchdog timeout in seconds (Not less than 30s)
   auto_refresh: true       # Auto-refresh session cookies
   refresh_interval: 600    # Refresh interval in seconds (Not less than 60s)
-  verbose: false           # Enable verbose logging for Gemini requests
+  verbose: true            # Enable verbose logging for Gemini requests
   max_chars_per_request: 1000000     # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit
   model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only)
   models: []
@@ -38,4 +38,4 @@ storage:
   retention_days: 14       # Number of days to retain conversations before cleanup
 
 logging:
-  level: "INFO"            # Log level: DEBUG, INFO, WARNING, ERROR
+  level: "DEBUG"           # Log level: DEBUG, INFO, WARNING, ERROR

From cd402d8a30e053d70f5f66cbddc31c484ac3d292 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Tue, 24 Mar 2026 09:20:41 +0700
Subject: [PATCH 232/236] Update dependencies to latest versions

---
 .github/workflows/lint.yaml |  5 +++--
 .github/workflows/track.yml |  4 +++-
 pyproject.toml              |  2 +-
 uv.lock                     | 18 +++++++++---------
 4 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
index f513a31..e3da9f0 100644
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -2,8 +2,6 @@ name: Lint and Type Check
 
 on:
   push:
-    branches:
-      - main
     paths:
       - "**.py"
       - "pyproject.toml"
@@ -33,5 +31,8 @@ jobs:
       - name: Run Ruff
         run: uv run ruff check .
 
+      - name: Run Ruff Format
+        run: uv run ruff format . --check
+
       - name: Run Ty Check
         run: uv run ty check
diff --git a/.github/workflows/track.yml b/.github/workflows/track.yml
index 778ef03..9ebb9c6 100644
--- a/.github/workflows/track.yml
+++ b/.github/workflows/track.yml
@@ -11,8 +11,10 @@ jobs:
     permissions:
       contents: write
       pull-requests: write
+
     steps:
-      - uses: actions/checkout@v6
+      - name: Checkout repository
+        uses: actions/checkout@v6
 
       - name: Install uv
         uses: astral-sh/setup-uv@v7
diff --git a/pyproject.toml b/pyproject.toml
index 206eddc..0d36bb0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
     "curl-cffi>=0.14.0",
-    "fastapi>=0.135.1",
+    "fastapi>=0.135.2",
     "gemini-webapi>=1.21.0",
     "httptools>=0.7.1",
     "lmdb>=2.1.1",
diff --git a/uv.lock b/uv.lock
index 7a61027..ff13204 100644
--- a/uv.lock
+++ b/uv.lock
@@ -110,7 +110,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.135.1"
+version = "0.135.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -119,9 +119,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/e7/7b/f8e0211e9380f7195ba3f3d40c292594fd81ba8ec4629e3854c353aaca45/fastapi-0.135.1.tar.gz", hash = "sha256:d04115b508d936d254cea545b7312ecaa58a7b3a0f84952535b4c9afae7668cd", size = 394962, upload-time = "2026-03-01T18:18:29.369Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c4/73/5903c4b13beae98618d64eb9870c3fac4f605523dd0312ca5c80dadbd5b9/fastapi-0.135.2.tar.gz", hash = "sha256:88a832095359755527b7f63bb4c6bc9edb8329a026189eed83d6c1afcf419d56", size = 395833, upload-time = "2026-03-23T14:12:41.697Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e4/72/42e900510195b23a56bde950d26a51f8b723846bfcaa0286e90287f0422b/fastapi-0.135.1-py3-none-any.whl", hash = "sha256:46e2fc5745924b7c840f71ddd277382af29ce1cdb7d5eab5bf697e3fb9999c9e", size = 116999, upload-time = "2026-03-01T18:18:30.831Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/ea/18f6d0457f9efb2fc6fa594857f92810cadb03024975726db6546b3d6fcf/fastapi-0.135.2-py3-none-any.whl", hash = "sha256:0af0447d541867e8db2a6a25c23a8c4bd80e2394ac5529bd87501bbb9e240ca5", size = 117407, upload-time = "2026-03-23T14:12:43.284Z" },
 ]
 
 [[package]]
@@ -156,7 +156,7 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
-    { name = "fastapi", specifier = ">=0.135.1" },
+    { name = "fastapi", specifier = ">=0.135.2" },
     { name = "gemini-webapi", git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=2.1.1" },
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post231"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#f894820435530a9dbccca7de1080e7899c015b64" }
+version = "0.0.post232"
+source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#57bb345555cd947f4f7d278088d3f41946fd3caf" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },
@@ -442,14 +442,14 @@ wheels = [
 
 [[package]]
 name = "starlette"
-version = "0.52.1"
+version = "1.0.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/81/69/17425771797c36cded50b7fe44e850315d039f28b15901ab44839e70b593/starlette-1.0.0.tar.gz", hash = "sha256:6a4beaf1f81bb472fd19ea9b918b50dc3a77a6f2e190a12954b25e6ed5eea149", size = 2655289, upload-time = "2026-03-22T18:29:46.779Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" },
 ]
 
 [[package]]

From 44968c90a0eff0e60232b33308c5f33ee71a27f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 26 Mar 2026 19:18:55 +0700
Subject: [PATCH 233/236] Update dependencies to latest versions

---
 pyproject.toml |  8 +++----
 uv.lock        | 58 +++++++++++++++++++++++++-------------------------
 2 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0d36bb0..49a424f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,9 +22,9 @@ Repository = "https://github.com/Nativu5/Gemini-FastAPI"
 
 [project.optional-dependencies]
 dev = [
-    "pytest>=9.0.2",
-    "ruff>=0.15.7",
-    "ty>=0.0.24",
+    "pytest",
+    "ruff",
+    "ty",
 ]
 
 [dependency-groups]
@@ -65,4 +65,4 @@ quote-style = "double"
 indent-style = "space"
 
 [tool.uv.sources]
-gemini-webapi = { git = "https://github.com/luuquangvu/Gemini-API.git", rev = "retrieve-dynamic-models" }
+gemini-webapi = { git = "https://github.com/HanaokaYuzu/Gemini-API.git" }
diff --git a/uv.lock b/uv.lock
index ff13204..d7ddf48 100644
--- a/uv.lock
+++ b/uv.lock
@@ -22,14 +22,14 @@ wheels = [
 
 [[package]]
 name = "anyio"
-version = "4.12.1"
+version = "4.13.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "idna" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622, upload-time = "2026-03-24T12:59:09.671Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
+    { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" },
 ]
 
 [[package]]
@@ -157,15 +157,15 @@ dev = [
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
     { name = "fastapi", specifier = ">=0.135.2" },
-    { name = "gemini-webapi", git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models" },
+    { name = "gemini-webapi", git = "https://github.com/HanaokaYuzu/Gemini-API.git" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=2.1.1" },
     { name = "loguru", specifier = ">=0.7.3" },
     { name = "orjson", specifier = ">=3.11.7" },
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.1" },
-    { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.2" },
-    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.15.7" },
-    { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.24" },
+    { name = "pytest", marker = "extra == 'dev'" },
+    { name = "ruff", marker = "extra == 'dev'" },
+    { name = "ty", marker = "extra == 'dev'" },
     { name = "uvicorn", specifier = ">=0.42.0" },
     { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" },
 ]
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "0.0.post232"
-source = { git = "https://github.com/luuquangvu/Gemini-API.git?rev=retrieve-dynamic-models#57bb345555cd947f4f7d278088d3f41946fd3caf" }
+version = "1.21.0.post23"
+source = { git = "https://github.com/HanaokaYuzu/Gemini-API.git#7b669588f5f8110424d3678440eb558af983991c" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },
@@ -454,26 +454,26 @@ wheels = [
 
 [[package]]
 name = "ty"
-version = "0.0.24"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/7a/96/652a425030f95dc2c9548d9019e52502e17079e1daeefbc4036f1c0905b4/ty-0.0.24.tar.gz", hash = "sha256:9fe42f6b98207bdaef51f71487d6d087f2cb02555ee3939884d779b2b3cc8bfc", size = 5354286, upload-time = "2026-03-19T16:55:57.035Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/da/e5/34457ee11708e734ba81ad65723af83030e484f961e281d57d1eecf08951/ty-0.0.24-py3-none-linux_armv6l.whl", hash = "sha256:1ab4f1f61334d533a3fdf5d9772b51b1300ac5da4f3cdb0be9657a3ccb2ce3e7", size = 10394877, upload-time = "2026-03-19T16:55:54.246Z" },
-    { url = "https://files.pythonhosted.org/packages/44/81/bc9a1b1a87f43db15ab64ad781a4f999734ec3b470ad042624fa875b20e6/ty-0.0.24-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:facbf2c4aaa6985229e08f8f9bf152215eb078212f22b5c2411f35386688ab42", size = 10211109, upload-time = "2026-03-19T16:55:28.554Z" },
-    { url = "https://files.pythonhosted.org/packages/e4/63/cfc805adeaa61d63ba3ea71127efa7d97c40ba36d97ee7bd957341d05107/ty-0.0.24-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b6d2a3b6d4470c483552a31e9b368c86f154dcc964bccb5406159dc9cd362246", size = 9694769, upload-time = "2026-03-19T16:55:34.309Z" },
-    { url = "https://files.pythonhosted.org/packages/33/09/edc220726b6ec44a58900401f6b27140997ef15026b791e26b69a6e69eb5/ty-0.0.24-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c94c25d0500939fd5f8f16ce41cbed5b20528702c1d649bf80300253813f0a2", size = 10176287, upload-time = "2026-03-19T16:55:37.17Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/bf/cbe2227be711e65017655d8ee4d050f4c92b113fb4dc4c3bd6a19d3a86d8/ty-0.0.24-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:89cbe7bc7df0fab02dbd8cda79b737df83f1ef7fb573b08c0ee043dc68cffb08", size = 10214832, upload-time = "2026-03-19T16:56:08.518Z" },
-    { url = "https://files.pythonhosted.org/packages/af/1d/d15803ee47e9143d10e10bd81ccc14761d08758082bda402950685f0ddfe/ty-0.0.24-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db2c5d269bcc9b764850c99f457b5018a79b3ef40ecfbc03344e65effd6cf743", size = 10709892, upload-time = "2026-03-19T16:56:05.727Z" },
-    { url = "https://files.pythonhosted.org/packages/36/12/6db0d86c477147f67b9052de209421d76c3e855197b000c25fcbbe86b3a2/ty-0.0.24-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba44512db5b97c3bbd59d93e11296e8548d0c9a3bdd1280de36d7ff22d351896", size = 11280872, upload-time = "2026-03-19T16:56:02.899Z" },
-    { url = "https://files.pythonhosted.org/packages/1b/fc/155fe83a97c06d33ccc9e0f428258b32df2e08a428300c715d34757f0111/ty-0.0.24-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a52b7f589c3205512a9c50ba5b2b1e8c0698b72e51b8b9285c90420c06f1cae8", size = 11060520, upload-time = "2026-03-19T16:55:59.956Z" },
-    { url = "https://files.pythonhosted.org/packages/ac/f1/32c05a1c4c3c2a95c5b7361dee03a9bf1231d4ad096b161c838b45bce5a0/ty-0.0.24-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7981df5c709c054da4ac5d7c93f8feb8f45e69e829e4461df4d5f0988fe67d04", size = 10791455, upload-time = "2026-03-19T16:55:25.728Z" },
-    { url = "https://files.pythonhosted.org/packages/17/2c/53c1ea6bedfa4d4ab64d4de262d8f5e405ecbffefd364459c628c0310d33/ty-0.0.24-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b2860151ad95a00d0f0280b8fef79900d08dcd63276b57e6e5774f2c055979c5", size = 10156708, upload-time = "2026-03-19T16:55:45.563Z" },
-    { url = "https://files.pythonhosted.org/packages/45/39/7d2919cf194707169474d80720a5f3d793e983416f25e7ffcf80504c9df2/ty-0.0.24-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:5674a1146d927ab77ff198a88e0c4505134ced342a0e7d1beb4a076a728b7496", size = 10236263, upload-time = "2026-03-19T16:55:31.474Z" },
-    { url = "https://files.pythonhosted.org/packages/cf/7f/48eac722f2fd12a5b7aae0effdcb75c46053f94b783d989e3ef0d7380082/ty-0.0.24-py3-none-musllinux_1_2_i686.whl", hash = "sha256:438ecbf1608a9b16dd84502f3f1b23ef2ef32bbd0ab3e0ca5a82f0e0d1cd41ea", size = 10402559, upload-time = "2026-03-19T16:55:39.602Z" },
-    { url = "https://files.pythonhosted.org/packages/75/e0/8cf868b9749ce1e5166462759545964e95b02353243594062b927d8bff2a/ty-0.0.24-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ddeed3098dd92a83964e7aa7b41e509ba3530eb539fc4cd8322ff64a09daf1f5", size = 10893684, upload-time = "2026-03-19T16:55:51.439Z" },
-    { url = "https://files.pythonhosted.org/packages/17/9f/f54bf3be01d2c2ed731d10a5afa3324dc66f987a6ae0a4a6cbfa2323d080/ty-0.0.24-py3-none-win32.whl", hash = "sha256:83013fb3a4764a8f8bcc6ca11ff8bdfd8c5f719fc249241cb2b8916e80778eb1", size = 9781542, upload-time = "2026-03-19T16:56:11.588Z" },
-    { url = "https://files.pythonhosted.org/packages/fb/49/c004c5cc258b10b3a145666e9a9c28ae7678bc958c8926e8078d5d769081/ty-0.0.24-py3-none-win_amd64.whl", hash = "sha256:748a60eb6912d1cf27aaab105ffadb6f4d2e458a3fcadfbd3cf26db0d8062eeb", size = 10764801, upload-time = "2026-03-19T16:55:42.752Z" },
-    { url = "https://files.pythonhosted.org/packages/e2/59/006a074e185bfccf5e4c026015245ab4fcd2362b13a8d24cf37a277909a9/ty-0.0.24-py3-none-win_arm64.whl", hash = "sha256:280a3d31e86d0721947238f17030c33f0911cae851d108ea9f4e3ab12a5ed01f", size = 10194093, upload-time = "2026-03-19T16:55:48.303Z" },
+version = "0.0.25"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/12/bf/3c3147c7237277b0e8a911ff89de7183408be96b31fb42b38edb666d287f/ty-0.0.25.tar.gz", hash = "sha256:8ae3891be17dfb6acab51a2df3a8f8f6c551eb60ea674c10946dc92aae8d4401", size = 5375500, upload-time = "2026-03-24T22:32:34.608Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/97/a4/6c289cbd1474285223124a4ffb55c078dbe9ae1d925d0b6a948643c7f115/ty-0.0.25-py3-none-linux_armv6l.whl", hash = "sha256:26d6d5aede5d54fb055779460f896d9c1473c6fb996716bd11cb90f027d8fee7", size = 10452747, upload-time = "2026-03-24T22:32:32.662Z" },
+    { url = "https://files.pythonhosted.org/packages/00/13/74cb9de356b9ceb3f281ab048f8c4ac2207122161b0ac0066886ce129abe/ty-0.0.25-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:aedcfbc7b6b96dbc55b0da78fa02bd049373ff3d8a827f613dadd8bd17d10758", size = 10271349, upload-time = "2026-03-24T22:32:13.041Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/93/ffc5a20cc9e14fa9b32b0c54884864bede30d144ce2ae013805bce0c86d0/ty-0.0.25-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0a8fb3c1e28f73618941811e2568dca195178a1a6314651d4ee97086a4497253", size = 9730308, upload-time = "2026-03-24T22:32:19.24Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/78/52e05ef32a5f172fce70633a4e19d8e04364271a4322ae12382c7344b0de/ty-0.0.25-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:814870b7f347b5d0276304cddb98a0958f08de183bf159abc920ebe321247ad4", size = 10247664, upload-time = "2026-03-24T22:32:08.669Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/64/0d0a47ed0aa1d634c666c2cc15d3b0af4b95d0fd3dbb796032bd493f3433/ty-0.0.25-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:781150e23825dc110cd5e1f50ca3d61664f7a5db5b4a55d5dbf7d3b1e246b917", size = 10261961, upload-time = "2026-03-24T22:32:43.935Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/ba/4666b96f0499465efb97c244554107c541d74a1add393e62276b3de9b54f/ty-0.0.25-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc81ff2a0143911321251dc81d1c259fa5cdc56d043019a733c845d55409e2a", size = 10746076, upload-time = "2026-03-24T22:32:26.37Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/ed/aa958ccbcd85cc206600e48fbf0a1c27aef54b4b90112d9a73f69ed0c739/ty-0.0.25-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f03c5c5b5c10355ea030cbe3cd93b2e759b9492c66688288ea03a68086069f2e", size = 11287331, upload-time = "2026-03-24T22:32:21.607Z" },
+    { url = "https://files.pythonhosted.org/packages/26/e4/f4a004e1952e6042f5bfeeb7d09cffb379270ef009d9f8568471863e86e6/ty-0.0.25-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7fc1ef49cd6262eb9223ccf6e258ac899aaa53e7dc2151ba65a2c9fa248dfa75", size = 11028804, upload-time = "2026-03-24T22:32:39.088Z" },
+    { url = "https://files.pythonhosted.org/packages/56/32/5c15bb8ea20ed54d43c734f253a2a5da95d41474caecf4ef3682df9f68f5/ty-0.0.25-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ad98da1393161096235a387cc36abecd31861060c68416761eccdb7c1bc326b", size = 10845246, upload-time = "2026-03-24T22:32:41.33Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/fe/4ddd83e810c8682fcfada0d1c9d38936a34a024d32d7736075c1e53a038e/ty-0.0.25-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2d4336aa5381eb4eab107c3dec75fe22943a648ef6646f5a8431ef1c8cdabb66", size = 10233515, upload-time = "2026-03-24T22:32:17.012Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/db/9fe54f6fb952e5b218f2e661e64ed656512edf2046cfbb9c159558e255db/ty-0.0.25-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e10ed39564227de2b7bd89398250b65daaedbef15a25cef8eee70078f5d9e0b2", size = 10275289, upload-time = "2026-03-24T22:32:28.21Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/e0/090d7b33791b42bc7ec29463ac6a634738e16b289e027608ebe542682773/ty-0.0.25-py3-none-musllinux_1_2_i686.whl", hash = "sha256:aca04e9ed9b61c706064a1c0b71a247c3f92f373d0222103f3bc54b649421796", size = 10461195, upload-time = "2026-03-24T22:32:24.252Z" },
+    { url = "https://files.pythonhosted.org/packages/42/31/5bf12bce01b80b72a7a4e627380779b41510e730f6000862a1d078e423f7/ty-0.0.25-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:18a5443e4ef339c1bd8c57fc13112c22080617ea582bfc22b497d82d65361325", size = 10931471, upload-time = "2026-03-24T22:32:14.985Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/5e/ab60c11f8a6dd2a0ae96daac83458ef2e9be1ae70481d1ad9c59d3eaf20f/ty-0.0.25-py3-none-win32.whl", hash = "sha256:a685b9a611b69195b5a557e05dbb7ebcd12815f6c32fb27fdf15edeb1fa33d8f", size = 9835974, upload-time = "2026-03-24T22:32:36.86Z" },
+    { url = "https://files.pythonhosted.org/packages/41/55/625acc2ef34646268bc2baa8fdd6e22fb47cd5965e2acd3be92c687fb6b0/ty-0.0.25-py3-none-win_amd64.whl", hash = "sha256:0d4d37a1f1ab7f2669c941c38c65144ff223eb51ececd7ccfc0d623afbc0f729", size = 10815449, upload-time = "2026-03-24T22:32:11.031Z" },
+    { url = "https://files.pythonhosted.org/packages/82/c7/0147bfb543df97740b45b222c54ff79ef20fa57f14b9d2c1dab3cd7d3faa/ty-0.0.25-py3-none-win_arm64.whl", hash = "sha256:d80b8cd965cbacbfd887ac2d985f5b6da09b7aa3569371e2894e0b30b26b89cd", size = 10225494, upload-time = "2026-03-24T22:32:30.611Z" },
 ]
 
 [[package]]

From cf0dffaa75553942c90f059922041cfccfe03345 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 2 Apr 2026 12:56:41 +0700
Subject: [PATCH 234/236] Update dependencies to latest versions

---
 app/server/middleware.py |   2 +-
 app/services/lmdb.py     |   7 +-
 pyproject.toml           |   6 +-
 scripts/dump_lmdb.py     |   5 +-
 uv.lock                  | 154 +++++++++++++++++++--------------------
 5 files changed, 88 insertions(+), 86 deletions(-)

diff --git a/app/server/middleware.py b/app/server/middleware.py
index 07840be..a0593b4 100644
--- a/app/server/middleware.py
+++ b/app/server/middleware.py
@@ -113,7 +113,7 @@ def add_cors_middleware(app: FastAPI):
     if g_config.cors.enabled:
         cors = g_config.cors
         app.add_middleware(
-            CORSMiddleware,  # type: ignore
+            CORSMiddleware,
             allow_origins=cors.allow_origins,
             allow_credentials=cors.allow_credentials,
             allow_methods=cors.allow_methods,
diff --git a/app/services/lmdb.py b/app/services/lmdb.py
index 2f45193..3284b8d 100644
--- a/app/services/lmdb.py
+++ b/app/services/lmdb.py
@@ -200,10 +200,11 @@ def _get_transaction(self, write: bool = False) -> Generator[Transaction]:
             raise
 
     @staticmethod
-    def _decode_index_value(data: bytes) -> list[str]:
+    def _decode_index_value(data: bytes | memoryview) -> list[str]:
         """Decode index value, handling both legacy single-string and new list-of-strings formats."""
         if not data:
             return []
+        data = bytes(data)
         if data.startswith(b"["):
             try:
                 val = orjson.loads(data)
@@ -452,7 +453,7 @@ def keys(self, prefix: str = "", limit: int | None = None) -> list[str]:
 
                 count = 0
                 for key, _ in cursor:
-                    key_str = key.decode("utf-8")
+                    key_str = bytes(key).decode("utf-8")
                     # Skip internal index mappings
                     if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith(
                         self.FUZZY_LOOKUP_PREFIX
@@ -484,7 +485,7 @@ def cleanup_expired(self, retention_days: int | None = None) -> int:
             with self._get_transaction(write=False) as txn:
                 cursor = txn.cursor()
                 for key_bytes, value_bytes in cursor:
-                    key_str = key_bytes.decode("utf-8")
+                    key_str = bytes(key_bytes).decode("utf-8")
                     if key_str.startswith(self.HASH_LOOKUP_PREFIX) or key_str.startswith(
                         self.FUZZY_LOOKUP_PREFIX
                     ):
diff --git a/pyproject.toml b/pyproject.toml
index 49a424f..6113b27 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,12 +6,12 @@ readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
     "curl-cffi>=0.14.0",
-    "fastapi>=0.135.2",
+    "fastapi>=0.135.3",
     "gemini-webapi>=1.21.0",
     "httptools>=0.7.1",
-    "lmdb>=2.1.1",
+    "lmdb>=2.2.0",
     "loguru>=0.7.3",
-    "orjson>=3.11.7",
+    "orjson>=3.11.8",
     "pydantic-settings[yaml]>=2.13.1",
     "uvicorn>=0.42.0",
     "uvloop>=0.22.1; sys_platform != 'win32'",
diff --git a/scripts/dump_lmdb.py b/scripts/dump_lmdb.py
index 15ce5df..3ef4805 100644
--- a/scripts/dump_lmdb.py
+++ b/scripts/dump_lmdb.py
@@ -8,8 +8,9 @@
 from lmdb import Transaction
 
 
-def _decode_value(value: bytes) -> Any:
+def _decode_value(value: bytes | memoryview) -> Any:
     """Decode a value from LMDB to Python data."""
+    value = bytes(value)
     try:
         return orjson.loads(value)
     except orjson.JSONDecodeError:
@@ -20,7 +21,7 @@ def _dump_all(txn: Transaction) -> list[dict[str, Any]]:
     """Return all records from the database."""
     result: list[dict[str, Any]] = []
     for key, value in txn.cursor():
-        result.append({"key": key.decode("utf-8"), "value": _decode_value(value)})
+        result.append({"key": bytes(key).decode("utf-8"), "value": _decode_value(value)})
     return result
 
 
diff --git a/uv.lock b/uv.lock
index d7ddf48..7dac18c 100644
--- a/uv.lock
+++ b/uv.lock
@@ -110,7 +110,7 @@ wheels = [
 
 [[package]]
 name = "fastapi"
-version = "0.135.2"
+version = "0.135.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "annotated-doc" },
@@ -119,9 +119,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "typing-inspection" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c4/73/5903c4b13beae98618d64eb9870c3fac4f605523dd0312ca5c80dadbd5b9/fastapi-0.135.2.tar.gz", hash = "sha256:88a832095359755527b7f63bb4c6bc9edb8329a026189eed83d6c1afcf419d56", size = 395833, upload-time = "2026-03-23T14:12:41.697Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f7/e6/7adb4c5fa231e82c35b8f5741a9f2d055f520c29af5546fd70d3e8e1cd2e/fastapi-0.135.3.tar.gz", hash = "sha256:bd6d7caf1a2bdd8d676843cdcd2287729572a1ef524fc4d65c17ae002a1be654", size = 396524, upload-time = "2026-04-01T16:23:58.188Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/8f/ea/18f6d0457f9efb2fc6fa594857f92810cadb03024975726db6546b3d6fcf/fastapi-0.135.2-py3-none-any.whl", hash = "sha256:0af0447d541867e8db2a6a25c23a8c4bd80e2394ac5529bd87501bbb9e240ca5", size = 117407, upload-time = "2026-03-23T14:12:43.284Z" },
+    { url = "https://files.pythonhosted.org/packages/84/a4/5caa2de7f917a04ada20018eccf60d6cc6145b0199d55ca3711b0fc08312/fastapi-0.135.3-py3-none-any.whl", hash = "sha256:9b0f590c813acd13d0ab43dd8494138eb58e484bfac405db1f3187cfc5810d98", size = 117734, upload-time = "2026-04-01T16:23:59.328Z" },
 ]
 
 [[package]]
@@ -156,12 +156,12 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "curl-cffi", specifier = ">=0.14.0" },
-    { name = "fastapi", specifier = ">=0.135.2" },
+    { name = "fastapi", specifier = ">=0.135.3" },
     { name = "gemini-webapi", git = "https://github.com/HanaokaYuzu/Gemini-API.git" },
     { name = "httptools", specifier = ">=0.7.1" },
-    { name = "lmdb", specifier = ">=2.1.1" },
+    { name = "lmdb", specifier = ">=2.2.0" },
     { name = "loguru", specifier = ">=0.7.3" },
-    { name = "orjson", specifier = ">=3.11.7" },
+    { name = "orjson", specifier = ">=3.11.8" },
     { name = "pydantic-settings", extras = ["yaml"], specifier = ">=2.13.1" },
     { name = "pytest", marker = "extra == 'dev'" },
     { name = "ruff", marker = "extra == 'dev'" },
@@ -176,8 +176,8 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "1.21.0.post23"
-source = { git = "https://github.com/HanaokaYuzu/Gemini-API.git#7b669588f5f8110424d3678440eb558af983991c" }
+version = "1.21.0.post32"
+source = { git = "https://github.com/HanaokaYuzu/Gemini-API.git#6675735f294ecf91e1e101dbb37a7e2ec2eef597" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },
@@ -229,16 +229,16 @@ wheels = [
 
 [[package]]
 name = "lmdb"
-version = "2.1.1"
+version = "2.2.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/5a/4e/d78a06af228216102fcb4b703f4b6a2f565978224d5623e20e02d90aeed3/lmdb-2.1.1.tar.gz", hash = "sha256:0317062326ae2f66e3bbde6400907651ea58c27a250097511a28d13c467fa5f1", size = 913160, upload-time = "2026-03-19T13:56:26.139Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/21/44/d94934efaf8f887b6959f131fde740fcaa831edfd13eb5425574637cddd5/lmdb-2.2.0.tar.gz", hash = "sha256:53020e20305c043ea6e68089bc242d744fba6073cdb268332299ba6dda2886d4", size = 933189, upload-time = "2026-03-30T01:26:19.049Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/af/64/0ad40c3d06f89d7e5b6894537a0bd2829b592ef191f61b704157ad641f33/lmdb-2.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f43f7a7907fa726597d235dd83febe7ec47b45b78952b848f0bdf50b58fe1bf1", size = 109123, upload-time = "2026-03-19T13:55:58.522Z" },
-    { url = "https://files.pythonhosted.org/packages/a6/62/86257b169acc61282110166581167f099c08bbb76aea7bc4da0c0b64c8b2/lmdb-2.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:24382db5cd139866d222471f4fb14fb511b16bf1ed0686bdf7fc808bf07ed8a4", size = 107803, upload-time = "2026-03-19T13:55:59.907Z" },
-    { url = "https://files.pythonhosted.org/packages/26/2d/8889fa81eb232dd5fec10f3178e22f3ae4f385c46be6124e29709f3bfdbb/lmdb-2.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:76e8c14a76bb7695c1778181dce9c352fb565b5e113c74981ec692d5d6820efb", size = 324703, upload-time = "2026-03-19T13:56:01.309Z" },
-    { url = "https://files.pythonhosted.org/packages/dc/d9/ec2e2370d35214e12abd1c9dada369c460e694f0c6fe385a200a2a25eaf3/lmdb-2.1.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7700999c4fa7762577d4b3deedd48f6c25ce396dfb17f61dd48f50dcf99f78d6", size = 328101, upload-time = "2026-03-19T13:56:02.806Z" },
-    { url = "https://files.pythonhosted.org/packages/24/c7/e65ca28f46479e92dfc7250dab5259ae6eaa0e5075db47f52a4a1462adb1/lmdb-2.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:74e4442102423e185347108cc67933411ec13e41866f57f6e9868c6ef5642b88", size = 104800, upload-time = "2026-03-19T13:56:04.173Z" },
-    { url = "https://files.pythonhosted.org/packages/33/51/e8f12e4b7a0ef82b42d8a37201db99f8dd7d26113a6b0cbf5c441692e2ad/lmdb-2.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:26b0412a38fffd8587becde3c2b0ee606b8906ae0ee5060b0ed6d44610703dec", size = 99048, upload-time = "2026-03-19T13:56:05.499Z" },
+    { url = "https://files.pythonhosted.org/packages/64/43/543af71e8fa4c56623bb89c358121ab806426f26685f11539fe5452deffa/lmdb-2.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:36e0cbe6b7d59f6e19b448942c5f9e91674f596a802743258f82e926a9a09632", size = 113550, upload-time = "2026-03-30T01:25:55.727Z" },
+    { url = "https://files.pythonhosted.org/packages/22/2c/4702d36c0073737554b20d1d62e879a066df963482f8e514866588ddd82d/lmdb-2.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e5d7a9dfd279a5884806fd478244961e4483cc6d7eb769caed1d7019a8608c20", size = 112135, upload-time = "2026-03-30T01:25:56.809Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/43/d015fea326ed0a634107f29740b002170a462b6d2481e509105c685520f5/lmdb-2.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d0dbe7902b2cdb60bf6c893f307ef2b2a5039afd22f029515b86183f05ab1353", size = 332108, upload-time = "2026-03-30T01:25:57.907Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/c9/503e7f173994b514936badcbcb7fa9f89a07a3cfe596c6fb95b1b91b8d70/lmdb-2.2.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c576cdb163ae61a7ef6eecbc20a6025a4abe085491c1dc0c667d726f4926b53", size = 336017, upload-time = "2026-03-30T01:25:59.234Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/94/b3b064acfd2f8acf5aaa53fff2c43963dbc1932ba8b8df4e27d75bf6a34a/lmdb-2.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:746eebcd4c0aeaf0eb2f897028929d270c5bc80ef4918500eec16db6f26f3fcc", size = 109574, upload-time = "2026-03-30T01:26:00.324Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/10/dc7488d1effc339cd9470f9d22ec0fd7052a3d4fdfae87765ecd41cb2e59/lmdb-2.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:006153aac9fb0415a5f3e8ac88789e5730dba3dd0743cd84c95e3951ff68bc3a", size = 103810, upload-time = "2026-03-30T01:26:01.559Z" },
 ]
 
 [[package]]
@@ -256,25 +256,25 @@ wheels = [
 
 [[package]]
 name = "orjson"
-version = "3.11.7"
+version = "3.11.8"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/53/45/b268004f745ede84e5798b48ee12b05129d19235d0e15267aa57dcdb400b/orjson-3.11.7.tar.gz", hash = "sha256:9b1a67243945819ce55d24a30b59d6a168e86220452d2c96f4d1f093e71c0c49", size = 6144992, upload-time = "2026-02-02T15:38:49.29Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/9d/1b/2024d06792d0779f9dbc51531b61c24f76c75b9f4ce05e6f3377a1814cea/orjson-3.11.8.tar.gz", hash = "sha256:96163d9cdc5a202703e9ad1b9ae757d5f0ca62f4fa0cc93d1f27b0e180cc404e", size = 5603832, upload-time = "2026-03-31T16:16:27.878Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/89/25/6e0e52cac5aab51d7b6dcd257e855e1dec1c2060f6b28566c509b4665f62/orjson-3.11.7-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:1d98b30cc1313d52d4af17d9c3d307b08389752ec5f2e5febdfada70b0f8c733", size = 228390, upload-time = "2026-02-02T15:38:06.8Z" },
-    { url = "https://files.pythonhosted.org/packages/a5/29/a77f48d2fc8a05bbc529e5ff481fb43d914f9e383ea2469d4f3d51df3d00/orjson-3.11.7-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:d897e81f8d0cbd2abb82226d1860ad2e1ab3ff16d7b08c96ca00df9d45409ef4", size = 125189, upload-time = "2026-02-02T15:38:08.181Z" },
-    { url = "https://files.pythonhosted.org/packages/89/25/0a16e0729a0e6a1504f9d1a13cdd365f030068aab64cec6958396b9969d7/orjson-3.11.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:814be4b49b228cfc0b3c565acf642dd7d13538f966e3ccde61f4f55be3e20785", size = 128106, upload-time = "2026-02-02T15:38:09.41Z" },
-    { url = "https://files.pythonhosted.org/packages/66/da/a2e505469d60666a05ab373f1a6322eb671cb2ba3a0ccfc7d4bc97196787/orjson-3.11.7-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d06e5c5fed5caedd2e540d62e5b1c25e8c82431b9e577c33537e5fa4aa909539", size = 123363, upload-time = "2026-02-02T15:38:10.73Z" },
-    { url = "https://files.pythonhosted.org/packages/23/bf/ed73f88396ea35c71b38961734ea4a4746f7ca0768bf28fd551d37e48dd0/orjson-3.11.7-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:31c80ce534ac4ea3739c5ee751270646cbc46e45aea7576a38ffec040b4029a1", size = 129007, upload-time = "2026-02-02T15:38:12.138Z" },
-    { url = "https://files.pythonhosted.org/packages/73/3c/b05d80716f0225fc9008fbf8ab22841dcc268a626aa550561743714ce3bf/orjson-3.11.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f50979824bde13d32b4320eedd513431c921102796d86be3eee0b58e58a3ecd1", size = 141667, upload-time = "2026-02-02T15:38:13.398Z" },
-    { url = "https://files.pythonhosted.org/packages/61/e8/0be9b0addd9bf86abfc938e97441dcd0375d494594b1c8ad10fe57479617/orjson-3.11.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e54f3808e2b6b945078c41aa8d9b5834b28c50843846e97807e5adb75fa9705", size = 130832, upload-time = "2026-02-02T15:38:14.698Z" },
-    { url = "https://files.pythonhosted.org/packages/c9/ec/c68e3b9021a31d9ec15a94931db1410136af862955854ed5dd7e7e4f5bff/orjson-3.11.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12b80df61aab7b98b490fe9e4879925ba666fccdfcd175252ce4d9035865ace", size = 133373, upload-time = "2026-02-02T15:38:16.109Z" },
-    { url = "https://files.pythonhosted.org/packages/d2/45/f3466739aaafa570cc8e77c6dbb853c48bf56e3b43738020e2661e08b0ac/orjson-3.11.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:996b65230271f1a97026fd0e6a753f51fbc0c335d2ad0c6201f711b0da32693b", size = 138307, upload-time = "2026-02-02T15:38:17.453Z" },
-    { url = "https://files.pythonhosted.org/packages/e1/84/9f7f02288da1ffb31405c1be07657afd1eecbcb4b64ee2817b6fe0f785fa/orjson-3.11.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ab49d4b2a6a1d415ddb9f37a21e02e0d5dbfe10b7870b21bf779fc21e9156157", size = 408695, upload-time = "2026-02-02T15:38:18.831Z" },
-    { url = "https://files.pythonhosted.org/packages/18/07/9dd2f0c0104f1a0295ffbe912bc8d63307a539b900dd9e2c48ef7810d971/orjson-3.11.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:390a1dce0c055ddf8adb6aa94a73b45a4a7d7177b5c584b8d1c1947f2ba60fb3", size = 144099, upload-time = "2026-02-02T15:38:20.28Z" },
-    { url = "https://files.pythonhosted.org/packages/a5/66/857a8e4a3292e1f7b1b202883bcdeb43a91566cf59a93f97c53b44bd6801/orjson-3.11.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1eb80451a9c351a71dfaf5b7ccc13ad065405217726b59fdbeadbcc544f9d223", size = 134806, upload-time = "2026-02-02T15:38:22.186Z" },
-    { url = "https://files.pythonhosted.org/packages/0a/5b/6ebcf3defc1aab3a338ca777214966851e92efb1f30dc7fc8285216e6d1b/orjson-3.11.7-cp313-cp313-win32.whl", hash = "sha256:7477aa6a6ec6139c5cb1cc7b214643592169a5494d200397c7fc95d740d5fcf3", size = 127914, upload-time = "2026-02-02T15:38:23.511Z" },
-    { url = "https://files.pythonhosted.org/packages/00/04/c6f72daca5092e3117840a1b1e88dfc809cc1470cf0734890d0366b684a1/orjson-3.11.7-cp313-cp313-win_amd64.whl", hash = "sha256:b9f95dcdea9d4f805daa9ddf02617a89e484c6985fa03055459f90e87d7a0757", size = 124986, upload-time = "2026-02-02T15:38:24.836Z" },
-    { url = "https://files.pythonhosted.org/packages/03/ba/077a0f6f1085d6b806937246860fafbd5b17f3919c70ee3f3d8d9c713f38/orjson-3.11.7-cp313-cp313-win_arm64.whl", hash = "sha256:800988273a014a0541483dc81021247d7eacb0c845a9d1a34a422bc718f41539", size = 126045, upload-time = "2026-02-02T15:38:26.216Z" },
+    { url = "https://files.pythonhosted.org/packages/66/7f/95fba509bb2305fab0073558f1e8c3a2ec4b2afe58ed9fcb7d3b8beafe94/orjson-3.11.8-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:3f23426851d98478c8970da5991f84784a76682213cd50eb73a1da56b95239dc", size = 229180, upload-time = "2026-03-31T16:15:36.426Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/9d/b237215c743ca073697d759b5503abd2cb8a0d7b9c9e21f524bcf176ab66/orjson-3.11.8-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:ebaed4cef74a045b83e23537b52ef19a367c7e3f536751e355a2a394f8648559", size = 128754, upload-time = "2026-03-31T16:15:38.049Z" },
+    { url = "https://files.pythonhosted.org/packages/42/3d/27d65b6d11e63f133781425f132807aef793ed25075fec686fc8e46dd528/orjson-3.11.8-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97c8f5d3b62380b70c36ffacb2a356b7c6becec86099b177f73851ba095ef623", size = 131877, upload-time = "2026-03-31T16:15:39.484Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/cc/faee30cd8f00421999e40ef0eba7332e3a625ce91a58200a2f52c7fef235/orjson-3.11.8-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:436c4922968a619fb7fef1ccd4b8b3a76c13b67d607073914d675026e911a65c", size = 130361, upload-time = "2026-03-31T16:15:41.274Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/bb/a6c55896197f97b6d4b4e7c7fd77e7235517c34f5d6ad5aadd43c54c6d7c/orjson-3.11.8-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1ab359aff0436d80bfe8a23b46b5fea69f1e18aaf1760a709b4787f1318b317f", size = 135521, upload-time = "2026-03-31T16:15:42.758Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/7c/ca3a3525aa32ff636ebb1778e77e3587b016ab2edb1b618b36ba96f8f2c0/orjson-3.11.8-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f89b6d0b3a8d81e1929d3ab3d92bbc225688bd80a770c49432543928fe09ac55", size = 146862, upload-time = "2026-03-31T16:15:44.341Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/0c/18a9d7f18b5edd37344d1fd5be17e94dc652c67826ab749c6e5948a78112/orjson-3.11.8-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:29c009e7a2ca9ad0ed1376ce20dd692146a5d9fe4310848904b6b4fee5c5c137", size = 132847, upload-time = "2026-03-31T16:15:46.368Z" },
+    { url = "https://files.pythonhosted.org/packages/23/91/7e722f352ad67ca573cee44de2a58fb810d0f4eb4e33276c6a557979fd8a/orjson-3.11.8-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:705b895b781b3e395c067129d8551655642dfe9437273211d5404e87ac752b53", size = 133637, upload-time = "2026-03-31T16:15:48.123Z" },
+    { url = "https://files.pythonhosted.org/packages/af/04/32845ce13ac5bd1046ddb02ac9432ba856cc35f6d74dde95864fe0ad5523/orjson-3.11.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:88006eda83858a9fdf73985ce3804e885c2befb2f506c9a3723cdeb5a2880e3e", size = 141906, upload-time = "2026-03-31T16:15:49.626Z" },
+    { url = "https://files.pythonhosted.org/packages/02/5e/c551387ddf2d7106d9039369862245c85738b828844d13b99ccb8d61fd06/orjson-3.11.8-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:55120759e61309af7fcf9e961c6f6af3dde5921cdb3ee863ef63fd9db126cae6", size = 423722, upload-time = "2026-03-31T16:15:51.176Z" },
+    { url = "https://files.pythonhosted.org/packages/00/a3/ecfe62434096f8a794d4976728cb59bcfc4a643977f21c2040545d37eb4c/orjson-3.11.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:98bdc6cb889d19bed01de46e67574a2eab61f5cc6b768ed50e8ac68e9d6ffab6", size = 147801, upload-time = "2026-03-31T16:15:52.939Z" },
+    { url = "https://files.pythonhosted.org/packages/18/6d/0dce10b9f6643fdc59d99333871a38fa5a769d8e2fc34a18e5d2bfdee900/orjson-3.11.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:708c95f925a43ab9f34625e45dcdadf09ec8a6e7b664a938f2f8d5650f6c090b", size = 136460, upload-time = "2026-03-31T16:15:54.431Z" },
+    { url = "https://files.pythonhosted.org/packages/01/d6/6dde4f31842d87099238f1f07b459d24edc1a774d20687187443ab044191/orjson-3.11.8-cp313-cp313-win32.whl", hash = "sha256:01c4e5a6695dc09098f2e6468a251bc4671c50922d4d745aff1a0a33a0cf5b8d", size = 131956, upload-time = "2026-03-31T16:15:56.081Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/f9/4e494a56e013db957fb77186b818b916d4695b8fa2aa612364974160e91b/orjson-3.11.8-cp313-cp313-win_amd64.whl", hash = "sha256:c154a35dd1330707450bb4d4e7dd1f17fa6f42267a40c1e8a1daa5e13719b4b8", size = 127410, upload-time = "2026-03-31T16:15:57.54Z" },
+    { url = "https://files.pythonhosted.org/packages/57/7f/803203d00d6edb6e9e7eef421d4e1adbb5ea973e40b3533f3cfd9aeb374e/orjson-3.11.8-cp313-cp313-win_arm64.whl", hash = "sha256:4861bde57f4d253ab041e374f44023460e60e71efaa121f3c5f0ed457c3a701e", size = 127338, upload-time = "2026-03-31T16:15:59.106Z" },
 ]
 
 [[package]]
@@ -365,11 +365,11 @@ yaml = [
 
 [[package]]
 name = "pygments"
-version = "2.19.2"
+version = "2.20.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" },
 ]
 
 [[package]]
@@ -417,27 +417,27 @@ wheels = [
 
 [[package]]
 name = "ruff"
-version = "0.15.7"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/a1/22/9e4f66ee588588dc6c9af6a994e12d26e19efbe874d1a909d09a6dac7a59/ruff-0.15.7.tar.gz", hash = "sha256:04f1ae61fc20fe0b148617c324d9d009b5f63412c0b16474f3d5f1a1a665f7ac", size = 4601277, upload-time = "2026-03-19T16:26:22.605Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/41/2f/0b08ced94412af091807b6119ca03755d651d3d93a242682bf020189db94/ruff-0.15.7-py3-none-linux_armv6l.whl", hash = "sha256:a81cc5b6910fb7dfc7c32d20652e50fa05963f6e13ead3c5915c41ac5d16668e", size = 10489037, upload-time = "2026-03-19T16:26:32.47Z" },
-    { url = "https://files.pythonhosted.org/packages/91/4a/82e0fa632e5c8b1eba5ee86ecd929e8ff327bbdbfb3c6ac5d81631bef605/ruff-0.15.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:722d165bd52403f3bdabc0ce9e41fc47070ac56d7a91b4e0d097b516a53a3477", size = 10955433, upload-time = "2026-03-19T16:27:00.205Z" },
-    { url = "https://files.pythonhosted.org/packages/ab/10/12586735d0ff42526ad78c049bf51d7428618c8b5c467e72508c694119df/ruff-0.15.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7fbc2448094262552146cbe1b9643a92f66559d3761f1ad0656d4991491af49e", size = 10269302, upload-time = "2026-03-19T16:26:26.183Z" },
-    { url = "https://files.pythonhosted.org/packages/eb/5d/32b5c44ccf149a26623671df49cbfbd0a0ae511ff3df9d9d2426966a8d57/ruff-0.15.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b39329b60eba44156d138275323cc726bbfbddcec3063da57caa8a8b1d50adf", size = 10607625, upload-time = "2026-03-19T16:27:03.263Z" },
-    { url = "https://files.pythonhosted.org/packages/5d/f1/f0001cabe86173aaacb6eb9bb734aa0605f9a6aa6fa7d43cb49cbc4af9c9/ruff-0.15.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87768c151808505f2bfc93ae44e5f9e7c8518943e5074f76ac21558ef5627c85", size = 10324743, upload-time = "2026-03-19T16:27:09.791Z" },
-    { url = "https://files.pythonhosted.org/packages/7a/87/b8a8f3d56b8d848008559e7c9d8bf367934d5367f6d932ba779456e2f73b/ruff-0.15.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb0511670002c6c529ec66c0e30641c976c8963de26a113f3a30456b702468b0", size = 11138536, upload-time = "2026-03-19T16:27:06.101Z" },
-    { url = "https://files.pythonhosted.org/packages/e4/f2/4fd0d05aab0c5934b2e1464784f85ba2eab9d54bffc53fb5430d1ed8b829/ruff-0.15.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0d19644f801849229db8345180a71bee5407b429dd217f853ec515e968a6912", size = 11994292, upload-time = "2026-03-19T16:26:48.718Z" },
-    { url = "https://files.pythonhosted.org/packages/64/22/fc4483871e767e5e95d1622ad83dad5ebb830f762ed0420fde7dfa9d9b08/ruff-0.15.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4806d8e09ef5e84eb19ba833d0442f7e300b23fe3f0981cae159a248a10f0036", size = 11398981, upload-time = "2026-03-19T16:26:54.513Z" },
-    { url = "https://files.pythonhosted.org/packages/b0/99/66f0343176d5eab02c3f7fcd2de7a8e0dd7a41f0d982bee56cd1c24db62b/ruff-0.15.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dce0896488562f09a27b9c91b1f58a097457143931f3c4d519690dea54e624c5", size = 11242422, upload-time = "2026-03-19T16:26:29.277Z" },
-    { url = "https://files.pythonhosted.org/packages/5d/3a/a7060f145bfdcce4c987ea27788b30c60e2c81d6e9a65157ca8afe646328/ruff-0.15.7-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:1852ce241d2bc89e5dc823e03cff4ce73d816b5c6cdadd27dbfe7b03217d2a12", size = 11232158, upload-time = "2026-03-19T16:26:42.321Z" },
-    { url = "https://files.pythonhosted.org/packages/a7/53/90fbb9e08b29c048c403558d3cdd0adf2668b02ce9d50602452e187cd4af/ruff-0.15.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5f3e4b221fb4bd293f79912fc5e93a9063ebd6d0dcbd528f91b89172a9b8436c", size = 10577861, upload-time = "2026-03-19T16:26:57.459Z" },
-    { url = "https://files.pythonhosted.org/packages/2f/aa/5f486226538fe4d0f0439e2da1716e1acf895e2a232b26f2459c55f8ddad/ruff-0.15.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:b15e48602c9c1d9bdc504b472e90b90c97dc7d46c7028011ae67f3861ceba7b4", size = 10327310, upload-time = "2026-03-19T16:26:35.909Z" },
-    { url = "https://files.pythonhosted.org/packages/99/9e/271afdffb81fe7bfc8c43ba079e9d96238f674380099457a74ccb3863857/ruff-0.15.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b4705e0e85cedc74b0a23cf6a179dbb3df184cb227761979cc76c0440b5ab0d", size = 10840752, upload-time = "2026-03-19T16:26:45.723Z" },
-    { url = "https://files.pythonhosted.org/packages/bf/29/a4ae78394f76c7759953c47884eb44de271b03a66634148d9f7d11e721bd/ruff-0.15.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:112c1fa316a558bb34319282c1200a8bf0495f1b735aeb78bfcb2991e6087580", size = 11336961, upload-time = "2026-03-19T16:26:39.076Z" },
-    { url = "https://files.pythonhosted.org/packages/26/6b/8786ba5736562220d588a2f6653e6c17e90c59ced34a2d7b512ef8956103/ruff-0.15.7-py3-none-win32.whl", hash = "sha256:6d39e2d3505b082323352f733599f28169d12e891f7dd407f2d4f54b4c2886de", size = 10582538, upload-time = "2026-03-19T16:26:15.992Z" },
-    { url = "https://files.pythonhosted.org/packages/2b/e9/346d4d3fffc6871125e877dae8d9a1966b254fbd92a50f8561078b88b099/ruff-0.15.7-py3-none-win_amd64.whl", hash = "sha256:4d53d712ddebcd7dace1bc395367aec12c057aacfe9adbb6d832302575f4d3a1", size = 11755839, upload-time = "2026-03-19T16:26:19.897Z" },
-    { url = "https://files.pythonhosted.org/packages/8f/e8/726643a3ea68c727da31570bde48c7a10f1aa60eddd628d94078fec586ff/ruff-0.15.7-py3-none-win_arm64.whl", hash = "sha256:18e8d73f1c3fdf27931497972250340f92e8c861722161a9caeb89a58ead6ed2", size = 11023304, upload-time = "2026-03-19T16:26:51.669Z" },
+version = "0.15.8"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/14/b0/73cf7550861e2b4824950b8b52eebdcc5adc792a00c514406556c5b80817/ruff-0.15.8.tar.gz", hash = "sha256:995f11f63597ee362130d1d5a327a87cb6f3f5eae3094c620bcc632329a4d26e", size = 4610921, upload-time = "2026-03-26T18:39:38.675Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4a/92/c445b0cd6da6e7ae51e954939cb69f97e008dbe750cfca89b8cedc081be7/ruff-0.15.8-py3-none-linux_armv6l.whl", hash = "sha256:cbe05adeba76d58162762d6b239c9056f1a15a55bd4b346cfd21e26cd6ad7bc7", size = 10527394, upload-time = "2026-03-26T18:39:41.566Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/92/f1c662784d149ad1414cae450b082cf736430c12ca78367f20f5ed569d65/ruff-0.15.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d3e3d0b6ba8dca1b7ef9ab80a28e840a20070c4b62e56d675c24f366ef330570", size = 10905693, upload-time = "2026-03-26T18:39:30.364Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/f2/7a631a8af6d88bcef997eb1bf87cc3da158294c57044aafd3e17030613de/ruff-0.15.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:6ee3ae5c65a42f273f126686353f2e08ff29927b7b7e203b711514370d500de3", size = 10323044, upload-time = "2026-03-26T18:39:33.37Z" },
+    { url = "https://files.pythonhosted.org/packages/67/18/1bf38e20914a05e72ef3b9569b1d5c70a7ef26cd188d69e9ca8ef588d5bf/ruff-0.15.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdce027ada77baa448077ccc6ebb2fa9c3c62fd110d8659d601cf2f475858d94", size = 10629135, upload-time = "2026-03-26T18:39:44.142Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/e9/138c150ff9af60556121623d41aba18b7b57d95ac032e177b6a53789d279/ruff-0.15.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:12e617fc01a95e5821648a6df341d80456bd627bfab8a829f7cfc26a14a4b4a3", size = 10348041, upload-time = "2026-03-26T18:39:52.178Z" },
+    { url = "https://files.pythonhosted.org/packages/02/f1/5bfb9298d9c323f842c5ddeb85f1f10ef51516ac7a34ba446c9347d898df/ruff-0.15.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:432701303b26416d22ba696c39f2c6f12499b89093b61360abc34bcc9bf07762", size = 11121987, upload-time = "2026-03-26T18:39:55.195Z" },
+    { url = "https://files.pythonhosted.org/packages/10/11/6da2e538704e753c04e8d86b1fc55712fdbdcc266af1a1ece7a51fff0d10/ruff-0.15.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d910ae974b7a06a33a057cb87d2a10792a3b2b3b35e33d2699fdf63ec8f6b17a", size = 11951057, upload-time = "2026-03-26T18:39:19.18Z" },
+    { url = "https://files.pythonhosted.org/packages/83/f0/c9208c5fd5101bf87002fed774ff25a96eea313d305f1e5d5744698dc314/ruff-0.15.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2033f963c43949d51e6fdccd3946633c6b37c484f5f98c3035f49c27395a8ab8", size = 11464613, upload-time = "2026-03-26T18:40:06.301Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/22/d7f2fabdba4fae9f3b570e5605d5eb4500dcb7b770d3217dca4428484b17/ruff-0.15.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f29b989a55572fb885b77464cf24af05500806ab4edf9a0fd8977f9759d85b1", size = 11257557, upload-time = "2026-03-26T18:39:57.972Z" },
+    { url = "https://files.pythonhosted.org/packages/71/8c/382a9620038cf6906446b23ce8632ab8c0811b8f9d3e764f58bedd0c9a6f/ruff-0.15.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:ac51d486bf457cdc985a412fb1801b2dfd1bd8838372fc55de64b1510eff4bec", size = 11169440, upload-time = "2026-03-26T18:39:22.205Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/0d/0994c802a7eaaf99380085e4e40c845f8e32a562e20a38ec06174b52ef24/ruff-0.15.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c9861eb959edab053c10ad62c278835ee69ca527b6dcd72b47d5c1e5648964f6", size = 10605963, upload-time = "2026-03-26T18:39:46.682Z" },
+    { url = "https://files.pythonhosted.org/packages/19/aa/d624b86f5b0aad7cef6bbf9cd47a6a02dfdc4f72c92a337d724e39c9d14b/ruff-0.15.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8d9a5b8ea13f26ae90838afc33f91b547e61b794865374f114f349e9036835fb", size = 10357484, upload-time = "2026-03-26T18:39:49.176Z" },
+    { url = "https://files.pythonhosted.org/packages/35/c3/e0b7835d23001f7d999f3895c6b569927c4d39912286897f625736e1fd04/ruff-0.15.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c2a33a529fb3cbc23a7124b5c6ff121e4d6228029cba374777bd7649cc8598b8", size = 10830426, upload-time = "2026-03-26T18:40:03.702Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/51/ab20b322f637b369383adc341d761eaaa0f0203d6b9a7421cd6e783d81b9/ruff-0.15.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:75e5cd06b1cf3f47a3996cfc999226b19aa92e7cce682dcd62f80d7035f98f49", size = 11345125, upload-time = "2026-03-26T18:39:27.799Z" },
+    { url = "https://files.pythonhosted.org/packages/37/e6/90b2b33419f59d0f2c4c8a48a4b74b460709a557e8e0064cf33ad894f983/ruff-0.15.8-py3-none-win32.whl", hash = "sha256:bc1f0a51254ba21767bfa9a8b5013ca8149dcf38092e6a9eb704d876de94dc34", size = 10571959, upload-time = "2026-03-26T18:39:36.117Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/a2/ef467cb77099062317154c63f234b8a7baf7cb690b99af760c5b68b9ee7f/ruff-0.15.8-py3-none-win_amd64.whl", hash = "sha256:04f79eff02a72db209d47d665ba7ebcad609d8918a134f86cb13dd132159fc89", size = 11743893, upload-time = "2026-03-26T18:39:25.01Z" },
+    { url = "https://files.pythonhosted.org/packages/15/e2/77be4fff062fa78d9b2a4dea85d14785dac5f1d0c1fb58ed52331f0ebe28/ruff-0.15.8-py3-none-win_arm64.whl", hash = "sha256:cf891fa8e3bb430c0e7fac93851a5978fc99c8fa2c053b57b118972866f8e5f2", size = 11048175, upload-time = "2026-03-26T18:40:01.06Z" },
 ]
 
 [[package]]
@@ -454,26 +454,26 @@ wheels = [
 
 [[package]]
 name = "ty"
-version = "0.0.25"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/12/bf/3c3147c7237277b0e8a911ff89de7183408be96b31fb42b38edb666d287f/ty-0.0.25.tar.gz", hash = "sha256:8ae3891be17dfb6acab51a2df3a8f8f6c551eb60ea674c10946dc92aae8d4401", size = 5375500, upload-time = "2026-03-24T22:32:34.608Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/97/a4/6c289cbd1474285223124a4ffb55c078dbe9ae1d925d0b6a948643c7f115/ty-0.0.25-py3-none-linux_armv6l.whl", hash = "sha256:26d6d5aede5d54fb055779460f896d9c1473c6fb996716bd11cb90f027d8fee7", size = 10452747, upload-time = "2026-03-24T22:32:32.662Z" },
-    { url = "https://files.pythonhosted.org/packages/00/13/74cb9de356b9ceb3f281ab048f8c4ac2207122161b0ac0066886ce129abe/ty-0.0.25-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:aedcfbc7b6b96dbc55b0da78fa02bd049373ff3d8a827f613dadd8bd17d10758", size = 10271349, upload-time = "2026-03-24T22:32:13.041Z" },
-    { url = "https://files.pythonhosted.org/packages/0e/93/ffc5a20cc9e14fa9b32b0c54884864bede30d144ce2ae013805bce0c86d0/ty-0.0.25-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0a8fb3c1e28f73618941811e2568dca195178a1a6314651d4ee97086a4497253", size = 9730308, upload-time = "2026-03-24T22:32:19.24Z" },
-    { url = "https://files.pythonhosted.org/packages/6d/78/52e05ef32a5f172fce70633a4e19d8e04364271a4322ae12382c7344b0de/ty-0.0.25-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:814870b7f347b5d0276304cddb98a0958f08de183bf159abc920ebe321247ad4", size = 10247664, upload-time = "2026-03-24T22:32:08.669Z" },
-    { url = "https://files.pythonhosted.org/packages/c2/64/0d0a47ed0aa1d634c666c2cc15d3b0af4b95d0fd3dbb796032bd493f3433/ty-0.0.25-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:781150e23825dc110cd5e1f50ca3d61664f7a5db5b4a55d5dbf7d3b1e246b917", size = 10261961, upload-time = "2026-03-24T22:32:43.935Z" },
-    { url = "https://files.pythonhosted.org/packages/3e/ba/4666b96f0499465efb97c244554107c541d74a1add393e62276b3de9b54f/ty-0.0.25-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc81ff2a0143911321251dc81d1c259fa5cdc56d043019a733c845d55409e2a", size = 10746076, upload-time = "2026-03-24T22:32:26.37Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/ed/aa958ccbcd85cc206600e48fbf0a1c27aef54b4b90112d9a73f69ed0c739/ty-0.0.25-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f03c5c5b5c10355ea030cbe3cd93b2e759b9492c66688288ea03a68086069f2e", size = 11287331, upload-time = "2026-03-24T22:32:21.607Z" },
-    { url = "https://files.pythonhosted.org/packages/26/e4/f4a004e1952e6042f5bfeeb7d09cffb379270ef009d9f8568471863e86e6/ty-0.0.25-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7fc1ef49cd6262eb9223ccf6e258ac899aaa53e7dc2151ba65a2c9fa248dfa75", size = 11028804, upload-time = "2026-03-24T22:32:39.088Z" },
-    { url = "https://files.pythonhosted.org/packages/56/32/5c15bb8ea20ed54d43c734f253a2a5da95d41474caecf4ef3682df9f68f5/ty-0.0.25-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ad98da1393161096235a387cc36abecd31861060c68416761eccdb7c1bc326b", size = 10845246, upload-time = "2026-03-24T22:32:41.33Z" },
-    { url = "https://files.pythonhosted.org/packages/6f/fe/4ddd83e810c8682fcfada0d1c9d38936a34a024d32d7736075c1e53a038e/ty-0.0.25-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2d4336aa5381eb4eab107c3dec75fe22943a648ef6646f5a8431ef1c8cdabb66", size = 10233515, upload-time = "2026-03-24T22:32:17.012Z" },
-    { url = "https://files.pythonhosted.org/packages/ad/db/9fe54f6fb952e5b218f2e661e64ed656512edf2046cfbb9c159558e255db/ty-0.0.25-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e10ed39564227de2b7bd89398250b65daaedbef15a25cef8eee70078f5d9e0b2", size = 10275289, upload-time = "2026-03-24T22:32:28.21Z" },
-    { url = "https://files.pythonhosted.org/packages/b1/e0/090d7b33791b42bc7ec29463ac6a634738e16b289e027608ebe542682773/ty-0.0.25-py3-none-musllinux_1_2_i686.whl", hash = "sha256:aca04e9ed9b61c706064a1c0b71a247c3f92f373d0222103f3bc54b649421796", size = 10461195, upload-time = "2026-03-24T22:32:24.252Z" },
-    { url = "https://files.pythonhosted.org/packages/42/31/5bf12bce01b80b72a7a4e627380779b41510e730f6000862a1d078e423f7/ty-0.0.25-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:18a5443e4ef339c1bd8c57fc13112c22080617ea582bfc22b497d82d65361325", size = 10931471, upload-time = "2026-03-24T22:32:14.985Z" },
-    { url = "https://files.pythonhosted.org/packages/6a/5e/ab60c11f8a6dd2a0ae96daac83458ef2e9be1ae70481d1ad9c59d3eaf20f/ty-0.0.25-py3-none-win32.whl", hash = "sha256:a685b9a611b69195b5a557e05dbb7ebcd12815f6c32fb27fdf15edeb1fa33d8f", size = 9835974, upload-time = "2026-03-24T22:32:36.86Z" },
-    { url = "https://files.pythonhosted.org/packages/41/55/625acc2ef34646268bc2baa8fdd6e22fb47cd5965e2acd3be92c687fb6b0/ty-0.0.25-py3-none-win_amd64.whl", hash = "sha256:0d4d37a1f1ab7f2669c941c38c65144ff223eb51ececd7ccfc0d623afbc0f729", size = 10815449, upload-time = "2026-03-24T22:32:11.031Z" },
-    { url = "https://files.pythonhosted.org/packages/82/c7/0147bfb543df97740b45b222c54ff79ef20fa57f14b9d2c1dab3cd7d3faa/ty-0.0.25-py3-none-win_arm64.whl", hash = "sha256:d80b8cd965cbacbfd887ac2d985f5b6da09b7aa3569371e2894e0b30b26b89cd", size = 10225494, upload-time = "2026-03-24T22:32:30.611Z" },
+version = "0.0.27"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f4/de/e5cf1f151cf52fe1189e42d03d90909d7d1354fdc0c1847cbb63a0baa3da/ty-0.0.27.tar.gz", hash = "sha256:d7a8de3421d92420b40c94fe7e7d4816037560621903964dd035cf9bd0204a73", size = 5424130, upload-time = "2026-03-31T19:07:20.806Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fa/20/2a9ea661758bd67f2bfd54ce9daacb5a26c56c5f8b49fbd9a43b365a8a7d/ty-0.0.27-py3-none-linux_armv6l.whl", hash = "sha256:eb14456b8611c9e8287aa9b633f4d2a0d9f3082a31796969e0b50bdda8930281", size = 10571211, upload-time = "2026-03-31T19:07:23.28Z" },
+    { url = "https://files.pythonhosted.org/packages/da/b2/8887a51f705d075ddbe78ae7f0d4755ef48d0a90235f67aee289e9cee950/ty-0.0.27-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:02e662184703db7586118df611cf24a000d35dae38d950053d1dd7b6736fd2c4", size = 10427576, upload-time = "2026-03-31T19:07:15.499Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/c3/79d88163f508fb709ce19bc0b0a66c7c64b53d372d4caa56172c3d9b3ae8/ty-0.0.27-py3-none-macosx_11_0_arm64.whl", hash = "sha256:be5fc2899441f7f8f7ef40f9ffd006075a5ff6b06c44e8d2aa30e1b900c12f51", size = 9870359, upload-time = "2026-03-31T19:07:36.852Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/4d/ed1b0db0e1e46b5ed4976bbfe0d1825faf003b4e3774ef28c785ed73e4bb/ty-0.0.27-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30231e652b14742a76b64755e54bf0cb1cd4c128bcaf625222e0ca92a2094887", size = 10380488, upload-time = "2026-03-31T19:07:31.268Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/f2/20372f6d510b01570028433064880adec2f8abe68bf0c4603be61a560bef/ty-0.0.27-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5a119b1168f64261b3205a37e40b5b6c4aac8fd58e4587988f4e4b22c3c79847", size = 10390248, upload-time = "2026-03-31T19:07:28.345Z" },
+    { url = "https://files.pythonhosted.org/packages/45/4b/46b31a7311306be1a560f7f20fdc37b5bf718787f60626cd265d9b637554/ty-0.0.27-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e38f4e187b6975d2cbebf0f1eb1221f8f64f6e509bad14d7bb2a91afc97e4956", size = 10878479, upload-time = "2026-03-31T19:07:39.393Z" },
+    { url = "https://files.pythonhosted.org/packages/42/ba/5231a2a1fb1cebe053a25de8fded95e1a30a1e77d3628a9e58487297bafc/ty-0.0.27-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a07b1a8fbb23844f6d22091275430d9ac617175f34aa99159b268193de210389", size = 11461232, upload-time = "2026-03-31T19:07:02.518Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/37/558abab3e1f6670493524f61280b4dfcc3219555f13889223e733381dfab/ty-0.0.27-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d3ec4033031f240836bb0337274bac5c49dde312c7c6d7575451ed719bf8ffa3", size = 11133002, upload-time = "2026-03-31T19:07:18.371Z" },
+    { url = "https://files.pythonhosted.org/packages/32/38/188c14a57f52160407ce62c6abb556011718fd0bcbe1dca690529ce84c46/ty-0.0.27-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:924a8849afd500d260bf5b7296165a05b7424fbb6b19113f30f3b999d682873f", size = 10986624, upload-time = "2026-03-31T19:07:13.066Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/f1/667a71393f47d2cd6ba9ed07541b8df3eb63aab1f2ee658e77d91b8362fa/ty-0.0.27-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d8270026c07e7423a1b3a3fd065b46ed1478748f0662518b523b57744f3fa025", size = 10366721, upload-time = "2026-03-31T19:07:00.131Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/aa/8edafe41be898bda774249abc5be6edd733e53fb1777d59ea9331e38537d/ty-0.0.27-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e26e9735d3bdfd95d881111ad1cf570eab8188d8c3be36d6bcaad044d38984d8", size = 10412239, upload-time = "2026-03-31T19:07:05.297Z" },
+    { url = "https://files.pythonhosted.org/packages/53/ff/8bafaed4a18d38264f46bdfc427de7ea2974cf9064e4e0bdb1b6e6c724e3/ty-0.0.27-py3-none-musllinux_1_2_i686.whl", hash = "sha256:7c09cc9a699810609acc0090af8d0db68adaee6e60a7c3e05ab80cc954a83db7", size = 10573507, upload-time = "2026-03-31T19:06:57.064Z" },
+    { url = "https://files.pythonhosted.org/packages/16/2e/63a8284a2fefd08ab56ecbad0fde7dd4b2d4045a31cf24c1d1fcd9643227/ty-0.0.27-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:2d3e02853bb037221a456e034b1898aaa573e6374fbb53884e33cb7513ccb85a", size = 11090233, upload-time = "2026-03-31T19:07:34.139Z" },
+    { url = "https://files.pythonhosted.org/packages/14/d3/d6fa1cafdfa2b34dbfa304fc6833af8e1669fc34e24d214fa76d2a2e5a25/ty-0.0.27-py3-none-win32.whl", hash = "sha256:34e7377f2047c14dbbb7bf5322e84114db7a5f2cb470db6bee63f8f3550cfc1e", size = 9984415, upload-time = "2026-03-31T19:07:07.98Z" },
+    { url = "https://files.pythonhosted.org/packages/85/e6/dd4e27da9632b3472d5711ca49dbd3709dbd3e8c73f3af6db9c254235ca9/ty-0.0.27-py3-none-win_amd64.whl", hash = "sha256:3f7e4145aad8b815ed69b324c93b5b773eb864dda366ca16ab8693ff88ce6f36", size = 10961535, upload-time = "2026-03-31T19:07:10.566Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/1a/824b3496d66852ed7d5d68d9787711131552b68dce8835ce9410db32e618/ty-0.0.27-py3-none-win_arm64.whl", hash = "sha256:95bf8d01eb96bb2ba3ffc39faff19da595176448e80871a7b362f4d2de58476c", size = 10376689, upload-time = "2026-03-31T19:07:25.732Z" },
 ]
 
 [[package]]

From 2ca633929506a3abc7c4052ccbfe9186d5943431 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Thu, 2 Apr 2026 14:38:32 +0700
Subject: [PATCH 235/236] Pinning workflow actions to a full-length commit SHA
 to comply with security standards.

---
 .github/workflows/docker.yaml | 12 ++++++------
 .github/workflows/lint.yaml   |  7 +++++--
 .github/workflows/track.yml   |  8 ++++----
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 7dc537a..da8135d 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -29,16 +29,16 @@ jobs:
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
 
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v4
+        uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4
 
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v4
+        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4
 
       - name: Log in to Container Registry
-        uses: docker/login-action@v4
+        uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4
         with:
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
@@ -46,7 +46,7 @@ jobs:
 
       - name: Extract metadata
         id: meta
-        uses: docker/metadata-action@v6
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # v6
         with:
           images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
           tags: |
@@ -58,7 +58,7 @@ jobs:
             type=raw,value=latest,enable={{is_default_branch}}
 
       - name: Build and push Docker image
-        uses: docker/build-push-action@v7
+        uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7
         with:
           context: .
           push: true
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
index e3da9f0..dc799c9 100644
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -14,16 +14,19 @@ on:
       - "uv.lock"
       - ".github/workflows/lint.yaml"
 
+permissions:
+  contents: read
+
 jobs:
   lint:
     runs-on: ubuntu-latest
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
 
       - name: Install dependencies
         run: uv sync --all-groups
diff --git a/.github/workflows/track.yml b/.github/workflows/track.yml
index 9ebb9c6..39c4465 100644
--- a/.github/workflows/track.yml
+++ b/.github/workflows/track.yml
@@ -2,7 +2,7 @@ name: Update gemini-webapi
 
 on:
   schedule:
-    - cron: "0 0 * * *" # Runs every day at midnight
+    - cron: "0 0 * * *"
   workflow_dispatch:
 
 jobs:
@@ -14,10 +14,10 @@ jobs:
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
 
       - name: Update gemini-webapi
         id: update
@@ -56,7 +56,7 @@ jobs:
 
       - name: Create Pull Request
         if: steps.update.outputs.updated == 'true'
-        uses: peter-evans/create-pull-request@v8
+        uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8
         with:
           token: ${{ secrets.GITHUB_TOKEN }}
           commit-message: ":arrow_up: update gemini-webapi to ${{ steps.update.outputs.version }}"

From b1773c287104df7fc04100f6d56e2fc98b4255e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C6=B0u=20Quang=20V=C5=A9?= <luuquangvu89@gmail.com>
Date: Wed, 8 Apr 2026 17:50:19 +0700
Subject: [PATCH 236/236] Update dependencies to latest versions

---
 Dockerfile          |   2 +-
 app/utils/helper.py |   6 +-
 pyproject.toml      |   9 +--
 uv.lock             | 172 +++++++++++++++++++++++++++-----------------
 4 files changed, 114 insertions(+), 75 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 2499479..068aa8c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,7 +10,7 @@ WORKDIR /app
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    tini curl ca-certificates git \
+    tini curl ca-certificates \
     && rm -rf /var/lib/apt/lists/*
 
 ENV UV_COMPILE_BYTECODE=1 \
diff --git a/app/utils/helper.py b/app/utils/helper.py
index a2d8471..781ec33 100644
--- a/app/utils/helper.py
+++ b/app/utils/helper.py
@@ -11,7 +11,7 @@
 from urllib.parse import urlparse
 
 import orjson
-from curl_cffi.requests import AsyncSession
+from curl_cffi import CurlFollow, requests
 from loguru import logger
 
 from app.models import AppMessage, AppToolCall, AppToolCallFunction
@@ -199,7 +199,9 @@ async def save_url_to_tempfile(url: str, tempdir: Path | None = None) -> Path:
         elif not suffix:
             suffix = ".bin"
     else:
-        async with AsyncSession(impersonate="chrome", allow_redirects=True) as client:
+        async with requests.AsyncSession(
+            impersonate="chrome", allow_redirects=CurlFollow.SAFE
+        ) as client:
             resp = await client.get(url)
             resp.raise_for_status()
             data = resp.content
diff --git a/pyproject.toml b/pyproject.toml
index 6113b27..31740e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,15 +5,15 @@ description = "FastAPI Server built on Gemini Web API"
 readme = "README.md"
 requires-python = "==3.13.*"
 dependencies = [
-    "curl-cffi>=0.14.0",
+    "curl-cffi>=0.15.0",
     "fastapi>=0.135.3",
-    "gemini-webapi>=1.21.0",
+    "gemini-webapi>=2.0.0",
     "httptools>=0.7.1",
     "lmdb>=2.2.0",
     "loguru>=0.7.3",
     "orjson>=3.11.8",
     "pydantic-settings[yaml]>=2.13.1",
-    "uvicorn>=0.42.0",
+    "uvicorn>=0.44.0",
     "uvloop>=0.22.1; sys_platform != 'win32'",
 ]
 
@@ -63,6 +63,3 @@ extend-immutable-calls = [
 [tool.ruff.format]
 quote-style = "double"
 indent-style = "space"
-
-[tool.uv.sources]
-gemini-webapi = { git = "https://github.com/HanaokaYuzu/Gemini-API.git" }
diff --git a/uv.lock b/uv.lock
index 7dac18c..82c3743 100644
--- a/uv.lock
+++ b/uv.lock
@@ -66,14 +66,14 @@ wheels = [
 
 [[package]]
 name = "click"
-version = "8.3.1"
+version = "8.3.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "colorama", marker = "sys_platform == 'win32'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/57/75/31212c6bf2503fdf920d87fee5d7a86a2e3bcf444984126f13d8e4016804/click-8.3.2.tar.gz", hash = "sha256:14162b8b3b3550a7d479eafa77dfd3c38d9dc8951f6f69c78913a8f9a7540fd5", size = 302856, upload-time = "2026-04-03T19:14:45.118Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/20/71885d8b97d4f3dde17b1fdb92dbd4908b00541c5a3379787137285f602e/click-8.3.2-py3-none-any.whl", hash = "sha256:1924d2c27c5653561cd2cae4548d1406039cb79b858b747cfea24924bbc1616d", size = 108379, upload-time = "2026-04-03T19:14:43.505Z" },
 ]
 
 [[package]]
@@ -87,25 +87,27 @@ wheels = [
 
 [[package]]
 name = "curl-cffi"
-version = "0.14.0"
+version = "0.15.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "certifi" },
     { name = "cffi" },
+    { name = "rich" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/9b/c9/0067d9a25ed4592b022d4558157fcdb6e123516083700786d38091688767/curl_cffi-0.14.0.tar.gz", hash = "sha256:5ffbc82e59f05008ec08ea432f0e535418823cda44178ee518906a54f27a5f0f", size = 162633, upload-time = "2025-12-16T03:25:07.931Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/48/5b/89fcfebd3e5e85134147ac99e9f2b2271165fd4d71984fc65da5f17819b7/curl_cffi-0.15.0.tar.gz", hash = "sha256:ea0c67652bf6893d34ee0f82c944f37e488f6147e9421bef1771cc6545b02ded", size = 196437, upload-time = "2026-04-03T11:12:31.525Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/aa/f0/0f21e9688eaac85e705537b3a87a5588d0cefb2f09d83e83e0e8be93aa99/curl_cffi-0.14.0-cp39-abi3-macosx_14_0_arm64.whl", hash = "sha256:e35e89c6a69872f9749d6d5fda642ed4fc159619329e99d577d0104c9aad5893", size = 3087277, upload-time = "2025-12-16T03:24:49.607Z" },
-    { url = "https://files.pythonhosted.org/packages/ba/a3/0419bd48fce5b145cb6a2344c6ac17efa588f5b0061f212c88e0723da026/curl_cffi-0.14.0-cp39-abi3-macosx_15_0_x86_64.whl", hash = "sha256:5945478cd28ad7dfb5c54473bcfb6743ee1d66554d57951fdf8fc0e7d8cf4e45", size = 5804650, upload-time = "2025-12-16T03:24:51.518Z" },
-    { url = "https://files.pythonhosted.org/packages/e2/07/a238dd062b7841b8caa2fa8a359eb997147ff3161288f0dd46654d898b4d/curl_cffi-0.14.0-cp39-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c42e8fa3c667db9ccd2e696ee47adcd3cd5b0838d7282f3fc45f6c0ef3cfdfa7", size = 8231918, upload-time = "2025-12-16T03:24:52.862Z" },
-    { url = "https://files.pythonhosted.org/packages/7c/d2/ce907c9b37b5caf76ac08db40cc4ce3d9f94c5500db68a195af3513eacbc/curl_cffi-0.14.0-cp39-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:060fe2c99c41d3cb7f894de318ddf4b0301b08dca70453d769bd4e74b36b8483", size = 8654624, upload-time = "2025-12-16T03:24:54.579Z" },
-    { url = "https://files.pythonhosted.org/packages/f2/ae/6256995b18c75e6ef76b30753a5109e786813aa79088b27c8eabb1ef85c9/curl_cffi-0.14.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b158c41a25388690dd0d40b5bc38d1e0f512135f17fdb8029868cbc1993d2e5b", size = 8010654, upload-time = "2025-12-16T03:24:56.507Z" },
-    { url = "https://files.pythonhosted.org/packages/fb/10/ff64249e516b103cb762e0a9dca3ee0f04cf25e2a1d5d9838e0f1273d071/curl_cffi-0.14.0-cp39-abi3-manylinux_2_28_i686.whl", hash = "sha256:1439fbef3500fb723333c826adf0efb0e2e5065a703fb5eccce637a2250db34a", size = 7781969, upload-time = "2025-12-16T03:24:57.885Z" },
-    { url = "https://files.pythonhosted.org/packages/51/76/d6f7bb76c2d12811aa7ff16f5e17b678abdd1b357b9a8ac56310ceccabd5/curl_cffi-0.14.0-cp39-abi3-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e7176f2c2d22b542e3cf261072a81deb018cfa7688930f95dddef215caddb469", size = 7969133, upload-time = "2025-12-16T03:24:59.261Z" },
-    { url = "https://files.pythonhosted.org/packages/23/7c/cca39c0ed4e1772613d3cba13091c0e9d3b89365e84b9bf9838259a3cd8f/curl_cffi-0.14.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:03f21ade2d72978c2bb8670e9b6de5260e2755092b02d94b70b906813662998d", size = 9080167, upload-time = "2025-12-16T03:25:00.946Z" },
-    { url = "https://files.pythonhosted.org/packages/75/03/a942d7119d3e8911094d157598ae0169b1c6ca1bd3f27d7991b279bcc45b/curl_cffi-0.14.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:58ebf02de64ee5c95613209ddacb014c2d2f86298d7080c0a1c12ed876ee0690", size = 9520464, upload-time = "2025-12-16T03:25:02.922Z" },
-    { url = "https://files.pythonhosted.org/packages/a2/77/78900e9b0833066d2274bda75cba426fdb4cef7fbf6a4f6a6ca447607bec/curl_cffi-0.14.0-cp39-abi3-win_amd64.whl", hash = "sha256:6e503f9a103f6ae7acfb3890c843b53ec030785a22ae7682a22cc43afb94123e", size = 1677416, upload-time = "2025-12-16T03:25:04.902Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/7c/d2ba86b0b3e1e2830bd94163d047de122c69a8df03c5c7c36326c456ad82/curl_cffi-0.14.0-cp39-abi3-win_arm64.whl", hash = "sha256:2eed50a969201605c863c4c31269dfc3e0da52916086ac54553cfa353022425c", size = 1425067, upload-time = "2025-12-16T03:25:06.454Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/42/54ddd442c795f30ce5dd4e49f87ce77505958d3777cd96a91567a3975d2a/curl_cffi-0.15.0-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:bda66404010e9ed743b1b83c20c86f24fe21a9a6873e17479d6e67e29d8ded28", size = 2795267, upload-time = "2026-04-03T11:11:46.48Z" },
+    { url = "https://files.pythonhosted.org/packages/83/2d/3915e238579b3c5a92cead5c79130c3b8d20caaba7616cc4d894650e1d6b/curl_cffi-0.15.0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:a25620d9bf989c9c029a7d1642999c4c265abb0bad811deb2f77b0b5b2b12e5b", size = 2573544, upload-time = "2026-04-03T11:11:47.951Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/b3/9d2f1057749a1b07ba1989db3c1503ce8bed998310bae9aea2c43aa64f20/curl_cffi-0.15.0-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:582e570aa2586b96ed47cf4a17586b9a3c462cbe43f780487c3dc245c6ef1527", size = 10515369, upload-time = "2026-04-03T11:11:50.126Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/1d/6d10dded5ce3fd8157e558ebd97d09e551b77a62cdc1c31e93d0a633cee5/curl_cffi-0.15.0-cp310-abi3-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:838e48212447d9c81364b04707a5c861daf08f8320f9ecb3406a8919d1d5c3b3", size = 10160045, upload-time = "2026-04-03T11:11:52.664Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/12/c70b835487ace3b9ba1502631912e3440082b8ae3a162f60b59cb0b6444d/curl_cffi-0.15.0-cp310-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b6c847d86283b07ae69bb72c82eb8a59242277142aa35b89850f89e792a02fc", size = 11090433, upload-time = "2026-04-03T11:11:55.049Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/0d/78edcc4f71934225db99df68197a107386d59080742fc7bf6bb4d007924f/curl_cffi-0.15.0-cp310-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9e5e69eee735f659287e2c84444319d68a1fa68dd37abf228943a4074864283a", size = 10479178, upload-time = "2026-04-03T11:11:57.685Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/84/1e101c1acb1ea2f0b4992f5c3024f596d8e21db0d53540b9d583f673c4e7/curl_cffi-0.15.0-cp310-abi3-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:aa1323950224db24f4c510d010b3affa02196ca853fb424191fa917a513d3f4b", size = 10317051, upload-time = "2026-04-03T11:12:00.295Z" },
+    { url = "https://files.pythonhosted.org/packages/28/42/8ef236b22a6c23d096c85a1dc507efe37bfdfc7a2f8a4b34efb590197369/curl_cffi-0.15.0-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:41f80170ba844009273b2660da1964ec31e99e5719d16b3422ada87177e32e13", size = 11299660, upload-time = "2026-04-03T11:12:02.791Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/01/56aeb055d962da87a1be0d74c6c644e251c7e88129b5471dc44ac724e678/curl_cffi-0.15.0-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1977e1e12cfb5c11352cbb74acef1bed24eb7d226dab61ca57c168c21acd4d61", size = 11945049, upload-time = "2026-04-03T11:12:05.912Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/8c/2abf99a38d6340d66cf0557e0c750ef3f8883dfc5d450087e01c85861343/curl_cffi-0.15.0-cp310-abi3-win_amd64.whl", hash = "sha256:5a0c1896a0d5a5ac1eb89cd24b008d2b718dd1df6fd2f75451b59ca66e49e572", size = 1661649, upload-time = "2026-04-03T11:12:07.948Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/39/dfd54f2240d3a9b96d77bacc62b97813b35e2aa8ecf5cd5013c683f1ba96/curl_cffi-0.15.0-cp310-abi3-win_arm64.whl", hash = "sha256:a6d57f8389273a3a1f94370473c74897467bcc36af0a17336989780c507fa43d", size = 1410741, upload-time = "2026-04-03T11:12:10.073Z" },
+    { url = "https://files.pythonhosted.org/packages/19/6a/c24df8a4fc22fa84070dcd94abeba43c15e08cc09e35869565c0bad196fd/curl_cffi-0.15.0-cp313-abi3-android_24_arm64_v8a.whl", hash = "sha256:4682dc38d4336e0eb0b185374db90a760efde63cbea994b4e63f3521d44c4c92", size = 7190427, upload-time = "2026-04-03T11:12:12.142Z" },
 ]
 
 [[package]]
@@ -155,9 +157,9 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "curl-cffi", specifier = ">=0.14.0" },
+    { name = "curl-cffi", specifier = ">=0.15.0" },
     { name = "fastapi", specifier = ">=0.135.3" },
-    { name = "gemini-webapi", git = "https://github.com/HanaokaYuzu/Gemini-API.git" },
+    { name = "gemini-webapi", specifier = ">=2.0.0" },
     { name = "httptools", specifier = ">=0.7.1" },
     { name = "lmdb", specifier = ">=2.2.0" },
     { name = "loguru", specifier = ">=0.7.3" },
@@ -166,7 +168,7 @@ requires-dist = [
     { name = "pytest", marker = "extra == 'dev'" },
     { name = "ruff", marker = "extra == 'dev'" },
     { name = "ty", marker = "extra == 'dev'" },
-    { name = "uvicorn", specifier = ">=0.42.0" },
+    { name = "uvicorn", specifier = ">=0.44.0" },
     { name = "uvloop", marker = "sys_platform != 'win32'", specifier = ">=0.22.1" },
 ]
 provides-extras = ["dev"]
@@ -176,14 +178,18 @@ dev = [{ name = "gemini-fastapi", extras = ["dev"] }]
 
 [[package]]
 name = "gemini-webapi"
-version = "1.21.0.post32"
-source = { git = "https://github.com/HanaokaYuzu/Gemini-API.git#6675735f294ecf91e1e101dbb37a7e2ec2eef597" }
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "curl-cffi" },
     { name = "loguru" },
     { name = "orjson" },
     { name = "pydantic" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/2c/2a/1d46470ec287f978565b000bf4fd9cc3eee8f4193ceec70094dc12a724a3/gemini_webapi-2.0.0.tar.gz", hash = "sha256:5ee9d8ad9fd4c7fdc50ebfd152c9cf1dc4c30f7e9984bae6dffed9a0cb039735", size = 300490, upload-time = "2026-04-06T21:19:16.427Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4f/9b/bae89ced30ce74ae96793c05c2eb3e92de6fe3e619f91802aba686dd8027/gemini_webapi-2.0.0-py3-none-any.whl", hash = "sha256:6f2b922a5923afbb432c3c95cb6edd66e73cf5d99c95e9e40e63912e4131aff8", size = 92815, upload-time = "2026-04-06T21:19:14.933Z" },
+]
 
 [[package]]
 name = "h11"
@@ -254,6 +260,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" },
 ]
 
+[[package]]
+name = "markdown-it-py"
+version = "4.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "mdurl" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
+]
+
+[[package]]
+name = "mdurl"
+version = "0.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
+]
+
 [[package]]
 name = "orjson"
 version = "3.11.8"
@@ -374,7 +401,7 @@ wheels = [
 
 [[package]]
 name = "pytest"
-version = "9.0.2"
+version = "9.0.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "colorama", marker = "sys_platform == 'win32'" },
@@ -383,9 +410,9 @@ dependencies = [
     { name = "pluggy" },
     { name = "pygments" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" },
 ]
 
 [[package]]
@@ -415,29 +442,42 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" },
 ]
 
+[[package]]
+name = "rich"
+version = "14.3.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "markdown-it-py" },
+    { name = "pygments" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" },
+]
+
 [[package]]
 name = "ruff"
-version = "0.15.8"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/14/b0/73cf7550861e2b4824950b8b52eebdcc5adc792a00c514406556c5b80817/ruff-0.15.8.tar.gz", hash = "sha256:995f11f63597ee362130d1d5a327a87cb6f3f5eae3094c620bcc632329a4d26e", size = 4610921, upload-time = "2026-03-26T18:39:38.675Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/4a/92/c445b0cd6da6e7ae51e954939cb69f97e008dbe750cfca89b8cedc081be7/ruff-0.15.8-py3-none-linux_armv6l.whl", hash = "sha256:cbe05adeba76d58162762d6b239c9056f1a15a55bd4b346cfd21e26cd6ad7bc7", size = 10527394, upload-time = "2026-03-26T18:39:41.566Z" },
-    { url = "https://files.pythonhosted.org/packages/eb/92/f1c662784d149ad1414cae450b082cf736430c12ca78367f20f5ed569d65/ruff-0.15.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d3e3d0b6ba8dca1b7ef9ab80a28e840a20070c4b62e56d675c24f366ef330570", size = 10905693, upload-time = "2026-03-26T18:39:30.364Z" },
-    { url = "https://files.pythonhosted.org/packages/ca/f2/7a631a8af6d88bcef997eb1bf87cc3da158294c57044aafd3e17030613de/ruff-0.15.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:6ee3ae5c65a42f273f126686353f2e08ff29927b7b7e203b711514370d500de3", size = 10323044, upload-time = "2026-03-26T18:39:33.37Z" },
-    { url = "https://files.pythonhosted.org/packages/67/18/1bf38e20914a05e72ef3b9569b1d5c70a7ef26cd188d69e9ca8ef588d5bf/ruff-0.15.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdce027ada77baa448077ccc6ebb2fa9c3c62fd110d8659d601cf2f475858d94", size = 10629135, upload-time = "2026-03-26T18:39:44.142Z" },
-    { url = "https://files.pythonhosted.org/packages/d2/e9/138c150ff9af60556121623d41aba18b7b57d95ac032e177b6a53789d279/ruff-0.15.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:12e617fc01a95e5821648a6df341d80456bd627bfab8a829f7cfc26a14a4b4a3", size = 10348041, upload-time = "2026-03-26T18:39:52.178Z" },
-    { url = "https://files.pythonhosted.org/packages/02/f1/5bfb9298d9c323f842c5ddeb85f1f10ef51516ac7a34ba446c9347d898df/ruff-0.15.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:432701303b26416d22ba696c39f2c6f12499b89093b61360abc34bcc9bf07762", size = 11121987, upload-time = "2026-03-26T18:39:55.195Z" },
-    { url = "https://files.pythonhosted.org/packages/10/11/6da2e538704e753c04e8d86b1fc55712fdbdcc266af1a1ece7a51fff0d10/ruff-0.15.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d910ae974b7a06a33a057cb87d2a10792a3b2b3b35e33d2699fdf63ec8f6b17a", size = 11951057, upload-time = "2026-03-26T18:39:19.18Z" },
-    { url = "https://files.pythonhosted.org/packages/83/f0/c9208c5fd5101bf87002fed774ff25a96eea313d305f1e5d5744698dc314/ruff-0.15.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2033f963c43949d51e6fdccd3946633c6b37c484f5f98c3035f49c27395a8ab8", size = 11464613, upload-time = "2026-03-26T18:40:06.301Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/22/d7f2fabdba4fae9f3b570e5605d5eb4500dcb7b770d3217dca4428484b17/ruff-0.15.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f29b989a55572fb885b77464cf24af05500806ab4edf9a0fd8977f9759d85b1", size = 11257557, upload-time = "2026-03-26T18:39:57.972Z" },
-    { url = "https://files.pythonhosted.org/packages/71/8c/382a9620038cf6906446b23ce8632ab8c0811b8f9d3e764f58bedd0c9a6f/ruff-0.15.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:ac51d486bf457cdc985a412fb1801b2dfd1bd8838372fc55de64b1510eff4bec", size = 11169440, upload-time = "2026-03-26T18:39:22.205Z" },
-    { url = "https://files.pythonhosted.org/packages/4d/0d/0994c802a7eaaf99380085e4e40c845f8e32a562e20a38ec06174b52ef24/ruff-0.15.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c9861eb959edab053c10ad62c278835ee69ca527b6dcd72b47d5c1e5648964f6", size = 10605963, upload-time = "2026-03-26T18:39:46.682Z" },
-    { url = "https://files.pythonhosted.org/packages/19/aa/d624b86f5b0aad7cef6bbf9cd47a6a02dfdc4f72c92a337d724e39c9d14b/ruff-0.15.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8d9a5b8ea13f26ae90838afc33f91b547e61b794865374f114f349e9036835fb", size = 10357484, upload-time = "2026-03-26T18:39:49.176Z" },
-    { url = "https://files.pythonhosted.org/packages/35/c3/e0b7835d23001f7d999f3895c6b569927c4d39912286897f625736e1fd04/ruff-0.15.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c2a33a529fb3cbc23a7124b5c6ff121e4d6228029cba374777bd7649cc8598b8", size = 10830426, upload-time = "2026-03-26T18:40:03.702Z" },
-    { url = "https://files.pythonhosted.org/packages/f0/51/ab20b322f637b369383adc341d761eaaa0f0203d6b9a7421cd6e783d81b9/ruff-0.15.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:75e5cd06b1cf3f47a3996cfc999226b19aa92e7cce682dcd62f80d7035f98f49", size = 11345125, upload-time = "2026-03-26T18:39:27.799Z" },
-    { url = "https://files.pythonhosted.org/packages/37/e6/90b2b33419f59d0f2c4c8a48a4b74b460709a557e8e0064cf33ad894f983/ruff-0.15.8-py3-none-win32.whl", hash = "sha256:bc1f0a51254ba21767bfa9a8b5013ca8149dcf38092e6a9eb704d876de94dc34", size = 10571959, upload-time = "2026-03-26T18:39:36.117Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/a2/ef467cb77099062317154c63f234b8a7baf7cb690b99af760c5b68b9ee7f/ruff-0.15.8-py3-none-win_amd64.whl", hash = "sha256:04f79eff02a72db209d47d665ba7ebcad609d8918a134f86cb13dd132159fc89", size = 11743893, upload-time = "2026-03-26T18:39:25.01Z" },
-    { url = "https://files.pythonhosted.org/packages/15/e2/77be4fff062fa78d9b2a4dea85d14785dac5f1d0c1fb58ed52331f0ebe28/ruff-0.15.8-py3-none-win_arm64.whl", hash = "sha256:cf891fa8e3bb430c0e7fac93851a5978fc99c8fa2c053b57b118972866f8e5f2", size = 11048175, upload-time = "2026-03-26T18:40:01.06Z" },
+version = "0.15.9"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e6/97/e9f1ca355108ef7194e38c812ef40ba98c7208f47b13ad78d023caa583da/ruff-0.15.9.tar.gz", hash = "sha256:29cbb1255a9797903f6dde5ba0188c707907ff44a9006eb273b5a17bfa0739a2", size = 4617361, upload-time = "2026-04-02T18:17:20.829Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0b/1f/9cdfd0ac4b9d1e5a6cf09bedabdf0b56306ab5e333c85c87281273e7b041/ruff-0.15.9-py3-none-linux_armv6l.whl", hash = "sha256:6efbe303983441c51975c243e26dff328aca11f94b70992f35b093c2e71801e1", size = 10511206, upload-time = "2026-04-02T18:16:41.574Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/f6/32bfe3e9c136b35f02e489778d94384118bb80fd92c6d92e7ccd97db12ce/ruff-0.15.9-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:4965bac6ac9ea86772f4e23587746f0b7a395eccabb823eb8bfacc3fa06069f7", size = 10923307, upload-time = "2026-04-02T18:17:08.645Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/25/de55f52ab5535d12e7aaba1de37a84be6179fb20bddcbe71ec091b4a3243/ruff-0.15.9-py3-none-macosx_11_0_arm64.whl", hash = "sha256:eaf05aad70ca5b5a0a4b0e080df3a6b699803916d88f006efd1f5b46302daab8", size = 10316722, upload-time = "2026-04-02T18:16:44.206Z" },
+    { url = "https://files.pythonhosted.org/packages/48/11/690d75f3fd6278fe55fff7c9eb429c92d207e14b25d1cae4064a32677029/ruff-0.15.9-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9439a342adb8725f32f92732e2bafb6d5246bd7a5021101166b223d312e8fc59", size = 10623674, upload-time = "2026-04-02T18:16:50.951Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/ec/176f6987be248fc5404199255522f57af1b4a5a1b57727e942479fec98ad/ruff-0.15.9-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9c5e6faf9d97c8edc43877c3f406f47446fc48c40e1442d58cfcdaba2acea745", size = 10351516, upload-time = "2026-04-02T18:16:57.206Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/fc/51cffbd2b3f240accc380171d51446a32aa2ea43a40d4a45ada67368fbd2/ruff-0.15.9-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b34a9766aeec27a222373d0b055722900fbc0582b24f39661aa96f3fe6ad901", size = 11150202, upload-time = "2026-04-02T18:17:06.452Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/d4/25292a6dfc125f6b6528fe6af31f5e996e19bf73ca8e3ce6eb7fa5b95885/ruff-0.15.9-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89dd695bc72ae76ff484ae54b7e8b0f6b50f49046e198355e44ea656e521fef9", size = 11988891, upload-time = "2026-04-02T18:17:18.575Z" },
+    { url = "https://files.pythonhosted.org/packages/13/e1/1eebcb885c10e19f969dcb93d8413dfee8172578709d7ee933640f5e7147/ruff-0.15.9-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ce187224ef1de1bd225bc9a152ac7102a6171107f026e81f317e4257052916d5", size = 11480576, upload-time = "2026-04-02T18:16:52.986Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/6b/a1548ac378a78332a4c3dcf4a134c2475a36d2a22ddfa272acd574140b50/ruff-0.15.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b0c7c341f68adb01c488c3b7d4b49aa8ea97409eae6462d860a79cf55f431b6", size = 11254525, upload-time = "2026-04-02T18:17:02.041Z" },
+    { url = "https://files.pythonhosted.org/packages/42/aa/4bb3af8e61acd9b1281db2ab77e8b2c3c5e5599bf2a29d4a942f1c62b8d6/ruff-0.15.9-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:55cc15eee27dc0eebdfcb0d185a6153420efbedc15eb1d38fe5e685657b0f840", size = 11204072, upload-time = "2026-04-02T18:17:13.581Z" },
+    { url = "https://files.pythonhosted.org/packages/69/48/d550dc2aa6e423ea0bcc1d0ff0699325ffe8a811e2dba156bd80750b86dc/ruff-0.15.9-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a6537f6eed5cda688c81073d46ffdfb962a5f29ecb6f7e770b2dc920598997ed", size = 10594998, upload-time = "2026-04-02T18:16:46.369Z" },
+    { url = "https://files.pythonhosted.org/packages/63/47/321167e17f5344ed5ec6b0aa2cff64efef5f9e985af8f5622cfa6536043f/ruff-0.15.9-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:6d3fcbca7388b066139c523bda744c822258ebdcfbba7d24410c3f454cc9af71", size = 10359769, upload-time = "2026-04-02T18:17:10.994Z" },
+    { url = "https://files.pythonhosted.org/packages/67/5e/074f00b9785d1d2c6f8c22a21e023d0c2c1817838cfca4c8243200a1fa87/ruff-0.15.9-py3-none-musllinux_1_2_i686.whl", hash = "sha256:058d8e99e1bfe79d8a0def0b481c56059ee6716214f7e425d8e737e412d69677", size = 10850236, upload-time = "2026-04-02T18:16:48.749Z" },
+    { url = "https://files.pythonhosted.org/packages/76/37/804c4135a2a2caf042925d30d5f68181bdbd4461fd0d7739da28305df593/ruff-0.15.9-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:8e1ddb11dbd61d5983fa2d7d6370ef3eb210951e443cace19594c01c72abab4c", size = 11358343, upload-time = "2026-04-02T18:16:55.068Z" },
+    { url = "https://files.pythonhosted.org/packages/88/3d/1364fcde8656962782aa9ea93c92d98682b1ecec2f184e625a965ad3b4a6/ruff-0.15.9-py3-none-win32.whl", hash = "sha256:bde6ff36eaf72b700f32b7196088970bf8fdb2b917b7accd8c371bfc0fd573ec", size = 10583382, upload-time = "2026-04-02T18:17:04.261Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/56/5c7084299bd2cacaa07ae63a91c6f4ba66edc08bf28f356b24f6b717c799/ruff-0.15.9-py3-none-win_amd64.whl", hash = "sha256:45a70921b80e1c10cf0b734ef09421f71b5aa11d27404edc89d7e8a69505e43d", size = 11744969, upload-time = "2026-04-02T18:16:59.611Z" },
+    { url = "https://files.pythonhosted.org/packages/03/36/76704c4f312257d6dbaae3c959add2a622f63fcca9d864659ce6d8d97d3d/ruff-0.15.9-py3-none-win_arm64.whl", hash = "sha256:0694e601c028fd97dc5c6ee244675bc241aeefced7ef80cd9c6935a871078f53", size = 11005870, upload-time = "2026-04-02T18:17:15.773Z" },
 ]
 
 [[package]]
@@ -454,26 +494,26 @@ wheels = [
 
 [[package]]
 name = "ty"
-version = "0.0.27"
+version = "0.0.29"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f4/de/e5cf1f151cf52fe1189e42d03d90909d7d1354fdc0c1847cbb63a0baa3da/ty-0.0.27.tar.gz", hash = "sha256:d7a8de3421d92420b40c94fe7e7d4816037560621903964dd035cf9bd0204a73", size = 5424130, upload-time = "2026-03-31T19:07:20.806Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/47/d5/853561de49fae38c519e905b2d8da9c531219608f1fccc47a0fc2c896980/ty-0.0.29.tar.gz", hash = "sha256:e7936cca2f691eeda631876c92809688dbbab68687c3473f526cd83b6a9228d8", size = 5469221, upload-time = "2026-04-05T15:01:21.328Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/fa/20/2a9ea661758bd67f2bfd54ce9daacb5a26c56c5f8b49fbd9a43b365a8a7d/ty-0.0.27-py3-none-linux_armv6l.whl", hash = "sha256:eb14456b8611c9e8287aa9b633f4d2a0d9f3082a31796969e0b50bdda8930281", size = 10571211, upload-time = "2026-03-31T19:07:23.28Z" },
-    { url = "https://files.pythonhosted.org/packages/da/b2/8887a51f705d075ddbe78ae7f0d4755ef48d0a90235f67aee289e9cee950/ty-0.0.27-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:02e662184703db7586118df611cf24a000d35dae38d950053d1dd7b6736fd2c4", size = 10427576, upload-time = "2026-03-31T19:07:15.499Z" },
-    { url = "https://files.pythonhosted.org/packages/1d/c3/79d88163f508fb709ce19bc0b0a66c7c64b53d372d4caa56172c3d9b3ae8/ty-0.0.27-py3-none-macosx_11_0_arm64.whl", hash = "sha256:be5fc2899441f7f8f7ef40f9ffd006075a5ff6b06c44e8d2aa30e1b900c12f51", size = 9870359, upload-time = "2026-03-31T19:07:36.852Z" },
-    { url = "https://files.pythonhosted.org/packages/dc/4d/ed1b0db0e1e46b5ed4976bbfe0d1825faf003b4e3774ef28c785ed73e4bb/ty-0.0.27-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30231e652b14742a76b64755e54bf0cb1cd4c128bcaf625222e0ca92a2094887", size = 10380488, upload-time = "2026-03-31T19:07:31.268Z" },
-    { url = "https://files.pythonhosted.org/packages/b1/f2/20372f6d510b01570028433064880adec2f8abe68bf0c4603be61a560bef/ty-0.0.27-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5a119b1168f64261b3205a37e40b5b6c4aac8fd58e4587988f4e4b22c3c79847", size = 10390248, upload-time = "2026-03-31T19:07:28.345Z" },
-    { url = "https://files.pythonhosted.org/packages/45/4b/46b31a7311306be1a560f7f20fdc37b5bf718787f60626cd265d9b637554/ty-0.0.27-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e38f4e187b6975d2cbebf0f1eb1221f8f64f6e509bad14d7bb2a91afc97e4956", size = 10878479, upload-time = "2026-03-31T19:07:39.393Z" },
-    { url = "https://files.pythonhosted.org/packages/42/ba/5231a2a1fb1cebe053a25de8fded95e1a30a1e77d3628a9e58487297bafc/ty-0.0.27-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a07b1a8fbb23844f6d22091275430d9ac617175f34aa99159b268193de210389", size = 11461232, upload-time = "2026-03-31T19:07:02.518Z" },
-    { url = "https://files.pythonhosted.org/packages/c3/37/558abab3e1f6670493524f61280b4dfcc3219555f13889223e733381dfab/ty-0.0.27-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d3ec4033031f240836bb0337274bac5c49dde312c7c6d7575451ed719bf8ffa3", size = 11133002, upload-time = "2026-03-31T19:07:18.371Z" },
-    { url = "https://files.pythonhosted.org/packages/32/38/188c14a57f52160407ce62c6abb556011718fd0bcbe1dca690529ce84c46/ty-0.0.27-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:924a8849afd500d260bf5b7296165a05b7424fbb6b19113f30f3b999d682873f", size = 10986624, upload-time = "2026-03-31T19:07:13.066Z" },
-    { url = "https://files.pythonhosted.org/packages/9f/f1/667a71393f47d2cd6ba9ed07541b8df3eb63aab1f2ee658e77d91b8362fa/ty-0.0.27-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d8270026c07e7423a1b3a3fd065b46ed1478748f0662518b523b57744f3fa025", size = 10366721, upload-time = "2026-03-31T19:07:00.131Z" },
-    { url = "https://files.pythonhosted.org/packages/8b/aa/8edafe41be898bda774249abc5be6edd733e53fb1777d59ea9331e38537d/ty-0.0.27-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e26e9735d3bdfd95d881111ad1cf570eab8188d8c3be36d6bcaad044d38984d8", size = 10412239, upload-time = "2026-03-31T19:07:05.297Z" },
-    { url = "https://files.pythonhosted.org/packages/53/ff/8bafaed4a18d38264f46bdfc427de7ea2974cf9064e4e0bdb1b6e6c724e3/ty-0.0.27-py3-none-musllinux_1_2_i686.whl", hash = "sha256:7c09cc9a699810609acc0090af8d0db68adaee6e60a7c3e05ab80cc954a83db7", size = 10573507, upload-time = "2026-03-31T19:06:57.064Z" },
-    { url = "https://files.pythonhosted.org/packages/16/2e/63a8284a2fefd08ab56ecbad0fde7dd4b2d4045a31cf24c1d1fcd9643227/ty-0.0.27-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:2d3e02853bb037221a456e034b1898aaa573e6374fbb53884e33cb7513ccb85a", size = 11090233, upload-time = "2026-03-31T19:07:34.139Z" },
-    { url = "https://files.pythonhosted.org/packages/14/d3/d6fa1cafdfa2b34dbfa304fc6833af8e1669fc34e24d214fa76d2a2e5a25/ty-0.0.27-py3-none-win32.whl", hash = "sha256:34e7377f2047c14dbbb7bf5322e84114db7a5f2cb470db6bee63f8f3550cfc1e", size = 9984415, upload-time = "2026-03-31T19:07:07.98Z" },
-    { url = "https://files.pythonhosted.org/packages/85/e6/dd4e27da9632b3472d5711ca49dbd3709dbd3e8c73f3af6db9c254235ca9/ty-0.0.27-py3-none-win_amd64.whl", hash = "sha256:3f7e4145aad8b815ed69b324c93b5b773eb864dda366ca16ab8693ff88ce6f36", size = 10961535, upload-time = "2026-03-31T19:07:10.566Z" },
-    { url = "https://files.pythonhosted.org/packages/0e/1a/824b3496d66852ed7d5d68d9787711131552b68dce8835ce9410db32e618/ty-0.0.27-py3-none-win_arm64.whl", hash = "sha256:95bf8d01eb96bb2ba3ffc39faff19da595176448e80871a7b362f4d2de58476c", size = 10376689, upload-time = "2026-03-31T19:07:25.732Z" },
+    { url = "https://files.pythonhosted.org/packages/03/b7/911f9962115acfa24e3b2ec9d4992dd994c38e8769e1b1d7680bb4d28a51/ty-0.0.29-py3-none-linux_armv6l.whl", hash = "sha256:b8a40955f7660d3eaceb0d964affc81b790c0765e7052921a5f861ff8a471c30", size = 10568206, upload-time = "2026-04-05T15:01:19.165Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/c3/fcae2167d4c77a97269f92f11d1b43b03617f81de1283d5d05b43432110c/ty-0.0.29-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6b6849adae15b00bbe2d3c5b078967dcb62eba37d38936b8eeb4c81a82d2e3b8", size = 10442530, upload-time = "2026-04-05T15:01:28.471Z" },
+    { url = "https://files.pythonhosted.org/packages/97/33/5a6bfa240cfcb9c36046ae2459fa9ea23238d20130d8656ff5ac4d6c012a/ty-0.0.29-py3-none-macosx_11_0_arm64.whl", hash = "sha256:dcdd9b17209788152f7b7ea815eda07989152325052fe690013537cc7904ce49", size = 9915735, upload-time = "2026-04-05T15:01:10.365Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/1e/318f45fae232118e81a6306c30f50de42c509c412128d5bd231eab699ffb/ty-0.0.29-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d8ed4789bae78ffaf94462c0d25589a734cab0366b86f2bbcb1bb90e1a7a169", size = 10419748, upload-time = "2026-04-05T15:01:32.375Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/a8/5687872e2ab5a0f7dd4fd8456eac31e9381ad4dc74961f6f29965ad4dd91/ty-0.0.29-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:91ec374b8565e0ad0900011c24641ebbef2da51adbd4fb69ff3280c8a7eceb02", size = 10394738, upload-time = "2026-04-05T15:01:06.473Z" },
+    { url = "https://files.pythonhosted.org/packages/de/68/015d118097eeb95e6a44c4abce4c0a28b7b9dfb3085b7f0ee48e4f099633/ty-0.0.29-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:298a8d5faa2502d3810bbbb47a030b9455495b9921594206043c785dd61548cf", size = 10910613, upload-time = "2026-04-05T15:01:17.17Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/01/47ce3c6c53e0670eadbe80756b167bf80ed6681d1ba57cfde2e8065a13d1/ty-0.0.29-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c8fba1a3524c6109d1e020d92301c79d41bf442fa8d335b9fa366239339cb70", size = 11475750, upload-time = "2026-04-05T15:01:30.461Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/cf/e361845b1081c9264ad5b7c963231bab03f2666865a9f2a115c4233f2137/ty-0.0.29-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c48adf88a70d264128c39ee922ed14a947817fced1e93c08c1a89c9244edcde", size = 11190055, upload-time = "2026-04-05T15:01:12.369Z" },
+    { url = "https://files.pythonhosted.org/packages/79/12/0fb0857e9a62cb11586e9a712103877bbf717f5fb570d16634408cfdefee/ty-0.0.29-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ce0a7a0e96bc7b42518cd3a1a6a6298ef64ff40ca4614355c1aa807059b5c6f", size = 11020539, upload-time = "2026-04-05T15:01:37.022Z" },
+    { url = "https://files.pythonhosted.org/packages/20/36/5a26753802083f80cd125db6c4348ad42b3c982ec36e718e0bf4c18f75e5/ty-0.0.29-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a6ac86a05b4a3731d45365ab97780acc7b8146fa62fccb3cbe94fe6546c67a97", size = 10396399, upload-time = "2026-04-05T15:01:26.167Z" },
+    { url = "https://files.pythonhosted.org/packages/00/e6/b4e75b5752239ab3ab400f19faef4dbef81d05aab5d3419fda0c062a3765/ty-0.0.29-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:6bbbf53141af0f3150bf288d716263f1a3550054e4b3551ca866d38192ba9891", size = 10421461, upload-time = "2026-04-05T15:01:08.367Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/21/1084b5b609f9abed62070ec0b31c283a403832a6310c8bbc208bd45ee1e6/ty-0.0.29-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1c9e06b770c1d0ff5efc51e34312390db31d53fcf3088163f413030b42b74f84", size = 10599187, upload-time = "2026-04-05T15:01:23.52Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/a1/ce19a2ca717bbcc1ee11378aba52ef70b6ce5b87245162a729d9fdc2360f/ty-0.0.29-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:0307fe37e3f000ef1a4ae230bbaf511508a78d24a5e51b40902a21b09d5e6037", size = 11121198, upload-time = "2026-04-05T15:01:15.22Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/6b/f1430b279af704321566ce7ec2725d3d8258c2f815ebd93e474c64cd4543/ty-0.0.29-py3-none-win32.whl", hash = "sha256:7a2a898217960a825f8bc0087e1fdbaf379606175e98f9807187221d53a4a8ed", size = 9995331, upload-time = "2026-04-05T15:01:01.32Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/ef/3ef01c17785ff9a69378465c7d0faccd48a07b163554db0995e5d65a5a23/ty-0.0.29-py3-none-win_amd64.whl", hash = "sha256:fc1294200226b91615acbf34e0a9ad81caf98c081e9c6a912a31b0a7b603bc3f", size = 11023644, upload-time = "2026-04-05T15:01:04.432Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/55/87280a994d6a2d2647c65e12abbc997ed49835794366153c04c4d9304d76/ty-0.0.29-py3-none-win_arm64.whl", hash = "sha256:f9794bbd1bb3ce13f78c191d0c89ae4c63f52c12b6daa0c6fe220b90d019d12c", size = 10428165, upload-time = "2026-04-05T15:01:34.665Z" },
 ]
 
 [[package]]
@@ -499,15 +539,15 @@ wheels = [
 
 [[package]]
 name = "uvicorn"
-version = "0.42.0"
+version = "0.44.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "click" },
     { name = "h11" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/e3/ad/4a96c425be6fb67e0621e62d86c402b4a17ab2be7f7c055d9bd2f638b9e2/uvicorn-0.42.0.tar.gz", hash = "sha256:9b1f190ce15a2dd22e7758651d9b6d12df09a13d51ba5bf4fc33c383a48e1775", size = 85393, upload-time = "2026-03-16T06:19:50.077Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/5e/da/6eee1ff8b6cbeed47eeb5229749168e81eb4b7b999a1a15a7176e51410c9/uvicorn-0.44.0.tar.gz", hash = "sha256:6c942071b68f07e178264b9152f1f16dfac5da85880c4ce06366a96d70d4f31e", size = 86947, upload-time = "2026-04-06T09:23:22.826Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/0a/89/f8827ccff89c1586027a105e5630ff6139a64da2515e24dafe860bd9ae4d/uvicorn-0.42.0-py3-none-any.whl", hash = "sha256:96c30f5c7abe6f74ae8900a70e92b85ad6613b745d4879eb9b16ccad15645359", size = 68830, upload-time = "2026-03-16T06:19:48.325Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/23/a5bbd9600dd607411fa644c06ff4951bec3a4d82c4b852374024359c19c0/uvicorn-0.44.0-py3-none-any.whl", hash = "sha256:ce937c99a2cc70279556967274414c087888e8cec9f9c94644dfca11bd3ced89", size = 69425, upload-time = "2026-04-06T09:23:21.524Z" },
 ]
 
 [[package]]