From caaf3a6a3d80881e9cd65838283e797f042f5a5c Mon Sep 17 00:00:00 2001
From: EterUltimate <1831303476@qq.com>
Date: Thu, 11 Jun 2026 19:07:15 +0800
Subject: [PATCH] perf: optimize LLM hook cache-friendly context injection

---
 README.md                             |  2 +-
 config.py                             | 34 +++++++++--
 docs/README.md                        |  2 +-
 docs/architecture.md                  |  4 +-
 docs/learning-flow.md                 |  6 +-
 services/hooks/llm_hook_handler.py    | 82 +++++++++++++++++++++------
 statics/messages.py                   |  2 +-
 tests/unit/test_config.py             |  8 +++
 tests/unit/test_config_service.py     |  4 ++
 tests/unit/test_feature_delegation.py | 65 +++++++++++++++++++++
 webui/services/config_service.py      | 16 ++++--
 11 files changed, 190 insertions(+), 35 deletions(-)

diff --git a/README.md b/README.md
index 95423c11..493d55cc 100644
--- a/README.md
+++ b/README.md
@@ -156,7 +156,7 @@ sequenceDiagram
     G->>Review: 写入人格审查 / 风格审查
     LLM->>H: 下一次请求前触发
     H->>DB: 读取已批准 few-shot / 黑话 / 记忆 / 社交关系
-    H->>LLM: 注入 extra_user_content_parts 或 system_prompt
+    H->>LLM: 临时 extra_user_content_parts 注入，旧版 fallback
 ```
 
 ```mermaid
diff --git a/config.py b/config.py
index 7fb3b13a..a44a7541 100644
--- a/config.py
+++ b/config.py
@@ -20,6 +20,16 @@
 SUPPORTED_DB_TYPES = {"sqlite", "mysql", "postgresql"}
 POSTGRESQL_DB_TYPE_ALIASES = {"postgres", "pg", "pgsql"}
 HIGH_COST_LIGHTRAG_QUERY_MODES = {"hybrid", "mix"}
+CACHE_FRIENDLY_LLM_HOOK_TARGET = "extra_user_content_parts"  # default injection target
+LEGACY_LLM_HOOK_TARGETS = {"system_prompt", "prompt"}  # request fields used only as fallback
+LLM_HOOK_TARGET_ALIASES = {  # accepted config spelling -> canonical target name
+    "extra_user_content_parts": CACHE_FRIENDLY_LLM_HOOK_TARGET,
+    "extra_user_content": CACHE_FRIENDLY_LLM_HOOK_TARGET,
+    "user_content": CACHE_FRIENDLY_LLM_HOOK_TARGET,
+    "user_message_tail": CACHE_FRIENDLY_LLM_HOOK_TARGET,
+    "system_prompt": "system_prompt",
+    "prompt": "prompt",
+}
 LIGHTRAG_LIVINGMEMORY_COST_WARNING = (
     "当前配置选择 LightRAG 的 hybrid/mix 查询，并允许记忆委托给 LivingMemory；"
     "当 LivingMemory 插件已加载时，会叠加 LightRAG 全局/混合检索与 LivingMemory 记忆检索，"
@@ -283,11 +293,10 @@ class PluginConfig(BaseModel):
     include_mood_info: bool = True # 注入Bot情绪信息
     context_injection_position: str = "start" # 上下文注入位置: "start" 或 "end"
 
-    # LLM Hook 注入位置设置（v1.1.1新增）
-    # 控制注入内容添加到 req.system_prompt 还是 req.prompt
-    # - "system_prompt": 注入到系统提示（推荐，不会被保存到对话历史）
-    # - "prompt": 注入到用户消息（旧版行为，会导致对话历史膨胀）
-    llm_hook_injection_target: str = "system_prompt" # 可选值: "system_prompt" 或 "prompt"
+    # LLM Hook 注入位置设置
+    # 动态上下文优先注入 req.extra_user_content_parts，避免改动稳定 system_prompt
+    # 以提高 provider prefix cache 命中率；旧版 AstrBot 不支持时才按 legacy 目标回退。
+    llm_hook_injection_target: str = CACHE_FRIENDLY_LLM_HOOK_TARGET
 
     # 目标驱动对话配置
     enable_goal_driven_chat: bool = False # 启用目标驱动对话
@@ -332,6 +341,19 @@ def _normalize_target_qq_list(cls, value) -> List[str]:
     def _normalize_target_blacklist(cls, value) -> List[str]:
         return normalize_identifier_list(value)
 
+    @field_validator("llm_hook_injection_target", mode="before")
+    @classmethod
+    def _normalize_llm_hook_injection_target(cls, value) -> str:
+        """Resolve a configured target or alias, ignoring case, to its canonical name."""
+        target = str(value or CACHE_FRIENDLY_LLM_HOOK_TARGET).strip().lower()
+        normalized = LLM_HOOK_TARGET_ALIASES.get(target)
+        if normalized:
+            return normalized
+        logger.warning(
+            f"未知 LLM Hook 注入目标 {value!r}，已回退到 cache-friendly extra_user_content_parts"
+        )
+        return CACHE_FRIENDLY_LLM_HOOK_TARGET
+
     def model_post_init(self, __context) -> None:
         """Normalize and apply the configured AstrBot log level."""
         normalized_level = normalize_log_level(
@@ -549,7 +571,7 @@ def create_from_config(cls, config: dict, data_dir: Optional[str] = None) -> 'Pl
             llm_hook_context_timeout=float(runtime_internal_settings.get('llm_hook_context_timeout', 3.0)),
             llm_hook_injection_target=runtime_internal_settings.get(
                 'llm_hook_injection_target',
-                'system_prompt',
+                CACHE_FRIENDLY_LLM_HOOK_TARGET,
             ),
 
             # 社交上下文注入设置
diff --git a/docs/README.md b/docs/README.md
index 88517c1f..880358c1 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -70,7 +70,7 @@ flowchart TD
     M["Bot 回复发送后"] --> N["on_bot_message_sent"]
     N --> O["BotMessage"]
     P["下一次 LLM 请求"] --> Q["LLMHookHandler.handle"]
-    Q --> R["extra_user_content_parts 或 system_prompt 注入"]
+    Q --> R["临时 extra_user_content_parts 注入，旧版 fallback"]
     FD --> S["LivingMemory: 长期记忆委托"]
     FD --> T["Group Chat Plus: 回复委托"]
 ```
diff --git a/docs/architecture.md b/docs/architecture.md
index a3da6ac6..7640c489 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -121,8 +121,8 @@ AstrBot 完成 handler 绑定后调用 `initialize()`，它委托给 `PluginLife
 
 1. 委托 `LLMHookHandler.handle(event, req)`。
 2. 并行拉取社交上下文、V2 上下文、多样性提示、黑话解释、已批准 few-shot。
-3. 优先写入 `req.extra_user_content_parts`。
-4. 旧版 AstrBot 缺少 `extra_user_content_parts` 时回退追加 `req.system_prompt`。
+3. 优先写入 `req.extra_user_content_parts`，并在框架支持时标记为临时 `TextPart`。
+4. 旧版 AstrBot 缺少 `extra_user_content_parts` 时才回退追加 `req.system_prompt` 或 `req.prompt`。
 
 ### `on_bot_message_sent`
 
diff --git a/docs/learning-flow.md b/docs/learning-flow.md
index 3f18afb5..93f088b4 100644
--- a/docs/learning-flow.md
+++ b/docs/learning-flow.md
@@ -230,10 +230,10 @@ WebUI 通过统一人格审查接口处理传统人格更新、渐进式人格
 
 注入优先级:
 
-1. 使用 `req.extra_user_content_parts.append(TextPart(...))`。
-2. 如果当前 AstrBot 版本不支持，回退追加 `req.system_prompt`。
+1. 使用 `req.extra_user_content_parts.append(TextPart(...))`，并在框架支持时调用 `mark_as_temp()`。
+2. 如果当前 AstrBot 版本不支持，才按 legacy 配置回退追加 `req.system_prompt` 或 `req.prompt`。
 
-注入内容会包在 `<context>...</context>` 中。这样可以保持系统提示相对稳定，降低对 LLM prefix cache 的影响。
+注入内容会包在 `<context>...</context>` 中。这样可以保持系统提示和历史前缀相对稳定，降低对 LLM prefix cache 的影响，并避免动态上下文持久化到后续对话。
 
 ## 9. 功能融合对学习链路的影响
 
diff --git a/services/hooks/llm_hook_handler.py b/services/hooks/llm_hook_handler.py
index dfd63f3e..cc8c9c45 100644
--- a/services/hooks/llm_hook_handler.py
+++ b/services/hooks/llm_hook_handler.py
@@ -13,6 +13,11 @@
 from astrbot.api.event import AstrMessageEvent
 
 from ..monitoring.instrumentation import monitored
+try:
+    from ...config import CACHE_FRIENDLY_LLM_HOOK_TARGET, LEGACY_LLM_HOOK_TARGETS
+except ImportError:
+    from config import CACHE_FRIENDLY_LLM_HOOK_TARGET, LEGACY_LLM_HOOK_TARGETS
+
 try:
     from astrbot.core.agent.message import TextPart
 except ImportError:
@@ -407,32 +412,24 @@ def _inject(
         self, req: Any, injections: List[str], hook_start: float
     ) -> None:
         injection_text = "\n\n".join(injections)
+        context_text = f"<context>\n{injection_text}\n</context>"
+        target = getattr(
+            self._config,
+            "llm_hook_injection_target",
+            CACHE_FRIENDLY_LLM_HOOK_TARGET,
+        )
 
         # Use AstrBot's extra_user_content_parts API to inject context.
         # This keeps system_prompt stable for LLM API prefix caching,
         # while appending dynamic context as extra content blocks after
         # the user message.
-        if hasattr(req, "extra_user_content_parts") and TextPart is not None:
-            req.extra_user_content_parts.append(
-                TextPart(text=f"<context>\n{injection_text}\n</context>")
-            )
+        if self._append_extra_user_content(req, context_text):
             logger.debug(
                 f"[LLM Hook] extra_user_content_parts 注入完成 - "
-                f"新增: {len(injection_text)} chars"
+                f"新增: {len(injection_text)} chars, target={target}"
             )
         else:
-            # Fallback for older AstrBot versions without extra_user_content_parts
-            if not req.system_prompt:
-                req.system_prompt = ""
-            req.system_prompt += "\n\n" + injection_text
-            logger.debug(
-                f"[LLM Hook] system_prompt fallback 注入完成 - "
-                f"新增: {len(injection_text)} chars"
-            )
-            logger.warning(
-                "[LLM Hook] 当前 AstrBot 版本不支持 extra_user_content_parts，"
-                "回退到 system_prompt 注入（会影响缓存命中率）"
-            )
+            self._legacy_inject(req, injection_text, target)
 
         current_style = self._diversity_manager.get_current_style()
         current_pattern = self._diversity_manager.get_current_pattern()
@@ -444,3 +441,54 @@ def _inject(
             f"耗时: {time.time() - hook_start:.3f}s"
         )
         logger.debug(f"[LLM Hook] 注入内容预览: {injection_text[:200]}...")
+
+    @staticmethod
+    def _append_extra_user_content(req: Any, context_text: str) -> bool:
+        """Append ``context_text`` as a TextPart; return False when that is impossible."""
+        parts = getattr(req, "extra_user_content_parts", None)
+        # No TextPart class or no appendable container: report failure to the caller.
+        if TextPart is None or parts is None:
+            return False
+        if not hasattr(parts, "append"):
+            return False
+
+        text_part = TextPart(text=context_text)
+        # mark_as_temp is called only when this TextPart implementation provides it.
+        marker = getattr(text_part, "mark_as_temp", None)
+        if callable(marker):
+            marker()
+        parts.append(text_part)
+        return True
+
+    @staticmethod
+    def _legacy_inject(req: Any, injection_text: str, target: str) -> None:
+        """Write the raw injection text to a legacy request field.
+
+        Invoked by ``_inject`` when the extra_user_content_parts path fails.
+        ``target`` selects ``prompt`` or ``system_prompt``; any other value
+        uses ``system_prompt``. Text is appended after a blank line, or set
+        as the whole field when the field is empty or missing.
+        """
+        if target in LEGACY_LLM_HOOK_TARGETS and target == "prompt":
+            field = "prompt"
+            notice = (
+                "[LLM Hook] 当前 AstrBot 版本不支持 extra_user_content_parts，"
+                "回退到 prompt 注入（可能膨胀对话历史并降低缓存命中率）"
+            )
+        else:
+            field = "system_prompt"
+            notice = (
+                "[LLM Hook] 当前 AstrBot 版本不支持 extra_user_content_parts，"
+                "回退到 system_prompt 注入（会影响缓存命中率）"
+            )
+
+        # A missing or None field counts as empty, so no separator is prepended.
+        existing = getattr(req, field, "") or ""
+        combined = f"{existing}\n\n{injection_text}" if existing else injection_text
+        setattr(req, field, combined)
+
+        logger.debug(
+            f"[LLM Hook] {field} fallback 注入完成 - "
+            f"新增: {len(injection_text)} chars"
+        )
+        logger.warning(notice)
diff --git a/statics/messages.py b/statics/messages.py
index 084ad319..ef784f6c 100644
--- a/statics/messages.py
+++ b/statics/messages.py
@@ -292,7 +292,7 @@ class LogMessages:
     AFFECTION_PROCESSING_SUCCESS = "好感度处理成功: {result}"
     AFFECTION_PROCESSING_FAILED = "好感度系统处理失败: {error}"
     ENHANCED_INTERACTION_FAILED = "增强交互处理失败: {error}"
-    LLM_REQUEST_HOOK_SUCCESS = "已注入情绪状态到system_prompt，群组: {group_id}"
+    LLM_REQUEST_HOOK_SUCCESS = "已注入临时 LLM 上下文，群组: {group_id}"
     LLM_REQUEST_HOOK_FAILED = "LLM请求hook处理失败: {error}"
     PLUGIN_CONFIG_SAVED = "插件配置已保存"
     PLUGIN_UNLOAD_SUCCESS = "自学习插件已安全卸载"
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
index daf2bce8..d1bf0b72 100644
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -45,6 +45,7 @@ def test_create_default_instance(self):
         assert config.web_interface_port == 7833
         assert config.web_interface_host == "0.0.0.0"
         assert config.log_level == "info"
+        assert config.llm_hook_injection_target == "extra_user_content_parts"
 
     def test_create_default_classmethod(self):
         """Test the create_default classmethod."""
@@ -349,6 +350,13 @@ def test_create_from_empty_config(self):
         assert config.target_qq_list == []
         assert config.learning_interval_hours == 6
         assert config.db_type == 'postgresql'
+        assert config.llm_hook_injection_target == 'extra_user_content_parts'
+
+    def test_llm_hook_injection_target_aliases_normalize_to_cache_friendly_default(self):
+        """Every short alias should resolve to the cache-friendly AstrBot API."""
+        for alias in ("extra_user_content", "user_content", "user_message_tail"):
+            config = PluginConfig(llm_hook_injection_target=alias)
+            assert config.llm_hook_injection_target == "extra_user_content_parts"
 
     def test_target_list_blank_values_keep_full_learning_default(self):
         """Blank settings-page rows should not disable full learning."""
diff --git a/tests/unit/test_config_service.py b/tests/unit/test_config_service.py
index e6d04a03..dd2b75c5 100644
--- a/tests/unit/test_config_service.py
+++ b/tests/unit/test_config_service.py
@@ -159,6 +159,10 @@ async def test_get_config_schema_includes_full_settings(self, tmp_path):
         assert runtime_fields["messages_db_path"]["editable"] is False
         assert runtime_fields["enable_llm_hooks"]["widget"] == "toggle"
         assert runtime_fields["enable_llm_hooks"]["value"] is False
+        hook_target = runtime_fields["llm_hook_injection_target"]
+        assert hook_target["value"] == "extra_user_content_parts"
+        assert hook_target["options"][0]["value"] == "extra_user_content_parts"
+        assert "prefix cache" in hook_target["hint"]
 
         basic_fields = {field["key"]: field for field in groups["Self_Learning_Basic"]["fields"]}
         assert basic_fields["enable_webui_password"]["widget"] == "toggle"
diff --git a/tests/unit/test_feature_delegation.py b/tests/unit/test_feature_delegation.py
index dc9180c0..65ceeead 100644
--- a/tests/unit/test_feature_delegation.py
+++ b/tests/unit/test_feature_delegation.py
@@ -13,6 +13,7 @@
 from self_learning_EterU.config import PluginConfig
 from self_learning_EterU.core.feature_delegation import FeatureDelegation
 from self_learning_EterU.core.factory import ServiceFactory
+from self_learning_EterU.services.hooks import llm_hook_handler as llm_hook_module
 from self_learning_EterU.services.hooks.llm_hook_handler import LLMHookHandler
 
 
@@ -117,6 +118,70 @@ async def test_llm_hook_handle_returns_without_context_fetches_when_disabled():
     assert req.extra_user_content_parts == []
 
 
+def test_llm_hook_injects_temp_extra_user_content_without_touching_system_prompt(monkeypatch):
+    class FakeTextPart:
+        def __init__(self, text):
+            self.text = text
+            self.temp = False  # flipped to True by mark_as_temp()
+
+        def mark_as_temp(self):
+            self.temp = True
+    # Swap the module-level TextPart for the fake so the temp flag can be observed.
+    monkeypatch.setattr(llm_hook_module, "TextPart", FakeTextPart)
+    handler = LLMHookHandler(
+        plugin_config=SimpleNamespace(
+            llm_hook_injection_target="extra_user_content_parts"
+        ),
+        diversity_manager=SimpleNamespace(
+            get_current_style=lambda: "style",
+            get_current_pattern=lambda: "pattern",
+        ),
+        social_context_injector=None,
+        v2_integration=None,
+        jargon_query_service=None,
+        temporary_persona_updater=None,
+        perf_tracker=SimpleNamespace(record=lambda payload: None),
+        group_id_to_unified_origin={},
+        db_manager=None,
+    )
+    req = SimpleNamespace(
+        prompt="用户消息",
+        system_prompt="stable system prompt",
+        extra_user_content_parts=[],
+    )
+    # Call the private injector directly with a single context string.
+    handler._inject(req, ["dynamic context"], 0)
+    # Expect one <context>-wrapped part flagged as temp, and an unchanged system_prompt.
+    assert req.system_prompt == "stable system prompt"
+    assert len(req.extra_user_content_parts) == 1
+    assert req.extra_user_content_parts[0].text == "<context>\ndynamic context\n</context>"
+    assert req.extra_user_content_parts[0].temp is True
+
+
+def test_llm_hook_legacy_prompt_fallback_when_extra_parts_unavailable(monkeypatch):
+    monkeypatch.setattr(llm_hook_module, "TextPart", None)  # simulate TextPart being unavailable
+    handler = LLMHookHandler(
+        plugin_config=SimpleNamespace(llm_hook_injection_target="prompt"),
+        diversity_manager=SimpleNamespace(
+            get_current_style=lambda: "style",
+            get_current_pattern=lambda: "pattern",
+        ),
+        social_context_injector=None,
+        v2_integration=None,
+        jargon_query_service=None,
+        temporary_persona_updater=None,
+        perf_tracker=SimpleNamespace(record=lambda payload: None),
+        group_id_to_unified_origin={},
+        db_manager=None,
+    )
+    req = SimpleNamespace(prompt="用户消息", system_prompt="stable system prompt")
+    # req has no extra_user_content_parts either, so the legacy path must run.
+    handler._inject(req, ["legacy context"], 0)
+    # target="prompt": text is appended to req.prompt unwrapped; system_prompt is untouched.
+    assert req.system_prompt == "stable system prompt"
+    assert req.prompt == "用户消息\n\nlegacy context"
+
+
 @pytest.mark.asyncio
 async def test_llm_hook_omits_local_v2_memories_when_livingmemory_delegated():
     v2 = SimpleNamespace(
diff --git a/webui/services/config_service.py b/webui/services/config_service.py
index cc007d3b..3db1b1fb 100644
--- a/webui/services/config_service.py
+++ b/webui/services/config_service.py
@@ -150,11 +150,19 @@ def _load_schema_definition() -> Dict[str, Any]:
             "llm_hook_injection_target": {
                 "description": "LLM Hook 注入目标",
                 "type": "string",
-                "hint": "控制注入到 system_prompt 还是 prompt",
-                "default": "system_prompt",
+                "hint": (
+                    "推荐保持 extra_user_content_parts：动态上下文会追加到用户消息尾部"
+                    "并标记为临时内容，避免改动 system_prompt 影响 provider prefix cache。"
+                    "system_prompt/prompt 仅作为旧版 AstrBot fallback"
+                ),
+                "default": "extra_user_content_parts",
                 "options": [
-                    {"value": "system_prompt", "label": "system_prompt"},
-                    {"value": "prompt", "label": "prompt"},
+                    {
+                        "value": "extra_user_content_parts",
+                        "label": "extra_user_content_parts（推荐）",
+                    },
+                    {"value": "system_prompt", "label": "system_prompt（旧版 fallback）"},
+                    {"value": "prompt", "label": "prompt（旧版 fallback）"},
                 ],
             },
             "enable_llm_hooks": {