From caaf3a6a3d80881e9cd65838283e797f042f5a5c Mon Sep 17 00:00:00 2001
From: EterUltimate <1831303476@qq.com>
Date: Thu, 11 Jun 2026 19:07:15 +0800
Subject: [PATCH] perf: optimize LLM hook cache-friendly context injection
---
README.md | 2 +-
config.py | 34 +++++++++--
docs/README.md | 2 +-
docs/architecture.md | 4 +-
docs/learning-flow.md | 6 +-
services/hooks/llm_hook_handler.py | 82 +++++++++++++++++++++------
statics/messages.py | 2 +-
tests/unit/test_config.py | 8 +++
tests/unit/test_config_service.py | 4 ++
tests/unit/test_feature_delegation.py | 65 +++++++++++++++++++++
webui/services/config_service.py | 16 ++++--
11 files changed, 190 insertions(+), 35 deletions(-)
diff --git a/README.md b/README.md
index 95423c11..493d55cc 100644
--- a/README.md
+++ b/README.md
@@ -156,7 +156,7 @@ sequenceDiagram
G->>Review: 写入人格审查 / 风格审查
LLM->>H: 下一次请求前触发
H->>DB: 读取已批准 few-shot / 黑话 / 记忆 / 社交关系
- H->>LLM: 注入 extra_user_content_parts 或 system_prompt
+ H->>LLM: 临时 extra_user_content_parts 注入,旧版 fallback
```
```mermaid
diff --git a/config.py b/config.py
index 7fb3b13a..a44a7541 100644
--- a/config.py
+++ b/config.py
@@ -20,6 +20,16 @@
SUPPORTED_DB_TYPES = {"sqlite", "mysql", "postgresql"}
POSTGRESQL_DB_TYPE_ALIASES = {"postgres", "pg", "pgsql"}
HIGH_COST_LIGHTRAG_QUERY_MODES = {"hybrid", "mix"}
+CACHE_FRIENDLY_LLM_HOOK_TARGET = "extra_user_content_parts"
+LEGACY_LLM_HOOK_TARGETS = {"system_prompt", "prompt"}
+LLM_HOOK_TARGET_ALIASES = {
+ "extra_user_content_parts": CACHE_FRIENDLY_LLM_HOOK_TARGET,
+ "extra_user_content": CACHE_FRIENDLY_LLM_HOOK_TARGET,
+ "user_content": CACHE_FRIENDLY_LLM_HOOK_TARGET,
+ "user_message_tail": CACHE_FRIENDLY_LLM_HOOK_TARGET,
+ "system_prompt": "system_prompt",
+ "prompt": "prompt",
+}
LIGHTRAG_LIVINGMEMORY_COST_WARNING = (
"当前配置选择 LightRAG 的 hybrid/mix 查询,并允许记忆委托给 LivingMemory;"
"当 LivingMemory 插件已加载时,会叠加 LightRAG 全局/混合检索与 LivingMemory 记忆检索,"
@@ -283,11 +293,10 @@ class PluginConfig(BaseModel):
include_mood_info: bool = True # 注入Bot情绪信息
context_injection_position: str = "start" # 上下文注入位置: "start" 或 "end"
- # LLM Hook 注入位置设置(v1.1.1新增)
- # 控制注入内容添加到 req.system_prompt 还是 req.prompt
- # - "system_prompt": 注入到系统提示(推荐,不会被保存到对话历史)
- # - "prompt": 注入到用户消息(旧版行为,会导致对话历史膨胀)
- llm_hook_injection_target: str = "system_prompt" # 可选值: "system_prompt" 或 "prompt"
+ # LLM Hook 注入位置设置
+ # 动态上下文优先注入 req.extra_user_content_parts,避免改动稳定 system_prompt
+ # 以提高 provider prefix cache 命中率;旧版 AstrBot 不支持时才按 legacy 目标回退。
+ llm_hook_injection_target: str = CACHE_FRIENDLY_LLM_HOOK_TARGET
# 目标驱动对话配置
enable_goal_driven_chat: bool = False # 启用目标驱动对话
@@ -332,6 +341,19 @@ def _normalize_target_qq_list(cls, value) -> List[str]:
def _normalize_target_blacklist(cls, value) -> List[str]:
return normalize_identifier_list(value)
+    @field_validator("llm_hook_injection_target", mode="before")
+    @classmethod
+    def _normalize_llm_hook_injection_target(cls, value) -> str:
+        """Canonicalize the target: aliases match case-insensitively, else default."""
+        target = str(value or "").strip().lower() or CACHE_FRIENDLY_LLM_HOOK_TARGET
+        normalized = LLM_HOOK_TARGET_ALIASES.get(target)
+        if not normalized:
+            logger.warning(
+                f"未知 LLM Hook 注入目标 {value!r},"
+                "已回退到 cache-friendly extra_user_content_parts"
+            )
+        return normalized or CACHE_FRIENDLY_LLM_HOOK_TARGET
+
def model_post_init(self, __context) -> None:
"""Normalize and apply the configured AstrBot log level."""
normalized_level = normalize_log_level(
@@ -549,7 +571,7 @@ def create_from_config(cls, config: dict, data_dir: Optional[str] = None) -> 'Pl
llm_hook_context_timeout=float(runtime_internal_settings.get('llm_hook_context_timeout', 3.0)),
llm_hook_injection_target=runtime_internal_settings.get(
'llm_hook_injection_target',
- 'system_prompt',
+ CACHE_FRIENDLY_LLM_HOOK_TARGET,
),
# 社交上下文注入设置
diff --git a/docs/README.md b/docs/README.md
index 88517c1f..880358c1 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -70,7 +70,7 @@ flowchart TD
M["Bot 回复发送后"] --> N["on_bot_message_sent"]
N --> O["BotMessage"]
P["下一次 LLM 请求"] --> Q["LLMHookHandler.handle"]
- Q --> R["extra_user_content_parts 或 system_prompt 注入"]
+ Q --> R["临时 extra_user_content_parts 注入,旧版 fallback"]
FD --> S["LivingMemory: 长期记忆委托"]
FD --> T["Group Chat Plus: 回复委托"]
```
diff --git a/docs/architecture.md b/docs/architecture.md
index a3da6ac6..7640c489 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -121,8 +121,8 @@ AstrBot 完成 handler 绑定后调用 `initialize()`,它委托给 `PluginLife
1. 委托 `LLMHookHandler.handle(event, req)`。
2. 并行拉取社交上下文、V2 上下文、多样性提示、黑话解释、已批准 few-shot。
-3. 优先写入 `req.extra_user_content_parts`。
-4. 旧版 AstrBot 缺少 `extra_user_content_parts` 时回退追加 `req.system_prompt`。
+3. 优先写入 `req.extra_user_content_parts`,并在框架支持时标记为临时 `TextPart`。
+4. 旧版 AstrBot 缺少 `extra_user_content_parts` 时才回退追加 `req.system_prompt` 或 `req.prompt`。
### `on_bot_message_sent`
diff --git a/docs/learning-flow.md b/docs/learning-flow.md
index 3f18afb5..93f088b4 100644
--- a/docs/learning-flow.md
+++ b/docs/learning-flow.md
@@ -230,10 +230,10 @@ WebUI 通过统一人格审查接口处理传统人格更新、渐进式人格
注入优先级:
-1. 使用 `req.extra_user_content_parts.append(TextPart(...))`。
-2. 如果当前 AstrBot 版本不支持,回退追加 `req.system_prompt`。
+1. 使用 `req.extra_user_content_parts.append(TextPart(...))`,并在框架支持时调用 `mark_as_temp()`。
+2. 如果当前 AstrBot 版本不支持,才按 legacy 配置回退追加 `req.system_prompt` 或 `req.prompt`。
-注入内容会包在 `...` 中。这样可以保持系统提示相对稳定,降低对 LLM prefix cache 的影响。
+注入内容会包在 `...` 中。这样可以保持系统提示和历史前缀相对稳定,降低对 LLM prefix cache 的影响,并避免动态上下文持久化到后续对话。
## 9. 功能融合对学习链路的影响
diff --git a/services/hooks/llm_hook_handler.py b/services/hooks/llm_hook_handler.py
index dfd63f3e..cc8c9c45 100644
--- a/services/hooks/llm_hook_handler.py
+++ b/services/hooks/llm_hook_handler.py
@@ -13,6 +13,11 @@
from astrbot.api.event import AstrMessageEvent
from ..monitoring.instrumentation import monitored
+try:
+ from ...config import CACHE_FRIENDLY_LLM_HOOK_TARGET, LEGACY_LLM_HOOK_TARGETS
+except ImportError:
+ from config import CACHE_FRIENDLY_LLM_HOOK_TARGET, LEGACY_LLM_HOOK_TARGETS
+
try:
from astrbot.core.agent.message import TextPart
except ImportError:
@@ -407,32 +412,24 @@ def _inject(
self, req: Any, injections: List[str], hook_start: float
) -> None:
injection_text = "\n\n".join(injections)
+ context_text = f"\n{injection_text}\n"
+ target = getattr(
+ self._config,
+ "llm_hook_injection_target",
+ CACHE_FRIENDLY_LLM_HOOK_TARGET,
+ )
# Use AstrBot's extra_user_content_parts API to inject context.
# This keeps system_prompt stable for LLM API prefix caching,
# while appending dynamic context as extra content blocks after
# the user message.
- if hasattr(req, "extra_user_content_parts") and TextPart is not None:
- req.extra_user_content_parts.append(
- TextPart(text=f"\n{injection_text}\n")
- )
+ if self._append_extra_user_content(req, context_text):
logger.debug(
f"[LLM Hook] extra_user_content_parts 注入完成 - "
- f"新增: {len(injection_text)} chars"
+ f"新增: {len(injection_text)} chars, target={target}"
)
else:
- # Fallback for older AstrBot versions without extra_user_content_parts
- if not req.system_prompt:
- req.system_prompt = ""
- req.system_prompt += "\n\n" + injection_text
- logger.debug(
- f"[LLM Hook] system_prompt fallback 注入完成 - "
- f"新增: {len(injection_text)} chars"
- )
- logger.warning(
- "[LLM Hook] 当前 AstrBot 版本不支持 extra_user_content_parts,"
- "回退到 system_prompt 注入(会影响缓存命中率)"
- )
+ self._legacy_inject(req, injection_text, target)
current_style = self._diversity_manager.get_current_style()
current_pattern = self._diversity_manager.get_current_pattern()
@@ -444,3 +441,54 @@ def _inject(
f"耗时: {time.time() - hook_start:.3f}s"
)
logger.debug(f"[LLM Hook] 注入内容预览: {injection_text[:200]}...")
+
+    @staticmethod
+    def _append_extra_user_content(req: Any, context_text: str) -> bool:
+        """Attach ``context_text`` to ``req.extra_user_content_parts`` as a TextPart.
+
+        Returns False, leaving ``req`` untouched, when TextPart is None or the
+        request has no appendable ``extra_user_content_parts``; True otherwise.
+        """
+        parts = getattr(req, "extra_user_content_parts", None)
+        if TextPart is None or parts is None or not hasattr(parts, "append"):
+            return False
+
+        text_part = TextPart(text=context_text)
+        marker = getattr(text_part, "mark_as_temp", None)
+        if callable(marker):
+            marker()
+        parts.append(text_part)
+        return True
+
+    @staticmethod
+    def _legacy_inject(req: Any, injection_text: str, target: str) -> None:
+        """Append ``injection_text`` to ``req.prompt`` or ``req.system_prompt``.
+
+        ``req.prompt`` is used only when ``target`` is a legacy target equal to
+        "prompt"; every other value ends up in ``req.system_prompt``. Text that
+        is already present is kept and the injection follows it after a blank
+        line; a missing or empty attribute is replaced by the injection alone.
+        One debug and one warning message are logged for the chosen attribute.
+        """
+        use_prompt = target in LEGACY_LLM_HOOK_TARGETS and target == "prompt"
+        attr = "prompt" if use_prompt else "system_prompt"
+
+        existing = getattr(req, attr, "") or ""
+        if existing:
+            setattr(req, attr, f"{existing}\n\n{injection_text}")
+        else:
+            setattr(req, attr, injection_text)
+
+        # Both warnings share one head; only the attribute-specific tail differs.
+        if use_prompt:
+            downside = "回退到 prompt 注入(可能膨胀对话历史并降低缓存命中率)"
+        else:
+            downside = "回退到 system_prompt 注入(会影响缓存命中率)"
+        logger.debug(
+            f"[LLM Hook] {attr} fallback 注入完成 - "
+            f"新增: {len(injection_text)} chars"
+        )
+        logger.warning(
+            "[LLM Hook] 当前 AstrBot 版本不支持 extra_user_content_parts,"
+            + downside
+        )
diff --git a/statics/messages.py b/statics/messages.py
index 084ad319..ef784f6c 100644
--- a/statics/messages.py
+++ b/statics/messages.py
@@ -292,7 +292,7 @@ class LogMessages:
AFFECTION_PROCESSING_SUCCESS = "好感度处理成功: {result}"
AFFECTION_PROCESSING_FAILED = "好感度系统处理失败: {error}"
ENHANCED_INTERACTION_FAILED = "增强交互处理失败: {error}"
- LLM_REQUEST_HOOK_SUCCESS = "已注入情绪状态到system_prompt,群组: {group_id}"
+ LLM_REQUEST_HOOK_SUCCESS = "已注入临时 LLM 上下文,群组: {group_id}"
LLM_REQUEST_HOOK_FAILED = "LLM请求hook处理失败: {error}"
PLUGIN_CONFIG_SAVED = "插件配置已保存"
PLUGIN_UNLOAD_SUCCESS = "自学习插件已安全卸载"
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
index daf2bce8..d1bf0b72 100644
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -45,6 +45,7 @@ def test_create_default_instance(self):
assert config.web_interface_port == 7833
assert config.web_interface_host == "0.0.0.0"
assert config.log_level == "info"
+ assert config.llm_hook_injection_target == "extra_user_content_parts"
def test_create_default_classmethod(self):
"""Test the create_default classmethod."""
@@ -349,6 +350,13 @@ def test_create_from_empty_config(self):
assert config.target_qq_list == []
assert config.learning_interval_hours == 6
assert config.db_type == 'postgresql'
+ assert config.llm_hook_injection_target == 'extra_user_content_parts'
+
+    def test_llm_hook_injection_target_aliases_normalize_to_cache_friendly_default(self):
+        """An alias such as "user_message_tail" is stored as the canonical target."""
+        alias = "user_message_tail"
+        resolved = PluginConfig(llm_hook_injection_target=alias).llm_hook_injection_target
+        assert resolved == "extra_user_content_parts"
def test_target_list_blank_values_keep_full_learning_default(self):
"""Blank settings-page rows should not disable full learning."""
diff --git a/tests/unit/test_config_service.py b/tests/unit/test_config_service.py
index e6d04a03..dd2b75c5 100644
--- a/tests/unit/test_config_service.py
+++ b/tests/unit/test_config_service.py
@@ -159,6 +159,10 @@ async def test_get_config_schema_includes_full_settings(self, tmp_path):
assert runtime_fields["messages_db_path"]["editable"] is False
assert runtime_fields["enable_llm_hooks"]["widget"] == "toggle"
assert runtime_fields["enable_llm_hooks"]["value"] is False
+ hook_target = runtime_fields["llm_hook_injection_target"]
+ assert hook_target["value"] == "extra_user_content_parts"
+ assert hook_target["options"][0]["value"] == "extra_user_content_parts"
+ assert "prefix cache" in hook_target["hint"]
basic_fields = {field["key"]: field for field in groups["Self_Learning_Basic"]["fields"]}
assert basic_fields["enable_webui_password"]["widget"] == "toggle"
diff --git a/tests/unit/test_feature_delegation.py b/tests/unit/test_feature_delegation.py
index dc9180c0..65ceeead 100644
--- a/tests/unit/test_feature_delegation.py
+++ b/tests/unit/test_feature_delegation.py
@@ -13,6 +13,7 @@
from self_learning_EterU.config import PluginConfig
from self_learning_EterU.core.feature_delegation import FeatureDelegation
from self_learning_EterU.core.factory import ServiceFactory
+from self_learning_EterU.services.hooks import llm_hook_handler as llm_hook_module
from self_learning_EterU.services.hooks.llm_hook_handler import LLMHookHandler
@@ -117,6 +118,70 @@ async def test_llm_hook_handle_returns_without_context_fetches_when_disabled():
assert req.extra_user_content_parts == []
+def test_llm_hook_injects_temp_extra_user_content_without_touching_system_prompt(monkeypatch):
+    """Context goes into one temp-marked extra part and system_prompt stays as it was."""
+    class FakeTextPart:
+        def __init__(self, text):
+            self.text, self.temp = text, False
+
+        def mark_as_temp(self):
+            self.temp = True
+
+    monkeypatch.setattr(llm_hook_module, "TextPart", FakeTextPart)
+    diversity = SimpleNamespace(
+        get_current_style=lambda: "style",
+        get_current_pattern=lambda: "pattern",
+    )
+    handler = LLMHookHandler(
+        plugin_config=SimpleNamespace(llm_hook_injection_target="extra_user_content_parts"),
+        diversity_manager=diversity,
+        social_context_injector=None,
+        v2_integration=None,
+        jargon_query_service=None,
+        temporary_persona_updater=None,
+        perf_tracker=SimpleNamespace(record=lambda payload: None),
+        group_id_to_unified_origin={},
+        db_manager=None,
+    )
+    req = SimpleNamespace(
+        prompt="用户消息",
+        system_prompt="stable system prompt",
+        extra_user_content_parts=[],
+    )
+
+    handler._inject(req, ["dynamic context"], 0)
+
+    parts = req.extra_user_content_parts
+    assert req.system_prompt == "stable system prompt"
+    assert len(parts) == 1
+    assert parts[0].text == "\ndynamic context\n"
+    assert parts[0].temp is True
+
+
+def test_llm_hook_legacy_prompt_fallback_when_extra_parts_unavailable(monkeypatch):
+    """With TextPart unavailable and target "prompt", context is appended to req.prompt."""
+    monkeypatch.setattr(llm_hook_module, "TextPart", None)
+    req = SimpleNamespace(prompt="用户消息", system_prompt="stable system prompt")
+    diversity = SimpleNamespace(
+        get_current_style=lambda: "style",
+        get_current_pattern=lambda: "pattern",
+    )
+    handler = LLMHookHandler(
+        plugin_config=SimpleNamespace(llm_hook_injection_target="prompt"),
+        diversity_manager=diversity,
+        social_context_injector=None,
+        v2_integration=None,
+        jargon_query_service=None,
+        temporary_persona_updater=None,
+        perf_tracker=SimpleNamespace(record=lambda payload: None),
+        group_id_to_unified_origin={},
+        db_manager=None,
+    )
+    handler._inject(req, ["legacy context"], 0)
+    assert req.system_prompt == "stable system prompt"
+    assert req.prompt == "用户消息\n\nlegacy context"
+
+
@pytest.mark.asyncio
async def test_llm_hook_omits_local_v2_memories_when_livingmemory_delegated():
v2 = SimpleNamespace(
diff --git a/webui/services/config_service.py b/webui/services/config_service.py
index cc007d3b..3db1b1fb 100644
--- a/webui/services/config_service.py
+++ b/webui/services/config_service.py
@@ -150,11 +150,19 @@ def _load_schema_definition() -> Dict[str, Any]:
"llm_hook_injection_target": {
"description": "LLM Hook 注入目标",
"type": "string",
- "hint": "控制注入到 system_prompt 还是 prompt",
- "default": "system_prompt",
+ "hint": (
+ "推荐保持 extra_user_content_parts:动态上下文会追加到用户消息尾部"
+ "并标记为临时内容,避免改动 system_prompt 影响 provider prefix cache。"
+ "system_prompt/prompt 仅作为旧版 AstrBot fallback"
+ ),
+ "default": "extra_user_content_parts",
"options": [
- {"value": "system_prompt", "label": "system_prompt"},
- {"value": "prompt", "label": "prompt"},
+ {
+ "value": "extra_user_content_parts",
+ "label": "extra_user_content_parts(推荐)",
+ },
+ {"value": "system_prompt", "label": "system_prompt(旧版 fallback)"},
+ {"value": "prompt", "label": "prompt(旧版 fallback)"},
],
},
"enable_llm_hooks": {