From caaf3a6a3d80881e9cd65838283e797f042f5a5c Mon Sep 17 00:00:00 2001 From: EterUltimate <1831303476@qq.com> Date: Thu, 11 Jun 2026 19:07:15 +0800 Subject: [PATCH] perf: optimize LLM hook cache-friendly context injection --- README.md | 2 +- config.py | 34 +++++++++-- docs/README.md | 2 +- docs/architecture.md | 4 +- docs/learning-flow.md | 6 +- services/hooks/llm_hook_handler.py | 82 +++++++++++++++++++++------ statics/messages.py | 2 +- tests/unit/test_config.py | 8 +++ tests/unit/test_config_service.py | 4 ++ tests/unit/test_feature_delegation.py | 65 +++++++++++++++++++++ webui/services/config_service.py | 16 ++++-- 11 files changed, 190 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 95423c11..493d55cc 100644 --- a/README.md +++ b/README.md @@ -156,7 +156,7 @@ sequenceDiagram G->>Review: 写入人格审查 / 风格审查 LLM->>H: 下一次请求前触发 H->>DB: 读取已批准 few-shot / 黑话 / 记忆 / 社交关系 - H->>LLM: 注入 extra_user_content_parts 或 system_prompt + H->>LLM: 临时 extra_user_content_parts 注入,旧版 fallback ``` ```mermaid diff --git a/config.py b/config.py index 7fb3b13a..a44a7541 100644 --- a/config.py +++ b/config.py @@ -20,6 +20,16 @@ SUPPORTED_DB_TYPES = {"sqlite", "mysql", "postgresql"} POSTGRESQL_DB_TYPE_ALIASES = {"postgres", "pg", "pgsql"} HIGH_COST_LIGHTRAG_QUERY_MODES = {"hybrid", "mix"} +CACHE_FRIENDLY_LLM_HOOK_TARGET = "extra_user_content_parts" +LEGACY_LLM_HOOK_TARGETS = {"system_prompt", "prompt"} +LLM_HOOK_TARGET_ALIASES = { + "extra_user_content_parts": CACHE_FRIENDLY_LLM_HOOK_TARGET, + "extra_user_content": CACHE_FRIENDLY_LLM_HOOK_TARGET, + "user_content": CACHE_FRIENDLY_LLM_HOOK_TARGET, + "user_message_tail": CACHE_FRIENDLY_LLM_HOOK_TARGET, + "system_prompt": "system_prompt", + "prompt": "prompt", +} LIGHTRAG_LIVINGMEMORY_COST_WARNING = ( "当前配置选择 LightRAG 的 hybrid/mix 查询,并允许记忆委托给 LivingMemory;" "当 LivingMemory 插件已加载时,会叠加 LightRAG 全局/混合检索与 LivingMemory 记忆检索," @@ -283,11 +293,10 @@ class PluginConfig(BaseModel): include_mood_info: bool = True # 注入Bot情绪信息 context_injection_position: str = "start" # 上下文注入位置: "start" 或 "end" - # LLM Hook 注入位置设置(v1.1.1新增) - # 控制注入内容添加到 req.system_prompt 还是 req.prompt - # - "system_prompt": 注入到系统提示(推荐,不会被保存到对话历史) - # - "prompt": 注入到用户消息(旧版行为,会导致对话历史膨胀) - llm_hook_injection_target: str = "system_prompt" # 可选值: "system_prompt" 或 "prompt" + # LLM Hook 注入位置设置 + # 动态上下文优先注入 req.extra_user_content_parts,避免改动稳定 system_prompt + # 以提高 provider prefix cache 命中率;旧版 AstrBot 不支持时才按 legacy 目标回退。 + llm_hook_injection_target: str = CACHE_FRIENDLY_LLM_HOOK_TARGET # 目标驱动对话配置 enable_goal_driven_chat: bool = False # 启用目标驱动对话 @@ -332,6 +341,19 @@ def _normalize_target_qq_list(cls, value) -> List[str]: def _normalize_target_blacklist(cls, value) -> List[str]: return normalize_identifier_list(value) + @field_validator("llm_hook_injection_target", mode="before") + @classmethod + def _normalize_llm_hook_injection_target(cls, value) -> str: + target = str(value or CACHE_FRIENDLY_LLM_HOOK_TARGET).strip() + normalized = LLM_HOOK_TARGET_ALIASES.get(target) + if normalized: + return normalized + logger.warning( + f"未知 LLM Hook 注入目标 {value!r}," + "已回退到 cache-friendly extra_user_content_parts" + ) + return CACHE_FRIENDLY_LLM_HOOK_TARGET + def model_post_init(self, __context) -> None: """Normalize and apply the configured AstrBot log level.""" normalized_level = normalize_log_level( @@ -549,7 +571,7 @@ def create_from_config(cls, config: dict, data_dir: Optional[str] = None) -> 'Pl llm_hook_context_timeout=float(runtime_internal_settings.get('llm_hook_context_timeout', 3.0)), llm_hook_injection_target=runtime_internal_settings.get( 'llm_hook_injection_target', - 'system_prompt', + CACHE_FRIENDLY_LLM_HOOK_TARGET, ), # 社交上下文注入设置 diff --git a/docs/README.md b/docs/README.md index 88517c1f..880358c1 100644 --- a/docs/README.md +++ b/docs/README.md @@ -70,7 +70,7 @@ flowchart TD M["Bot 回复发送后"] --> N["on_bot_message_sent"] N --> O["BotMessage"] P["下一次 LLM 请求"] --> Q["LLMHookHandler.handle"] - Q --> R["extra_user_content_parts 或 system_prompt 注入"] + Q --> R["临时 extra_user_content_parts 注入,旧版 fallback"] FD --> S["LivingMemory: 长期记忆委托"] FD --> T["Group Chat Plus: 回复委托"] ``` diff --git a/docs/architecture.md b/docs/architecture.md index a3da6ac6..7640c489 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -121,8 +121,8 @@ AstrBot 完成 handler 绑定后调用 `initialize()`,它委托给 `PluginLife 1. 委托 `LLMHookHandler.handle(event, req)`。 2. 并行拉取社交上下文、V2 上下文、多样性提示、黑话解释、已批准 few-shot。 -3. 优先写入 `req.extra_user_content_parts`。 -4. 旧版 AstrBot 缺少 `extra_user_content_parts` 时回退追加 `req.system_prompt`。 +3. 优先写入 `req.extra_user_content_parts`,并在框架支持时标记为临时 `TextPart`。 +4. 旧版 AstrBot 缺少 `extra_user_content_parts` 时才回退追加 `req.system_prompt` 或 `req.prompt`。 ### `on_bot_message_sent` diff --git a/docs/learning-flow.md b/docs/learning-flow.md index 3f18afb5..93f088b4 100644 --- a/docs/learning-flow.md +++ b/docs/learning-flow.md @@ -230,10 +230,10 @@ WebUI 通过统一人格审查接口处理传统人格更新、渐进式人格 注入优先级: -1. 使用 `req.extra_user_content_parts.append(TextPart(...))`。 -2. 如果当前 AstrBot 版本不支持,回退追加 `req.system_prompt`。 +1. 使用 `req.extra_user_content_parts.append(TextPart(...))`,并在框架支持时调用 `mark_as_temp()`。 +2. 如果当前 AstrBot 版本不支持,才按 legacy 配置回退追加 `req.system_prompt` 或 `req.prompt`。 -注入内容会包在 `...` 中。这样可以保持系统提示相对稳定,降低对 LLM prefix cache 的影响。 +注入内容会包在 `...` 中。这样可以保持系统提示和历史前缀相对稳定,降低对 LLM prefix cache 的影响,并避免动态上下文持久化到后续对话。 ## 9. 功能融合对学习链路的影响 diff --git a/services/hooks/llm_hook_handler.py b/services/hooks/llm_hook_handler.py index dfd63f3e..cc8c9c45 100644 --- a/services/hooks/llm_hook_handler.py +++ b/services/hooks/llm_hook_handler.py @@ -13,6 +13,11 @@ from astrbot.api.event import AstrMessageEvent from ..monitoring.instrumentation import monitored +try: + from ...config import CACHE_FRIENDLY_LLM_HOOK_TARGET, LEGACY_LLM_HOOK_TARGETS +except ImportError: + from config import CACHE_FRIENDLY_LLM_HOOK_TARGET, LEGACY_LLM_HOOK_TARGETS + try: from astrbot.core.agent.message import TextPart except ImportError: @@ -407,32 +412,24 @@ def _inject( self, req: Any, injections: List[str], hook_start: float ) -> None: injection_text = "\n\n".join(injections) + context_text = f"\n{injection_text}\n" + target = getattr( + self._config, + "llm_hook_injection_target", + CACHE_FRIENDLY_LLM_HOOK_TARGET, + ) # Use AstrBot's extra_user_content_parts API to inject context. # This keeps system_prompt stable for LLM API prefix caching, # while appending dynamic context as extra content blocks after # the user message. - if hasattr(req, "extra_user_content_parts") and TextPart is not None: - req.extra_user_content_parts.append( - TextPart(text=f"\n{injection_text}\n") - ) + if self._append_extra_user_content(req, context_text): logger.debug( f"[LLM Hook] extra_user_content_parts 注入完成 - " - f"新增: {len(injection_text)} chars" + f"新增: {len(injection_text)} chars, target={target}" ) else: - # Fallback for older AstrBot versions without extra_user_content_parts - if not req.system_prompt: - req.system_prompt = "" - req.system_prompt += "\n\n" + injection_text - logger.debug( - f"[LLM Hook] system_prompt fallback 注入完成 - " - f"新增: {len(injection_text)} chars" - ) - logger.warning( - "[LLM Hook] 当前 AstrBot 版本不支持 extra_user_content_parts," - "回退到 system_prompt 注入(会影响缓存命中率)" - ) + self._legacy_inject(req, injection_text, target) current_style = self._diversity_manager.get_current_style() current_pattern = self._diversity_manager.get_current_pattern() @@ -444,3 +441,54 @@ def _inject( f"耗时: {time.time() - hook_start:.3f}s" ) logger.debug(f"[LLM Hook] 注入内容预览: {injection_text[:200]}...") + + @staticmethod + def _append_extra_user_content(req: Any, context_text: str) -> bool: + """Append dynamic context as a temporary AstrBot content part when possible.""" + content_parts = getattr(req, "extra_user_content_parts", None) + if ( + TextPart is None + or content_parts is None + or not hasattr(content_parts, "append") + ): + return False + + part = TextPart(text=context_text) + mark_as_temp = getattr(part, "mark_as_temp", None) + if callable(mark_as_temp): + mark_as_temp() + content_parts.append(part) + return True + + @staticmethod + def _legacy_inject(req: Any, injection_text: str, target: str) -> None: + """Fallback for old AstrBot versions without extra_user_content_parts.""" + fallback_target = target if target in LEGACY_LLM_HOOK_TARGETS else "system_prompt" + + if fallback_target == "prompt": + prompt = getattr(req, "prompt", "") or "" + req.prompt = f"{prompt}\n\n{injection_text}" if prompt else injection_text + logger.debug( + f"[LLM Hook] prompt fallback 注入完成 - " + f"新增: {len(injection_text)} chars" + ) + logger.warning( + "[LLM Hook] 当前 AstrBot 版本不支持 extra_user_content_parts," + "回退到 prompt 注入(可能膨胀对话历史并降低缓存命中率)" + ) + return + + system_prompt = getattr(req, "system_prompt", "") or "" + req.system_prompt = ( + f"{system_prompt}\n\n{injection_text}" + if system_prompt + else injection_text + ) + logger.debug( + f"[LLM Hook] system_prompt fallback 注入完成 - " + f"新增: {len(injection_text)} chars" + ) + logger.warning( + "[LLM Hook] 当前 AstrBot 版本不支持 extra_user_content_parts," + "回退到 system_prompt 注入(会影响缓存命中率)" + ) diff --git a/statics/messages.py b/statics/messages.py index 084ad319..ef784f6c 100644 --- a/statics/messages.py +++ b/statics/messages.py @@ -292,7 +292,7 @@ class LogMessages: AFFECTION_PROCESSING_SUCCESS = "好感度处理成功: {result}" AFFECTION_PROCESSING_FAILED = "好感度系统处理失败: {error}" ENHANCED_INTERACTION_FAILED = "增强交互处理失败: {error}" - LLM_REQUEST_HOOK_SUCCESS = "已注入情绪状态到system_prompt,群组: {group_id}" + LLM_REQUEST_HOOK_SUCCESS = "已注入临时 LLM 上下文,群组: {group_id}" LLM_REQUEST_HOOK_FAILED = "LLM请求hook处理失败: {error}" PLUGIN_CONFIG_SAVED = "插件配置已保存" PLUGIN_UNLOAD_SUCCESS = "自学习插件已安全卸载" diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index daf2bce8..d1bf0b72 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -45,6 +45,7 @@ def test_create_default_instance(self): assert config.web_interface_port == 7833 assert config.web_interface_host == "0.0.0.0" assert config.log_level == "info" + assert config.llm_hook_injection_target == "extra_user_content_parts" def test_create_default_classmethod(self): """Test the create_default classmethod.""" @@ -349,6 +350,13 @@ def test_create_from_empty_config(self): assert config.target_qq_list == [] assert config.learning_interval_hours == 6 assert config.db_type == 'postgresql' + assert config.llm_hook_injection_target == 'extra_user_content_parts' + + def test_llm_hook_injection_target_aliases_normalize_to_cache_friendly_default(self): + """Short aliases should still resolve to the cache-friendly AstrBot API.""" + config = PluginConfig(llm_hook_injection_target="user_message_tail") + + assert config.llm_hook_injection_target == "extra_user_content_parts" def test_target_list_blank_values_keep_full_learning_default(self): """Blank settings-page rows should not disable full learning.""" diff --git a/tests/unit/test_config_service.py b/tests/unit/test_config_service.py index e6d04a03..dd2b75c5 100644 --- a/tests/unit/test_config_service.py +++ b/tests/unit/test_config_service.py @@ -159,6 +159,10 @@ async def test_get_config_schema_includes_full_settings(self, tmp_path): assert runtime_fields["messages_db_path"]["editable"] is False assert runtime_fields["enable_llm_hooks"]["widget"] == "toggle" assert runtime_fields["enable_llm_hooks"]["value"] is False + hook_target = runtime_fields["llm_hook_injection_target"] + assert hook_target["value"] == "extra_user_content_parts" + assert hook_target["options"][0]["value"] == "extra_user_content_parts" + assert "prefix cache" in hook_target["hint"] basic_fields = {field["key"]: field for field in groups["Self_Learning_Basic"]["fields"]} assert basic_fields["enable_webui_password"]["widget"] == "toggle" diff --git a/tests/unit/test_feature_delegation.py b/tests/unit/test_feature_delegation.py index dc9180c0..65ceeead 100644 --- a/tests/unit/test_feature_delegation.py +++ b/tests/unit/test_feature_delegation.py @@ -13,6 +13,7 @@ from self_learning_EterU.config import PluginConfig from self_learning_EterU.core.feature_delegation import FeatureDelegation from self_learning_EterU.core.factory import ServiceFactory +from self_learning_EterU.services.hooks import llm_hook_handler as llm_hook_module from self_learning_EterU.services.hooks.llm_hook_handler import LLMHookHandler @@ -117,6 +118,70 @@ async def test_llm_hook_handle_returns_without_context_fetches_when_disabled(): assert req.extra_user_content_parts == [] +def test_llm_hook_injects_temp_extra_user_content_without_touching_system_prompt(monkeypatch): + class FakeTextPart: + def __init__(self, text): + self.text = text + self.temp = False + + def mark_as_temp(self): + self.temp = True + + monkeypatch.setattr(llm_hook_module, "TextPart", FakeTextPart) + handler = LLMHookHandler( + plugin_config=SimpleNamespace( + llm_hook_injection_target="extra_user_content_parts" + ), + diversity_manager=SimpleNamespace( + get_current_style=lambda: "style", + get_current_pattern=lambda: "pattern", + ), + social_context_injector=None, + v2_integration=None, + jargon_query_service=None, + temporary_persona_updater=None, + perf_tracker=SimpleNamespace(record=lambda payload: None), + group_id_to_unified_origin={}, + db_manager=None, + ) + req = SimpleNamespace( + prompt="用户消息", + system_prompt="stable system prompt", + extra_user_content_parts=[], + ) + + handler._inject(req, ["dynamic context"], 0) + + assert req.system_prompt == "stable system prompt" + assert len(req.extra_user_content_parts) == 1 + assert req.extra_user_content_parts[0].text == "\ndynamic context\n" + assert req.extra_user_content_parts[0].temp is True + + +def test_llm_hook_legacy_prompt_fallback_when_extra_parts_unavailable(monkeypatch): + monkeypatch.setattr(llm_hook_module, "TextPart", None) + handler = LLMHookHandler( + plugin_config=SimpleNamespace(llm_hook_injection_target="prompt"), + diversity_manager=SimpleNamespace( + get_current_style=lambda: "style", + get_current_pattern=lambda: "pattern", + ), + social_context_injector=None, + v2_integration=None, + jargon_query_service=None, + temporary_persona_updater=None, + perf_tracker=SimpleNamespace(record=lambda payload: None), + group_id_to_unified_origin={}, + db_manager=None, + ) + req = SimpleNamespace(prompt="用户消息", system_prompt="stable system prompt") + + handler._inject(req, ["legacy context"], 0) + + assert req.system_prompt == "stable system prompt" + assert req.prompt == "用户消息\n\nlegacy context" + + @pytest.mark.asyncio async def test_llm_hook_omits_local_v2_memories_when_livingmemory_delegated(): v2 = SimpleNamespace( diff --git a/webui/services/config_service.py b/webui/services/config_service.py index cc007d3b..3db1b1fb 100644 --- a/webui/services/config_service.py +++ b/webui/services/config_service.py @@ -150,11 +150,19 @@ def _load_schema_definition() -> Dict[str, Any]: "llm_hook_injection_target": { "description": "LLM Hook 注入目标", "type": "string", - "hint": "控制注入到 system_prompt 还是 prompt", - "default": "system_prompt", + "hint": ( + "推荐保持 extra_user_content_parts:动态上下文会追加到用户消息尾部" + "并标记为临时内容,避免改动 system_prompt 影响 provider prefix cache。" + "system_prompt/prompt 仅作为旧版 AstrBot fallback" + ), + "default": "extra_user_content_parts", "options": [ - {"value": "system_prompt", "label": "system_prompt"}, - {"value": "prompt", "label": "prompt"}, + { + "value": "extra_user_content_parts", + "label": "extra_user_content_parts(推荐)", + }, + {"value": "system_prompt", "label": "system_prompt(旧版 fallback)"}, + {"value": "prompt", "label": "prompt(旧版 fallback)"}, ], }, "enable_llm_hooks": {