Skip to content
Open
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ Only write entries that are worth mentioning to users.

## Unreleased

- Core: Support custom context compaction via plugins — plugins can declare a `compaction.entrypoint` in `plugin.json` to provide their own compaction implementation; use `loop_control.compaction_plugin` to select which plugin to use
- Core: Add `loop_control.compaction_model` config option to use a dedicated model for context compaction

## 1.25.0 (2026-03-23)

- Core: Add plugin system (Skills + Tools) — plugins extend Kimi Code CLI with custom tools packaged as `plugin.json`; tools are commands that run in isolated subprocesses and return their stdout to the agent; plugins support automatic credential injection via `inject` configuration
Expand Down
3 changes: 3 additions & 0 deletions docs/en/release-notes/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ This page documents the changes in each Kimi Code CLI release.

## Unreleased

- Core: Support custom context compaction via plugins — plugins can declare a `compaction.entrypoint` in `plugin.json` to provide their own compaction implementation; use `loop_control.compaction_plugin` to select which plugin to use
- Core: Add `loop_control.compaction_model` config option to use a dedicated model for context compaction

## 1.25.0 (2026-03-23)

- Core: Add plugin system (Skills + Tools) — plugins extend Kimi Code CLI with custom tools packaged as `plugin.json`; tools are commands that run in isolated subprocesses and return their stdout to the agent; plugins support automatic credential injection via `inject` configuration
Expand Down
3 changes: 3 additions & 0 deletions docs/zh/release-notes/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

## 未发布

- Core:支持通过插件提供自定义上下文压缩实现——插件可在 `plugin.json` 中声明 `compaction.entrypoint` 提供自定义压缩器;通过 `loop_control.compaction_plugin` 配置项指定使用哪个插件的压缩器
- Core:新增 `loop_control.compaction_model` 配置项,可为上下文压缩指定专用模型

## 1.25.0 (2026-03-23)

- Core:新增插件系统(Skills + Tools)——插件通过 `plugin.json` 为 Kimi Code CLI 扩展自定义工具;工具是在独立子进程中运行的命令,其 stdout 返回给 Agent;插件支持通过 `inject` 配置自动注入凭证
Expand Down
1 change: 1 addition & 0 deletions examples/custom-kimi-soul/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ async def create(
config=config,
oauth=OAuthManager(config),
llm=llm,
compaction_llm=None,
session=session,
yolo=True,
)
Expand Down
1 change: 1 addition & 0 deletions examples/kimi-psql/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ async def create_psql_soul(llm: LLM | None, conninfo: str) -> KimiSoul:
config=config,
oauth=OAuthManager(config),
llm=llm,
compaction_llm=None,
session=session,
yolo=True, # Auto-approve read-only SQL queries
)
Expand Down
100 changes: 82 additions & 18 deletions src/kimi_cli/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,13 @@
from kimi_cli.auth.oauth import OAuthManager
from kimi_cli.cli import InputFormat, OutputFormat
from kimi_cli.config import Config, LLMModel, LLMProvider, load_config
from kimi_cli.llm import augment_provider_with_env_vars, create_llm, model_display_name
from kimi_cli.exception import ConfigError
from kimi_cli.llm import (
augment_provider_credentials_with_env_vars,
augment_provider_with_env_vars,
create_llm,
model_display_name,
)
from kimi_cli.session import Session
from kimi_cli.share import get_share_dir
from kimi_cli.soul import run_soul
Expand Down Expand Up @@ -143,26 +149,28 @@ async def create(

oauth = OAuthManager(config)

model: LLMModel | None = None
provider: LLMProvider | None = None

# try to use config file
if not model_name and config.default_model:
# no --model specified && default model is set in config
model = config.models[config.default_model]
provider = config.providers[model.provider]
if model_name and model_name in config.models:
# --model specified && model is set in config
model = config.models[model_name]
provider = config.providers[model.provider]

if not model:
selected_model = model_name or config.default_model
if selected_model and selected_model in config.models:
model = config.models[selected_model]
provider = config.providers.get(model.provider)
if provider is None:
logger.warning(
"Provider {provider!r} for model {model!r} missing; using placeholder",
provider=model.provider,
model=selected_model,
)
model = LLMModel(provider="", model="", max_context_size=100_000)
provider = LLMProvider(type="kimi", base_url="", api_key=SecretStr(""))
else:
if selected_model:
logger.warning(
"Model {model!r} not found in config, using placeholder",
model=selected_model,
)
model = LLMModel(provider="", model="", max_context_size=100_000)
provider = LLMProvider(type="kimi", base_url="", api_key=SecretStr(""))

# try overwrite with environment variables
assert provider is not None
assert model is not None
env_overrides = augment_provider_with_env_vars(provider, model)

# determine thinking mode
Expand All @@ -183,10 +191,52 @@ async def create(
logger.info("Using LLM model: {model}", model=model)
logger.info("Thinking mode: {thinking}", thinking=thinking)

compaction_llm = None
if config.loop_control.compaction_model is not None:
compaction_model_name = config.loop_control.compaction_model
compaction_model = config.models.get(compaction_model_name)
if compaction_model is None:
logger.warning(
"Compaction model {model!r} not found in config, skipping",
model=compaction_model_name,
)
else:
if llm is not None and compaction_model.max_context_size < llm.max_context_size:
raise ConfigError(
"Compaction model "
f"{compaction_model_name!r} has max_context_size "
f"{compaction_model.max_context_size}, smaller than active model "
f"{selected_model!r} ({model.max_context_size})"
)
compaction_provider = config.providers.get(compaction_model.provider)
if compaction_provider is None:
logger.warning(
"Compaction provider {provider!r} not found in config, skipping",
provider=compaction_model.provider,
)
else:
compaction_provider = compaction_provider.model_copy(deep=True)
compaction_model = compaction_model.model_copy(deep=True)
augment_provider_credentials_with_env_vars(compaction_provider)
compaction_llm = create_llm(
compaction_provider,
compaction_model,
thinking=thinking,
session_id=session.id,
oauth=oauth,
)
if compaction_llm is not None:
logger.info(
"Using compaction LLM model: {model}",
model=compaction_model,
)

if startup_progress is not None:
startup_progress("Scanning workspace...")

runtime = await Runtime.create(config, oauth, llm, session, yolo, skills_dir)
runtime = await Runtime.create(
config, oauth, llm, compaction_llm, session, yolo, skills_dir
)
runtime.notifications.recover()
runtime.background_tasks.reconcile()
_cleanup_stale_foreground_subagents(runtime)
Expand All @@ -205,6 +255,20 @@ async def create(
except Exception:
logger.debug("Failed to refresh plugin configs, skipping")

if config.loop_control.compaction_plugin is not None:
from kimi_cli.plugin import PluginError
from kimi_cli.plugin.compaction import resolve_plugin_compactor
from kimi_cli.plugin.manager import get_plugins_dir

try:
runtime.compaction = resolve_plugin_compactor(
get_plugins_dir(), config.loop_control.compaction_plugin
)
except PluginError as exc:
raise ConfigError(
f"Invalid compaction plugin {config.loop_control.compaction_plugin!r}: {exc}"
) from exc

if agent_file is None:
agent_file = DEFAULT_AGENT_FILE
if startup_progress is not None:
Expand Down
20 changes: 20 additions & 0 deletions src/kimi_cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
SecretStr,
ValidationError,
field_serializer,
field_validator,
model_validator,
)
from tomlkit.exceptions import TOMLKitError
Expand Down Expand Up @@ -78,6 +79,10 @@ class LoopControl(BaseModel):
"""Maximum number of retries in one step"""
max_ralph_iterations: int = Field(default=0, ge=-1)
"""Extra iterations after the first turn in Ralph mode. Use -1 for unlimited."""
compaction_model: str | None = Field(default=None)
"""Optional model name to use for context compaction."""
compaction_plugin: str | None = Field(default=None)
"""Installed plugin name to use for context compaction."""
reserved_context_size: int = Field(default=50_000, ge=1000)
"""Reserved token count for LLM response generation. Auto-compaction triggers when
either context_tokens + reserved_context_size >= max_context_size or
Expand All @@ -87,6 +92,14 @@ class LoopControl(BaseModel):
Auto-compaction triggers when context_tokens >= max_context_size * compaction_trigger_ratio
or when context_tokens + reserved_context_size >= max_context_size."""

@field_validator("compaction_model", "compaction_plugin", mode="before")
@classmethod
def normalize_optional_compaction_name(cls, value: object) -> object:
if isinstance(value, str):
value = value.strip()
return value or None
return value


class BackgroundConfig(BaseModel):
"""Background task runtime configuration."""
Expand Down Expand Up @@ -207,6 +220,13 @@ class Config(BaseModel):
def validate_model(self) -> Self:
if self.default_model and self.default_model not in self.models:
raise ValueError(f"Default model {self.default_model} not found in models")
if (
self.loop_control.compaction_model
and self.loop_control.compaction_model not in self.models
):
raise ValueError(
f"Compaction model {self.loop_control.compaction_model} not found in models"
)
for model in self.models.values():
if model.provider not in self.providers:
raise ValueError(f"Provider {model.provider} not found in providers")
Expand Down
30 changes: 30 additions & 0 deletions src/kimi_cli/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,36 @@ def augment_provider_with_env_vars(provider: LLMProvider, model: LLMModel) -> di
return applied


def augment_provider_credentials_with_env_vars(provider: LLMProvider) -> dict[str, str]:
    """Overlay provider credentials/base URL from environment variables.

    Unlike the primary-model augmentation, the configured model alias is never
    touched. This keeps secondary selections, such as the compaction model,
    stable even when the main chat model is overridden from the environment.

    Returns a mapping of the environment variable names that were applied;
    API-key values are masked as ``"******"`` so the result is safe to log.
    """
    applied: dict[str, str] = {}

    # Each provider family reads its own pair of well-known variables.
    if provider.type == "kimi":
        base_var, key_var = "KIMI_BASE_URL", "KIMI_API_KEY"
    elif provider.type in ("openai_legacy", "openai_responses"):
        base_var, key_var = "OPENAI_BASE_URL", "OPENAI_API_KEY"
    else:
        # Unknown provider types have no env-var convention; leave untouched.
        return applied

    base_url = os.getenv(base_var)
    if base_url:
        provider.base_url = base_url
        applied[base_var] = base_url

    api_key = os.getenv(key_var)
    if api_key:
        provider.api_key = SecretStr(api_key)
        applied[key_var] = "******"  # never echo the real secret

    return applied


def _kimi_default_headers(provider: LLMProvider, oauth: OAuthManager | None) -> dict[str, str]:
headers = {"User-Agent": USER_AGENT}
if oauth:
Expand Down
20 changes: 19 additions & 1 deletion src/kimi_cli/plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pathlib import Path
from typing import Any

from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, field_validator


class PluginError(Exception):
Expand All @@ -27,6 +27,23 @@ class PluginToolSpec(BaseModel):
parameters: dict[str, object] = Field(default_factory=dict)


class PluginCompactionSpec(BaseModel):
    """In-process compaction hook: a class importable from the plugin directory."""

    # Reject unknown keys so typos in plugin.json surface as validation errors.
    model_config = ConfigDict(extra="forbid")

    entrypoint: str
    """Dotted path ``module.Class`` resolved with the plugin directory on ``sys.path``."""

    @field_validator("entrypoint")
    @classmethod
    def entrypoint_must_include_class(cls, value: str) -> str:
        # Trim stray whitespace first; a valid entrypoint must contain at
        # least one dot separating the module path from the class name.
        if "." not in (cleaned := value.strip()):
            raise ValueError("compaction.entrypoint must look like 'module.ClassName'")
        return cleaned


class PluginSpec(BaseModel):
"""Parsed representation of a plugin.json file."""

Expand All @@ -38,6 +55,7 @@ class PluginSpec(BaseModel):
config_file: str | None = None
inject: dict[str, str] = Field(default_factory=dict)
tools: list[PluginToolSpec] = Field(default_factory=list) # pyright: ignore[reportUnknownVariableType]
compaction: PluginCompactionSpec | None = None
runtime: PluginRuntime | None = None


Expand Down
Loading