From e4383e1806d2028aa1db20083a310c8201aee235 Mon Sep 17 00:00:00 2001
From: Kaiyi
Date: Thu, 2 Apr 2026 21:10:38 +0800
Subject: [PATCH 1/2] fix(core): pass custom_headers to all LLM providers

Previously only the Kimi provider forwarded provider.custom_headers to the
underlying SDK client. OpenAI legacy/responses, Anthropic, Google GenAI, and
Vertex AI silently dropped them.

Each provider branch in create_llm() now passes a defensive copy of
custom_headers to the SDK constructor, and GoogleGenAI.__init__ accepts a new
default_headers parameter merged into HttpOptions.
---
 CHANGELOG.md                               |   2 +
 .../contrib/chat_provider/google_genai.py  |   4 +-
 src/kimi_cli/llm.py                        |   5 +
 tests/core/test_create_llm.py              | 166 ++++++++++++++++++
 4 files changed, 176 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9da067ed2..c6610c8a4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,8 @@ Only write entries that are worth mentioning to users.
 
 - Core: Add `merge_all_available_skills` config option — when enabled, skills from all existing brand directories (`~/.kimi/skills/`, `~/.claude/skills/`, `~/.codex/skills/`) are loaded and merged instead of using only the first one found; same-name skills follow priority order kimi > claude > codex; disabled by default
 - CLI: Add `--plan` flag and `default_plan_mode` config option — start new sessions in plan mode via `kimi --plan` or by setting `default_plan_mode = true` in `~/.kimi/config.toml`; resumed sessions preserve their existing plan mode state
+- Core: Fix `custom_headers` not being passed to non-Kimi providers — OpenAI, Anthropic, Google GenAI, and Vertex AI providers now correctly forward custom headers configured in `providers.*.custom_headers`
+
 ## 1.29.0 (2026-04-01)
 
 - Core: Support hierarchical `AGENTS.md` loading — the CLI now discovers and merges `AGENTS.md` files from the git project root down to the working directory, including `.kimi/AGENTS.md` at each level; deeper files take priority under a 32 KiB budget cap, ensuring the most specific instructions are never truncated
diff --git a/packages/kosong/src/kosong/contrib/chat_provider/google_genai.py b/packages/kosong/src/kosong/contrib/chat_provider/google_genai.py
index cbb0d3db4..ff3294cd2 100644
--- a/packages/kosong/src/kosong/contrib/chat_provider/google_genai.py
+++ b/packages/kosong/src/kosong/contrib/chat_provider/google_genai.py
@@ -89,13 +89,15 @@ def __init__(
         base_url: str | None = None,
         stream: bool = True,
         vertexai: bool | None = None,
+        default_headers: dict[str, str] | None = None,
         **client_kwargs: Any,
     ):
         self._model = model
         self._stream = stream
         self._base_url = base_url
+        http_options = HttpOptions(base_url=base_url, headers=default_headers)
         self._client: genai_client.Client = genai.Client(
-            http_options=HttpOptions(base_url=base_url),
+            http_options=http_options,
             api_key=api_key,
             vertexai=vertexai,
             **client_kwargs,
diff --git a/src/kimi_cli/llm.py b/src/kimi_cli/llm.py
index fba2f10e9..4e9801965 100644
--- a/src/kimi_cli/llm.py
+++ b/src/kimi_cli/llm.py
@@ -152,6 +152,7 @@ def create_llm(
                 model=model.model,
                 base_url=provider.base_url,
                 api_key=resolved_api_key,
+                default_headers=dict(provider.custom_headers) if provider.custom_headers else None,
             )
         case "openai_responses":
             from kosong.contrib.chat_provider.openai_responses import OpenAIResponses
@@ -160,6 +161,7 @@ def create_llm(
                 model=model.model,
                 base_url=provider.base_url,
                 api_key=resolved_api_key,
+                default_headers=dict(provider.custom_headers) if provider.custom_headers else None,
             )
         case "anthropic":
             from kosong.contrib.chat_provider.anthropic import Anthropic
@@ -170,6 +172,7 @@ def create_llm(
                 api_key=resolved_api_key,
                 default_max_tokens=50000,
                 metadata={"user_id": session_id} if session_id else None,
+                default_headers=dict(provider.custom_headers) if provider.custom_headers else None,
             )
         case "google_genai" | "gemini":
             from kosong.contrib.chat_provider.google_genai import GoogleGenAI
@@ -178,6 +181,7 @@ def create_llm(
                 model=model.model,
                 base_url=provider.base_url,
                 api_key=resolved_api_key,
+                default_headers=dict(provider.custom_headers) if provider.custom_headers else None,
             )
         case "vertexai":
             from kosong.contrib.chat_provider.google_genai import GoogleGenAI
@@ -188,6 +192,7 @@ def create_llm(
                 base_url=provider.base_url,
                 api_key=resolved_api_key,
                 vertexai=True,
+                default_headers=dict(provider.custom_headers) if provider.custom_headers else None,
             )
         case "_echo":
             from kosong.chat_provider.echo import EchoChatProvider
diff --git a/tests/core/test_create_llm.py b/tests/core/test_create_llm.py
index f52a0e315..8ad1045bf 100644
--- a/tests/core/test_create_llm.py
+++ b/tests/core/test_create_llm.py
@@ -136,6 +136,172 @@ def test_create_llm_requires_base_url_for_kimi():
     assert create_llm(provider, model) is None
 
 
+def test_create_llm_openai_legacy_custom_headers():
+    from kosong.contrib.chat_provider.openai_legacy import OpenAILegacy
+
+    provider = LLMProvider(
+        type="openai_legacy",
+        base_url="https://api.openai.com/v1",
+        api_key=SecretStr("test-key"),
+        custom_headers={"X-Custom": "value", "X-Canary": "always"},
+    )
+    model = LLMModel(
+        provider="openai",
+        model="gpt-4o",
+        max_context_size=128000,
+    )
+
+    llm = create_llm(provider, model)
+    assert llm is not None
+    assert isinstance(llm.chat_provider, OpenAILegacy)
+    assert llm.chat_provider._client_kwargs.get("default_headers") == {
+        "X-Custom": "value",
+        "X-Canary": "always",
+    }
+
+
+def test_create_llm_openai_responses_custom_headers():
+    provider = LLMProvider(
+        type="openai_responses",
+        base_url="https://api.openai.com/v1",
+        api_key=SecretStr("test-key"),
+        custom_headers={"X-Custom": "value"},
+    )
+    model = LLMModel(
+        provider="openai",
+        model="gpt-4o",
+        max_context_size=128000,
+    )
+
+    llm = create_llm(provider, model)
+    assert llm is not None
+    assert isinstance(llm.chat_provider, OpenAIResponses)
+    assert llm.chat_provider._client_kwargs.get("default_headers") == {"X-Custom": "value"}
+
+
+def test_create_llm_anthropic_custom_headers():
+    from kosong.contrib.chat_provider.anthropic import Anthropic
+
+    provider = LLMProvider(
+        type="anthropic",
+        base_url="https://api.anthropic.com",
+        api_key=SecretStr("test-key"),
+        custom_headers={"X-Custom": "value"},
+    )
+    model = LLMModel(
+        provider="anthropic",
+        model="claude-sonnet-4-20250514",
+        max_context_size=200000,
+    )
+
+    llm = create_llm(provider, model)
+    assert llm is not None
+    assert isinstance(llm.chat_provider, Anthropic)
+    # AsyncAnthropic stores custom headers in _custom_headers
+    assert llm.chat_provider._client._custom_headers.get("X-Custom") == "value"
+
+
+def test_create_llm_google_genai_custom_headers():
+    from kosong.contrib.chat_provider.google_genai import GoogleGenAI
+
+    provider = LLMProvider(
+        type="google_genai",
+        base_url="https://generativelanguage.googleapis.com",
+        api_key=SecretStr("test-key"),
+        custom_headers={"X-Custom": "value"},
+    )
+    model = LLMModel(
+        provider="google_genai",
+        model="gemini-2.5-pro",
+        max_context_size=1000000,
+    )
+
+    llm = create_llm(provider, model)
+    assert llm is not None
+    assert isinstance(llm.chat_provider, GoogleGenAI)
+    # Google GenAI client stores http_options on _api_client
+    http_options = llm.chat_provider._client._api_client._http_options
+    assert http_options.headers is not None
+    assert http_options.headers.get("X-Custom") == "value"
+
+
+def test_create_llm_vertexai_custom_headers():
+    from kosong.contrib.chat_provider.google_genai import GoogleGenAI
+
+    provider = LLMProvider(
+        type="vertexai",
+        base_url="https://us-central1-aiplatform.googleapis.com",
+        api_key=SecretStr("test-key"),
+        custom_headers={"X-Custom": "value"},
+    )
+    model = LLMModel(
+        provider="vertexai",
+        model="gemini-2.5-pro",
+        max_context_size=1000000,
+    )
+
+    llm = create_llm(provider, model)
+    assert llm is not None
+    assert isinstance(llm.chat_provider, GoogleGenAI)
+    http_options = llm.chat_provider._client._api_client._http_options
+    assert http_options.headers is not None
+    assert http_options.headers.get("X-Custom") == "value"
+
+
+def test_create_llm_custom_headers_isolated_between_instances():
+    """Mutating headers on one instance must not affect another created from the same provider."""
+    from kosong.contrib.chat_provider.openai_legacy import OpenAILegacy
+
+    provider = LLMProvider(
+        type="openai_legacy",
+        base_url="https://api.openai.com/v1",
+        api_key=SecretStr("test-key"),
+        custom_headers={"X-Custom": "original"},
+    )
+    model = LLMModel(
+        provider="openai",
+        model="gpt-4o",
+        max_context_size=128000,
+    )
+
+    llm1 = create_llm(provider, model)
+    llm2 = create_llm(provider, model)
+    assert llm1 is not None and llm2 is not None
+    assert isinstance(llm1.chat_provider, OpenAILegacy)
+    assert isinstance(llm2.chat_provider, OpenAILegacy)
+
+    # Mutate headers on the first instance
+    llm1.chat_provider._client_kwargs["default_headers"]["X-Custom"] = "mutated"
+
+    # Second instance must be unaffected
+    assert llm2.chat_provider._client_kwargs["default_headers"]["X-Custom"] == "original"
+    # Original provider must also be unaffected
+    assert provider.custom_headers is not None
+    assert provider.custom_headers["X-Custom"] == "original"
+
+
+def test_create_llm_no_custom_headers_keeps_existing_behavior():
+    """When custom_headers is None, providers should work exactly as before."""
+    from kosong.contrib.chat_provider.openai_legacy import OpenAILegacy
+
+    provider = LLMProvider(
+        type="openai_legacy",
+        base_url="https://api.openai.com/v1",
+        api_key=SecretStr("test-key"),
+    )
+    model = LLMModel(
+        provider="openai",
+        model="gpt-4o",
+        max_context_size=128000,
+    )
+
+    llm = create_llm(provider, model)
+    assert llm is not None
+    assert isinstance(llm.chat_provider, OpenAILegacy)
+    # When custom_headers is None, the SDK client should have no custom headers
+    assert llm.chat_provider.client._custom_headers == {}
+
+
 def test_create_llm_openai_responses_thinking_false_no_reasoning_in_params():
     """thinking=False should call with_thinking("off"), which sets reasoning_effort=None.
     The OpenAIResponses provider handles this by omitting reasoning from the request."""

From 4ea88f2978f8b8e9462430e23e1585e4d5d9ed25 Mon Sep 17 00:00:00 2001
From: Kaiyi
Date: Thu, 2 Apr 2026 21:31:26 +0800
Subject: [PATCH 2/2] docs: add changelog entries for custom_headers fix

Sync root CHANGELOG to English docs, add Chinese translation, and add kosong
sub-package changelog entry for GoogleGenAI default_headers parameter.
---
 docs/en/release-notes/changelog.md | 1 +
 docs/zh/release-notes/changelog.md | 1 +
 packages/kosong/CHANGELOG.md       | 2 ++
 3 files changed, 4 insertions(+)

diff --git a/docs/en/release-notes/changelog.md b/docs/en/release-notes/changelog.md
index 6e52c2a20..1c5c29800 100644
--- a/docs/en/release-notes/changelog.md
+++ b/docs/en/release-notes/changelog.md
@@ -16,6 +16,7 @@ This page documents the changes in each Kimi Code CLI release.
 
 - CLI: Add `--plan` flag and `default_plan_mode` config option — start new sessions in plan mode via `kimi --plan` or by setting `default_plan_mode = true` in `~/.kimi/config.toml`; resumed sessions preserve their existing plan mode state
 - Shell: Add `/undo` and `/fork` commands for session forking — `/undo` lets you pick a previous turn and fork a new session with the selected message pre-filled for re-editing; `/fork` duplicates the entire session history into a new session; the original session is always preserved
 - CLI: Add `-r` as a short alias for `--session` and print a resume hint (`kimi -r `) whenever a session exits — covers normal exit, Ctrl-C, `/undo`, `/fork`, and `/sessions` switch so users can always find their way back
+- Core: Fix `custom_headers` not being passed to non-Kimi providers — OpenAI, Anthropic, Google GenAI, and Vertex AI providers now correctly forward custom headers configured in `providers.*.custom_headers`
 
 ## 1.29.0 (2026-04-01)
diff --git a/docs/zh/release-notes/changelog.md b/docs/zh/release-notes/changelog.md
index 6fbc8042d..92a982e1b 100644
--- a/docs/zh/release-notes/changelog.md
+++ b/docs/zh/release-notes/changelog.md
@@ -16,6 +16,7 @@
 
 - CLI:新增 `--plan` 启动参数和 `default_plan_mode` 配置项——通过 `kimi --plan` 或在 `~/.kimi/config.toml` 中设置 `default_plan_mode = true` 可让新会话直接进入计划模式;恢复的会话保留其原有的计划模式状态
 - Shell:新增 `/undo` 和 `/fork` 命令用于会话分支——`/undo` 支持选择一个历史轮次并 fork 出新会话,被选中轮次的用户消息会预填到输入框供重新编辑;`/fork` 将当前完整对话历史复制到新会话;原会话始终保留不丢失
 - CLI:新增 `-r` 作为 `--session` 的简写别名,并在会话退出时输出恢复提示(`kimi -r `)——覆盖正常退出、Ctrl-C、`/undo`、`/fork` 和 `/sessions` 切换等场景,确保用户始终能找到回到会话的方式
+- Core:修复 `custom_headers` 未传递给非 Kimi provider 的问题——OpenAI、Anthropic、Google GenAI 和 Vertex AI provider 现在能正确转发 `providers.*.custom_headers` 中配置的自定义请求头
 
 ## 1.29.0 (2026-04-01)
diff --git a/packages/kosong/CHANGELOG.md b/packages/kosong/CHANGELOG.md
index 272e5f198..26160102f 100644
--- a/packages/kosong/CHANGELOG.md
+++ b/packages/kosong/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ## Unreleased
 
+- Google GenAI: Add `default_headers` parameter to `GoogleGenAI` constructor — custom headers are merged into `HttpOptions` so they are included in all API requests
+
 ## 0.47.0 (2026-03-30)
 
 - OpenAI: Fix implicit `reasoning_effort` causing 400 errors — auto-set `reasoning_effort` to `"medium"` when history contains `ThinkPart` and the parameter wasn't explicitly set
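
A note on the defensive copy described in PATCH 1/2 (not part of either patch): the short sketch below is a standalone Python illustration, with hypothetical variable names, of the aliasing hazard that `dict(provider.custom_headers)` avoids and that test_create_llm_custom_headers_isolated_between_instances locks in.

    # Standalone illustration only; the names below are hypothetical, not taken from the patch.
    provider_custom_headers = {"X-Custom": "original"}  # stands in for provider.custom_headers

    copied = dict(provider_custom_headers)   # defensive copy, as create_llm() now passes to the SDK
    aliased = provider_custom_headers        # shared reference, the prior hazard

    aliased["X-Custom"] = "mutated"          # one client mutates its headers in place

    assert provider_custom_headers["X-Custom"] == "mutated"  # the shared dict leaked the change
    assert copied["X-Custom"] == "original"                   # the copied dict stays isolated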