From e4383e1806d2028aa1db20083a310c8201aee235 Mon Sep 17 00:00:00 2001
From: Kaiyi
Date: Thu, 2 Apr 2026 21:10:38 +0800
Subject: [PATCH 1/2] fix(core): pass custom_headers to all LLM providers

Previously only the Kimi provider forwarded provider.custom_headers to the
underlying SDK client. OpenAI legacy/responses, Anthropic, Google GenAI, and
Vertex AI silently dropped them.

Each provider branch in create_llm() now passes a defensive copy of
custom_headers to the SDK constructor, and GoogleGenAI.__init__ accepts a new
default_headers parameter merged into HttpOptions.
---
 CHANGELOG.md                               |   2 +
 .../contrib/chat_provider/google_genai.py  |   4 +-
 src/kimi_cli/llm.py                        |   5 +
 tests/core/test_create_llm.py              | 166 ++++++++++++++++++
 4 files changed, 176 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9da067ed2..c6610c8a4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,8 @@ Only write entries that are worth mentioning to users.
 
 - Core: Add `merge_all_available_skills` config option — when enabled, skills from all existing brand directories (`~/.kimi/skills/`, `~/.claude/skills/`, `~/.codex/skills/`) are loaded and merged instead of using only the first one found; same-name skills follow priority order kimi > claude > codex; disabled by default
 - CLI: Add `--plan` flag and `default_plan_mode` config option — start new sessions in plan mode via `kimi --plan` or by setting `default_plan_mode = true` in `~/.kimi/config.toml`; resumed sessions preserve their existing plan mode state
+- Core: Fix `custom_headers` not being passed to non-Kimi providers — OpenAI, Anthropic, Google GenAI, and Vertex AI providers now correctly forward custom headers configured in `providers.*.custom_headers`
+
 ## 1.29.0 (2026-04-01)
 
 - Core: Support hierarchical `AGENTS.md` loading — the CLI now discovers and merges `AGENTS.md` files from the git project root down to the working directory, including `.kimi/AGENTS.md` at each level; deeper files take priority under a 32 KiB budget cap, ensuring the most specific instructions are never truncated
diff --git a/packages/kosong/src/kosong/contrib/chat_provider/google_genai.py b/packages/kosong/src/kosong/contrib/chat_provider/google_genai.py
index cbb0d3db4..ff3294cd2 100644
--- a/packages/kosong/src/kosong/contrib/chat_provider/google_genai.py
+++ b/packages/kosong/src/kosong/contrib/chat_provider/google_genai.py
@@ -89,13 +89,15 @@ def __init__(
         base_url: str | None = None,
         stream: bool = True,
         vertexai: bool | None = None,
+        default_headers: dict[str, str] | None = None,
         **client_kwargs: Any,
     ):
         self._model = model
         self._stream = stream
         self._base_url = base_url
+        http_options = HttpOptions(base_url=base_url, headers=default_headers)
         self._client: genai_client.Client = genai.Client(
-            http_options=HttpOptions(base_url=base_url),
+            http_options=http_options,
             api_key=api_key,
             vertexai=vertexai,
             **client_kwargs,
diff --git a/src/kimi_cli/llm.py b/src/kimi_cli/llm.py
index fba2f10e9..4e9801965 100644
--- a/src/kimi_cli/llm.py
+++ b/src/kimi_cli/llm.py
@@ -152,6 +152,7 @@ def create_llm(
                 model=model.model,
                 base_url=provider.base_url,
                 api_key=resolved_api_key,
+                default_headers=dict(provider.custom_headers) if provider.custom_headers else None,
             )
         case "openai_responses":
             from kosong.contrib.chat_provider.openai_responses import OpenAIResponses
@@ -160,6 +161,7 @@ def create_llm(
                 model=model.model,
                 base_url=provider.base_url,
                 api_key=resolved_api_key,
+                default_headers=dict(provider.custom_headers) if provider.custom_headers else None,
             )
         case "anthropic":
             from kosong.contrib.chat_provider.anthropic import Anthropic
@@ -170,6 +172,7 @@ def create_llm(
                 api_key=resolved_api_key,
                 default_max_tokens=50000,
                 metadata={"user_id": session_id} if session_id else None,
+                default_headers=dict(provider.custom_headers) if provider.custom_headers else None,
             )
         case "google_genai" | "gemini":
             from kosong.contrib.chat_provider.google_genai import GoogleGenAI
@@ -178,6 +181,7 @@ def create_llm(
                 model=model.model,
                 base_url=provider.base_url,
                 api_key=resolved_api_key,
+                default_headers=dict(provider.custom_headers) if provider.custom_headers else None,
             )
         case "vertexai":
             from kosong.contrib.chat_provider.google_genai import GoogleGenAI
@@ -188,6 +192,7 @@ def create_llm(
                 base_url=provider.base_url,
                 api_key=resolved_api_key,
                 vertexai=True,
+                default_headers=dict(provider.custom_headers) if provider.custom_headers else None,
             )
         case "_echo":
             from kosong.chat_provider.echo import EchoChatProvider
diff --git a/tests/core/test_create_llm.py b/tests/core/test_create_llm.py
index f52a0e315..8ad1045bf 100644
--- a/tests/core/test_create_llm.py
+++ b/tests/core/test_create_llm.py
@@ -136,6 +136,172 @@ def test_create_llm_requires_base_url_for_kimi():
     assert create_llm(provider, model) is None
 
 
+def test_create_llm_openai_legacy_custom_headers():
+    from kosong.contrib.chat_provider.openai_legacy import OpenAILegacy
+
+    provider = LLMProvider(
+        type="openai_legacy",
+        base_url="https://api.openai.com/v1",
+        api_key=SecretStr("test-key"),
+        custom_headers={"X-Custom": "value", "X-Canary": "always"},
+    )
+    model = LLMModel(
+        provider="openai",
+        model="gpt-4o",
+        max_context_size=128000,
+    )
+
+    llm = create_llm(provider, model)
+    assert llm is not None
+    assert isinstance(llm.chat_provider, OpenAILegacy)
+    assert llm.chat_provider._client_kwargs.get("default_headers") == {
+        "X-Custom": "value",
+        "X-Canary": "always",
+    }
+
+
+def test_create_llm_openai_responses_custom_headers():
+    provider = LLMProvider(
+        type="openai_responses",
+        base_url="https://api.openai.com/v1",
+        api_key=SecretStr("test-key"),
+        custom_headers={"X-Custom": "value"},
+    )
+    model = LLMModel(
+        provider="openai",
+        model="gpt-4o",
+        max_context_size=128000,
+    )
+
+    llm = create_llm(provider, model)
+    assert llm is not None
+    assert isinstance(llm.chat_provider, OpenAIResponses)
+    assert llm.chat_provider._client_kwargs.get("default_headers") == {"X-Custom": "value"}
+
+
+def test_create_llm_anthropic_custom_headers():
+    from kosong.contrib.chat_provider.anthropic import Anthropic
+
+    provider = LLMProvider(
+        type="anthropic",
+        base_url="https://api.anthropic.com",
+        api_key=SecretStr("test-key"),
+        custom_headers={"X-Custom": "value"},
+    )
+    model = LLMModel(
+        provider="anthropic",
+        model="claude-sonnet-4-20250514",
+        max_context_size=200000,
+    )
+
+    llm = create_llm(provider, model)
+    assert llm is not None
+    assert isinstance(llm.chat_provider, Anthropic)
+    # AsyncAnthropic stores custom headers in _custom_headers
+    assert llm.chat_provider._client._custom_headers.get("X-Custom") == "value"
+
+
+def test_create_llm_google_genai_custom_headers():
+    from kosong.contrib.chat_provider.google_genai import GoogleGenAI
+
+    provider = LLMProvider(
+        type="google_genai",
+        base_url="https://generativelanguage.googleapis.com",
+        api_key=SecretStr("test-key"),
+        custom_headers={"X-Custom": "value"},
+    )
+    model = LLMModel(
+        provider="google_genai",
+        model="gemini-2.5-pro",
+        max_context_size=1000000,
+    )
+
+    llm = create_llm(provider, model)
+    assert llm is not None
+    assert isinstance(llm.chat_provider, GoogleGenAI)
+    # Google GenAI client stores http_options on _api_client
+    http_options = llm.chat_provider._client._api_client._http_options
+    assert http_options.headers is not None
+    assert http_options.headers.get("X-Custom") == "value"
+
+
+def test_create_llm_vertexai_custom_headers():
+    from kosong.contrib.chat_provider.google_genai import GoogleGenAI
+
+    provider = LLMProvider(
+        type="vertexai",
+        base_url="https://us-central1-aiplatform.googleapis.com",
+        api_key=SecretStr("test-key"),
+        custom_headers={"X-Custom": "value"},
+    )
+    model = LLMModel(
+        provider="vertexai",
+        model="gemini-2.5-pro",
+        max_context_size=1000000,
+    )
+
+    llm = create_llm(provider, model)
+    assert llm is not None
+    assert isinstance(llm.chat_provider, GoogleGenAI)
+    http_options = llm.chat_provider._client._api_client._http_options
+    assert http_options.headers is not None
+    assert http_options.headers.get("X-Custom") == "value"
+
+
+def test_create_llm_custom_headers_isolated_between_instances():
+    """Mutating headers on one instance must not affect another created from the same provider."""
+    from kosong.contrib.chat_provider.openai_legacy import OpenAILegacy
+
+    provider = LLMProvider(
+        type="openai_legacy",
+        base_url="https://api.openai.com/v1",
+        api_key=SecretStr("test-key"),
+        custom_headers={"X-Custom": "original"},
+    )
+    model = LLMModel(
+        provider="openai",
+        model="gpt-4o",
+        max_context_size=128000,
+    )
+
+    llm1 = create_llm(provider, model)
+    llm2 = create_llm(provider, model)
+    assert llm1 is not None and llm2 is not None
+    assert isinstance(llm1.chat_provider, OpenAILegacy)
+    assert isinstance(llm2.chat_provider, OpenAILegacy)
+
+    # Mutate headers on the first instance
+    llm1.chat_provider._client_kwargs["default_headers"]["X-Custom"] = "mutated"
+
+    # Second instance must be unaffected
+    assert llm2.chat_provider._client_kwargs["default_headers"]["X-Custom"] == "original"
+    # Original provider must also be unaffected
+    assert provider.custom_headers is not None
+    assert provider.custom_headers["X-Custom"] == "original"
+
+
+def test_create_llm_no_custom_headers_keeps_existing_behavior():
+    """When custom_headers is None, providers should work exactly as before."""
+    from kosong.contrib.chat_provider.openai_legacy import OpenAILegacy
+
+    provider = LLMProvider(
+        type="openai_legacy",
+        base_url="https://api.openai.com/v1",
+        api_key=SecretStr("test-key"),
+    )
+    model = LLMModel(
+        provider="openai",
+        model="gpt-4o",
+        max_context_size=128000,
+    )
+
+    llm = create_llm(provider, model)
+    assert llm is not None
+    assert isinstance(llm.chat_provider, OpenAILegacy)
+    # When custom_headers is None, the SDK client should have no custom headers
+    assert llm.chat_provider.client._custom_headers == {}
+
+
 def test_create_llm_openai_responses_thinking_false_no_reasoning_in_params():
     """thinking=False should call with_thinking("off"), which sets reasoning_effort=None.
     The OpenAIResponses provider handles this by omitting reasoning from the request."""

From 4ea88f2978f8b8e9462430e23e1585e4d5d9ed25 Mon Sep 17 00:00:00 2001
From: Kaiyi
Date: Thu, 2 Apr 2026 21:31:26 +0800
Subject: [PATCH 2/2] docs: add changelog entries for custom_headers fix

Sync root CHANGELOG to English docs, add Chinese translation, and add kosong
sub-package changelog entry for GoogleGenAI default_headers parameter.
---
 docs/en/release-notes/changelog.md | 1 +
 docs/zh/release-notes/changelog.md | 1 +
 packages/kosong/CHANGELOG.md       | 2 ++
 3 files changed, 4 insertions(+)

diff --git a/docs/en/release-notes/changelog.md b/docs/en/release-notes/changelog.md
index 6e52c2a20..1c5c29800 100644
--- a/docs/en/release-notes/changelog.md
+++ b/docs/en/release-notes/changelog.md
@@ -16,6 +16,7 @@ This page documents the changes in each Kimi Code CLI release.
 
 - CLI: Add `--plan` flag and `default_plan_mode` config option — start new sessions in plan mode via `kimi --plan` or by setting `default_plan_mode = true` in `~/.kimi/config.toml`; resumed sessions preserve their existing plan mode state
 - Shell: Add `/undo` and `/fork` commands for session forking — `/undo` lets you pick a previous turn and fork a new session with the selected message pre-filled for re-editing; `/fork` duplicates the entire session history into a new session; the original session is always preserved
 - CLI: Add `-r` as a short alias for `--session` and print a resume hint (`kimi -r `) whenever a session exits — covers normal exit, Ctrl-C, `/undo`, `/fork`, and `/sessions` switch so users can always find their way back
+- Core: Fix `custom_headers` not being passed to non-Kimi providers — OpenAI, Anthropic, Google GenAI, and Vertex AI providers now correctly forward custom headers configured in `providers.*.custom_headers`
 
 ## 1.29.0 (2026-04-01)
diff --git a/docs/zh/release-notes/changelog.md b/docs/zh/release-notes/changelog.md
index 6fbc8042d..92a982e1b 100644
--- a/docs/zh/release-notes/changelog.md
+++ b/docs/zh/release-notes/changelog.md
@@ -16,6 +16,7 @@
 
 - CLI:新增 `--plan` 启动参数和 `default_plan_mode` 配置项——通过 `kimi --plan` 或在 `~/.kimi/config.toml` 中设置 `default_plan_mode = true` 可让新会话直接进入计划模式;恢复的会话保留其原有的计划模式状态
 - Shell:新增 `/undo` 和 `/fork` 命令用于会话分支——`/undo` 支持选择一个历史轮次并 fork 出新会话,被选中轮次的用户消息会预填到输入框供重新编辑;`/fork` 将当前完整对话历史复制到新会话;原会话始终保留不丢失
 - CLI:新增 `-r` 作为 `--session` 的简写别名,并在会话退出时输出恢复提示(`kimi -r `)——覆盖正常退出、Ctrl-C、`/undo`、`/fork` 和 `/sessions` 切换等场景,确保用户始终能找到回到会话的方式
+- Core:修复 `custom_headers` 未传递给非 Kimi provider 的问题——OpenAI、Anthropic、Google GenAI 和 Vertex AI provider 现在能正确转发 `providers.*.custom_headers` 中配置的自定义请求头
 
 ## 1.29.0 (2026-04-01)
diff --git a/packages/kosong/CHANGELOG.md b/packages/kosong/CHANGELOG.md
index 272e5f198..26160102f 100644
--- a/packages/kosong/CHANGELOG.md
+++ b/packages/kosong/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ## Unreleased
 
+- Google GenAI: Add `default_headers` parameter to `GoogleGenAI` constructor — custom headers are merged into `HttpOptions` so they are included in all API requests
+
 ## 0.47.0 (2026-03-30)
 
 - OpenAI: Fix implicit `reasoning_effort` causing 400 errors — auto-set `reasoning_effort` to `"medium"` when history contains `ThinkPart` and the parameter wasn't explicitly set
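
A note on the defensive copy described in PATCH 1/2 (not part of either patch): the short sketch below is a standalone Python illustration, with hypothetical variable names, of the aliasing hazard that `dict(provider.custom_headers)` avoids and that test_create_llm_custom_headers_isolated_between_instances locks in.

    # Standalone illustration only; the names below are hypothetical, not taken from the patch.
    provider_custom_headers = {"X-Custom": "original"}  # stands in for provider.custom_headers

    copied = dict(provider_custom_headers)   # defensive copy, as create_llm() now passes to the SDK
    aliased = provider_custom_headers        # shared reference, the prior hazard

    aliased["X-Custom"] = "mutated"          # one client mutates its headers in place

    assert provider_custom_headers["X-Custom"] == "mutated"  # the shared dict leaked the change
    assert copied["X-Custom"] == "original"                   # the copied dict stays isolated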