From 3987feed55cf1f3aad305e3ceeb78732421607a3 Mon Sep 17 00:00:00 2001 From: Kaiyi Date: Fri, 3 Apr 2026 14:58:04 +0800 Subject: [PATCH 1/7] fix(todo): refactor SetTodoList to persist state and prevent tool call storms Refactored the SetTodoList tool to address issue #1710 where the model would repeatedly call SetTodoList in a loop ("storm") especially when Shell was disabled. Key changes: - Persist todos to SessionState (root agent) and independent state.json (sub-agents) for proper state management - Add query mode: omit todos param to read current state, pass [] to clear - Add anti-storm guidance in tool description preventing repeated calls without progress - Protect against race conditions in Session.save_state() for todos field - Return non-empty output ("Todo list updated") so models get confirmation Closes #1710 --- src/kimi_cli/session.py | 1 + src/kimi_cli/session_state.py | 10 + src/kimi_cli/tools/todo/__init__.py | 133 +++++++++++- src/kimi_cli/tools/todo/set_todo_list.md | 12 +- tests/conftest.py | 4 +- tests/core/test_session.py | 23 +++ tests/tools/test_todo.py | 248 +++++++++++++++++++++++ tests/tools/test_tool_descriptions.py | 12 +- tests/tools/test_tool_schemas.py | 39 ++-- tests_e2e/test_wire_approvals_tools.py | 4 +- tests_e2e/test_wire_prompt.py | 2 +- 11 files changed, 458 insertions(+), 30 deletions(-) create mode 100644 tests/tools/test_todo.py diff --git a/src/kimi_cli/session.py b/src/kimi_cli/session.py index cdef6c0e4..a0ab72dee 100644 --- a/src/kimi_cli/session.py +++ b/src/kimi_cli/session.py @@ -94,6 +94,7 @@ def save_state(self) -> None: self.state.archived = fresh.archived self.state.archived_at = fresh.archived_at self.state.auto_archive_exempt = fresh.auto_archive_exempt + self.state.todos = fresh.todos save_session_state(self.state, self.dir) async def delete(self) -> None: diff --git a/src/kimi_cli/session_state.py b/src/kimi_cli/session_state.py index f7a54bde9..8cc4ba3b4 100644 --- a/src/kimi_cli/session_state.py +++ b/src/kimi_cli/session_state.py @@ -2,6 +2,7 @@ import json from pathlib import Path +from typing import Literal from pydantic import BaseModel, Field, ValidationError @@ -16,6 +17,13 @@ class ApprovalStateData(BaseModel): auto_approve_actions: set[str] = Field(default_factory=set) +class TodoItemState(BaseModel): + """A single todo item stored in session or subagent state.""" + + title: str + status: Literal["pending", "in_progress", "done"] + + class SessionState(BaseModel): version: int = 1 approval: ApprovalStateData = Field(default_factory=ApprovalStateData) @@ -31,6 +39,8 @@ class SessionState(BaseModel): archived: bool = False archived_at: float | None = None auto_archive_exempt: bool = False + # Todo list state + todos: list[TodoItemState] = Field(default_factory=lambda: list[TodoItemState]()) _LEGACY_METADATA_FILENAME = "metadata.json" diff --git a/src/kimi_cli/tools/todo/__init__.py b/src/kimi_cli/tools/todo/__init__.py index b5b8b61f0..5310d706a 100644 --- a/src/kimi_cli/tools/todo/__init__.py +++ b/src/kimi_cli/tools/todo/__init__.py @@ -1,11 +1,15 @@ +import json from pathlib import Path -from typing import Literal, override +from typing import Any, Literal, override from kosong.tooling import CallableTool2, ToolReturnValue from pydantic import BaseModel, Field +from kimi_cli.session_state import TodoItemState +from kimi_cli.soul.agent import Runtime from kimi_cli.tools.display import TodoDisplayBlock, TodoDisplayItem from kimi_cli.tools.utils import load_desc +from kimi_cli.utils.logging import logger class Todo(BaseModel): @@ -14,7 +18,13 @@ class Todo(BaseModel): class Params(BaseModel): - todos: list[Todo] = Field(description="The updated todo list") + todos: list[Todo] | None = Field( + default=None, + description=( + "The updated todo list. " + "If not provided, returns the current todo list without making changes." + ), + ) class SetTodoList(CallableTool2[Params]): @@ -22,12 +32,127 @@ class SetTodoList(CallableTool2[Params]): description: str = load_desc(Path(__file__).parent / "set_todo_list.md") params: type[Params] = Params + def __init__(self, runtime: Runtime) -> None: + super().__init__() + self._runtime = runtime + @override async def __call__(self, params: Params) -> ToolReturnValue: - items = [TodoDisplayItem(title=todo.title, status=todo.status) for todo in params.todos] + if params.todos is None: + return self._read_todos() + return self._write_todos(params.todos) + + # ---- Write mode -------------------------------------------------------- + + def _write_todos(self, todos: list[Todo]) -> ToolReturnValue: + """Persist the todo list and return confirmation.""" + self._save_todos(todos) + + items = [TodoDisplayItem(title=todo.title, status=todo.status) for todo in todos] return ToolReturnValue( is_error=False, - output="", + output="Todo list updated", message="Todo list updated", display=[TodoDisplayBlock(items=items)], ) + + # ---- Read mode --------------------------------------------------------- + + def _read_todos(self) -> ToolReturnValue: + """Return the current todo list as text output for the model.""" + todos = self._load_todos() + if not todos: + return ToolReturnValue( + is_error=False, + output="Todo list is empty.", + message="", + display=[], + ) + + lines: list[str] = ["Current todo list:"] + for todo in todos: + lines.append(f"- [{todo.status}] {todo.title}") + return ToolReturnValue( + is_error=False, + output="\n".join(lines), + message="", + display=[], + ) + + # ---- Persistence ------------------------------------------------------- + + def _save_todos(self, todos: list[Todo]) -> None: + """Persist todos to the appropriate state file.""" + items = [TodoItemState(title=t.title, status=t.status) for t in todos] + + if self._runtime.role == "root": + self._save_root_todos(items) + else: + self._save_subagent_todos(items) + + def _load_todos(self) -> list[Todo]: + """Load todos from the appropriate state file.""" + if self._runtime.role == "root": + return self._load_root_todos() + else: + return self._load_subagent_todos() + + def _save_root_todos(self, items: list[TodoItemState]) -> None: + from kimi_cli.session_state import load_session_state, save_session_state + + session = self._runtime.session + # Read-modify-write to avoid overwriting concurrent changes + fresh = load_session_state(session.dir) + fresh.todos = items + save_session_state(fresh, session.dir) + session.state.todos = items + + def _load_root_todos(self) -> list[Todo]: + todos = self._runtime.session.state.todos + return [Todo(title=t.title, status=t.status) for t in todos] + + def _save_subagent_todos(self, items: list[TodoItemState]) -> None: + state_file = self._subagent_state_file() + if state_file is None: + return + data = self._read_subagent_state(state_file) + data["todos"] = [item.model_dump() for item in items] + self._write_subagent_state(state_file, data) + + def _load_subagent_todos(self) -> list[Todo]: + state_file = self._subagent_state_file() + if state_file is None: + return [] + data = self._read_subagent_state(state_file) + raw_todos = data.get("todos", []) + result: list[Todo] = [] + for item in raw_todos: + try: + result.append(Todo(**item)) + except Exception: + logger.warning("Skipping malformed todo item in subagent state: {item}", item=item) + return result + + def _subagent_state_file(self) -> Path | None: + store = self._runtime.subagent_store + agent_id = self._runtime.subagent_id + if store is None or agent_id is None: + return None + return store.instance_dir(agent_id) / "state.json" + + @staticmethod + def _read_subagent_state(path: Path) -> dict[str, Any]: + if not path.exists(): + return {} + try: + return json.loads(path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + logger.warning("Corrupted subagent todo state, using defaults: {path}", path=path) + return {} + + @staticmethod + def _write_subagent_state(path: Path, data: dict[str, Any]) -> None: + from kimi_cli.utils.io import atomic_json_write + + path.parent.mkdir(parents=True, exist_ok=True) + atomic_json_write(data, path) diff --git a/src/kimi_cli/tools/todo/set_todo_list.md b/src/kimi_cli/tools/todo/set_todo_list.md index d28889940..441dd8d3a 100644 --- a/src/kimi_cli/tools/todo/set_todo_list.md +++ b/src/kimi_cli/tools/todo/set_todo_list.md @@ -1,8 +1,14 @@ -Update the whole todo list. +Manage your todo list for tracking task progress. Todo list is a simple yet powerful tool to help you get things done. You typically want to use this tool when the given task involves multiple subtasks/milestones, or, multiple tasks are given in a single request. This tool can help you to break down the task and track the progress. -This is the only todo list tool available to you. That said, each time you want to operate on the todo list, you need to update the whole. Make sure to maintain the todo items and their statuses properly. +**Usage modes:** + +- **Update mode**: Pass `todos` to set the entire todo list. The previous list is replaced. +- **Query mode**: Omit `todos` (or pass null) to retrieve the current todo list without changes. +- **Clear mode**: Pass an empty array `[]` to clear all todos. + +This is the only todo list tool available to you. That said, each time you want to update the todo list, you need to provide the whole list. Make sure to maintain the todo items and their statuses properly. Once you finished a subtask/milestone, remember to update the todo list to reflect the progress. Also, you can give yourself a self-encouragement to keep you motivated. @@ -13,3 +19,5 @@ Abusing this tool to track too small steps will just waste your time and make yo - When the user prompt is very specific and the only thing you need to do is brainlessly following the instructions. E.g. "Replace xxx to yyy in the file zzz", "Create a file xxx with content yyy." However, do not get stuck in a rut. Be flexible. Sometimes, you may try to use todo list at first, then realize the task is too simple and you can simply stop using it; or, sometimes, you may realize the task is complex after a few steps and then you can start using todo list to break it down. + +IMPORTANT: Do not call this tool repeatedly without making real progress on at least one task between calls. If you are unsure about the current state, use Query mode (omit `todos`) to check before updating. If you find yourself unable to advance any task with your available tools, inform the user about what is blocking you instead of replanning. Repeatedly updating the todo list without doing actual work is counterproductive. diff --git a/tests/conftest.py b/tests/conftest.py index 3ad430136..ef5c7ad03 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -249,9 +249,9 @@ def think_tool() -> Think: @pytest.fixture -def set_todo_list_tool() -> SetTodoList: +def set_todo_list_tool(runtime: Runtime) -> SetTodoList: """Create a SetTodoList tool instance.""" - return SetTodoList() + return SetTodoList(runtime) @pytest.fixture diff --git a/tests/core/test_session.py b/tests/core/test_session.py index 8f5367e83..dc45fb9a9 100644 --- a/tests/core/test_session.py +++ b/tests/core/test_session.py @@ -359,6 +359,29 @@ async def test_save_state_reload_does_not_lose_worker_fields( assert result.custom_title == "External Title" +async def test_save_state_preserves_external_todos(isolated_share_dir: Path, work_dir: KaosPath): + """save_state() should not overwrite todos written externally (e.g., by SetTodoList).""" + from kimi_cli.session_state import TodoItemState, load_session_state, save_session_state + + session = await Session.create(work_dir) + + # Simulate SetTodoList writing todos directly to disk (as _save_root_todos does) + fresh = load_session_state(session.dir) + fresh.todos = [TodoItemState(title="External todo", status="pending")] + save_session_state(fresh, session.dir) + + # Worker's in-memory state still has empty todos + assert session.state.todos == [] + + # Worker saves — should reload todos from disk, not overwrite with empty list + session.save_state() + + result = load_session_state(session.dir) + assert len(result.todos) == 1 + assert result.todos[0].title == "External todo" + assert result.todos[0].status == "pending" + + async def test_is_empty_with_only_metadata_records( isolated_share_dir: Path, work_dir: KaosPath ) -> None: diff --git a/tests/tools/test_todo.py b/tests/tools/test_todo.py new file mode 100644 index 000000000..f5ca1804b --- /dev/null +++ b/tests/tools/test_todo.py @@ -0,0 +1,248 @@ +"""Tests for SetTodoList tool.""" + +from __future__ import annotations + +import pytest + +from kimi_cli.soul.agent import Runtime +from kimi_cli.tools.todo import Params, SetTodoList, Todo + + +@pytest.fixture +def set_todo_list_tool(runtime: Runtime) -> SetTodoList: + """Create a SetTodoList tool instance with runtime.""" + return SetTodoList(runtime) + + +class TestSetTodoListOutputNotEmpty: + """Regression test for issue #1710: SetTodoList storm. + + The root cause is that SetTodoList returned output="" which meant the model + only saw 'Todo list updated' — no confirmation of what it + saved. This led to repeated calls (a "storm") especially when Shell was disabled. + """ + + async def test_write_mode_returns_nonempty_output(self, set_todo_list_tool: SetTodoList): + """When todos are provided, the tool must return a non-empty output + so the model gets meaningful feedback (not just 'Todo list updated').""" + params = Params( + todos=[ + Todo(title="Analyze code", status="pending"), + Todo(title="Write tests", status="in_progress"), + Todo(title="Read requirements", status="done"), + ] + ) + result = await set_todo_list_tool(params) + assert not result.is_error + # The critical assertion: output must NOT be empty + assert result.output != "", ( + "SetTodoList output must not be empty — this is the root cause of issue #1710. " + "The model needs to see confirmation of the todo state it just set." + ) + assert result.message == "Todo list updated" + + async def test_read_mode_returns_current_todos(self, set_todo_list_tool: SetTodoList): + """When no todos are provided (None), the tool should return the current + todo list from persistent storage, including status.""" + # First write some todos + write_params = Params( + todos=[ + Todo(title="Task A", status="pending"), + Todo(title="Task B", status="done"), + ] + ) + await set_todo_list_tool(write_params) + + # Then read without providing todos + read_params = Params(todos=None) + result = await set_todo_list_tool(read_params) + assert not result.is_error + assert "Task A" in result.output + assert "Task B" in result.output + assert "pending" in result.output + assert "done" in result.output + + async def test_read_mode_empty_list(self, set_todo_list_tool: SetTodoList): + """Reading with no prior todos should return a clear empty message.""" + read_params = Params(todos=None) + result = await set_todo_list_tool(read_params) + assert not result.is_error + assert result.output # non-empty even when no todos + + async def test_write_empty_list_clears_todos(self, set_todo_list_tool: SetTodoList): + """Passing an empty list [] should clear all todos.""" + # Write some todos first + write_params = Params(todos=[Todo(title="Task A", status="pending")]) + await set_todo_list_tool(write_params) + + # Clear with empty list + clear_params = Params(todos=[]) + result = await set_todo_list_tool(clear_params) + assert not result.is_error + assert result.output == "Todo list updated" + + # Verify cleared + read_params = Params(todos=None) + result = await set_todo_list_tool(read_params) + assert isinstance(result.output, str) + assert "empty" in result.output.lower() or result.output.strip() == "Todo list is empty." + + async def test_root_todos_persisted_to_disk( + self, set_todo_list_tool: SetTodoList, runtime: Runtime + ): + """Write mode should persist todos to disk via SessionState.""" + from kimi_cli.session_state import load_session_state + + params = Params( + todos=[ + Todo(title="Disk task", status="in_progress"), + Todo(title="Another task", status="done"), + ] + ) + await set_todo_list_tool(params) + + # Verify by loading directly from disk, bypassing in-memory state + disk_state = load_session_state(runtime.session.dir) + assert len(disk_state.todos) == 2 + assert disk_state.todos[0].title == "Disk task" + assert disk_state.todos[0].status == "in_progress" + assert disk_state.todos[1].title == "Another task" + assert disk_state.todos[1].status == "done" + + async def test_write_mode_display_block(self, set_todo_list_tool: SetTodoList): + """Write mode should still produce TodoDisplayBlock for UI rendering.""" + from kimi_cli.tools.display import TodoDisplayBlock + + params = Params(todos=[Todo(title="UI task", status="pending")]) + result = await set_todo_list_tool(params) + assert len(result.display) == 1 + assert isinstance(result.display[0], TodoDisplayBlock) + assert result.display[0].items[0].title == "UI task" + + async def test_read_mode_no_display_block(self, set_todo_list_tool: SetTodoList): + """Read mode should not produce display blocks (no UI side-effect).""" + read_params = Params(todos=None) + result = await set_todo_list_tool(read_params) + assert result.display == [] + + +class TestSetTodoListSubagent: + """Test SetTodoList behavior in subagent context.""" + + async def test_subagent_uses_independent_storage(self, runtime: Runtime): + """Subagent todos should be stored independently from root agent.""" + # Create root tool and set a todo + root_tool = SetTodoList(runtime) + await root_tool(Params(todos=[Todo(title="Root task", status="pending")])) + + # Create a subagent runtime + subagent_runtime = runtime.copy_for_subagent( + agent_id="test-sub-1", + subagent_type="coder", + ) + # Initialize the subagent instance directory + assert subagent_runtime.subagent_store is not None + subagent_runtime.subagent_store.instance_dir("test-sub-1", create=True) + + sub_tool = SetTodoList(subagent_runtime) + + # Subagent should start with empty todos + result = await sub_tool(Params(todos=None)) + assert isinstance(result.output, str) + assert "empty" in result.output.lower() or "Root task" not in result.output + + # Subagent writes its own todo + await sub_tool(Params(todos=[Todo(title="Sub task", status="in_progress")])) + result = await sub_tool(Params(todos=None)) + assert "Sub task" in result.output + + # Root agent should still have its own todo + result = await root_tool(Params(todos=None)) + assert "Root task" in result.output + assert "Sub task" not in result.output + + async def test_subagent_no_store_or_id_graceful(self, runtime: Runtime): + """When subagent_store or subagent_id is None, save is a no-op and load returns empty.""" + subagent_runtime = runtime.copy_for_subagent( + agent_id="test-sub-2", + subagent_type="coder", + ) + # Force store/id to None to simulate edge case + subagent_runtime.subagent_store = None + subagent_runtime.subagent_id = None + + tool = SetTodoList(subagent_runtime) + + # Write should silently succeed (no-op) + result = await tool(Params(todos=[Todo(title="Ghost task", status="pending")])) + assert not result.is_error + assert result.output == "Todo list updated" + + # Read should return empty + result = await tool(Params(todos=None)) + assert not result.is_error + assert isinstance(result.output, str) + assert "empty" in result.output.lower() + + async def test_corrupted_subagent_state_file(self, runtime: Runtime): + """Corrupted subagent state.json should be handled gracefully.""" + subagent_runtime = runtime.copy_for_subagent( + agent_id="test-sub-3", + subagent_type="coder", + ) + assert subagent_runtime.subagent_store is not None + instance_dir = subagent_runtime.subagent_store.instance_dir("test-sub-3", create=True) + + # Write corrupted JSON to state.json + state_file = instance_dir / "state.json" + state_file.write_text("not valid json {{{", encoding="utf-8") + + tool = SetTodoList(subagent_runtime) + + # Read should return empty (corrupted file treated as empty) + result = await tool(Params(todos=None)) + assert not result.is_error + assert isinstance(result.output, str) + assert "empty" in result.output.lower() + + # Write should overwrite the corrupted file successfully + result = await tool(Params(todos=[Todo(title="Recovery task", status="pending")])) + assert not result.is_error + + # Verify recovery + result = await tool(Params(todos=None)) + assert "Recovery task" in result.output + + async def test_subagent_malformed_individual_item(self, runtime: Runtime): + """Malformed individual items in state.json should be skipped, valid ones preserved.""" + import json + + subagent_runtime = runtime.copy_for_subagent( + agent_id="test-sub-4", + subagent_type="coder", + ) + assert subagent_runtime.subagent_store is not None + instance_dir = subagent_runtime.subagent_store.instance_dir("test-sub-4", create=True) + + # Write JSON with one valid and one invalid todo item + state_file = instance_dir / "state.json" + state_file.write_text( + json.dumps( + { + "todos": [ + {"title": "Valid task", "status": "pending"}, + {"bad": "item"}, # missing title and status + {"title": "Also valid", "status": "done"}, + ] + } + ), + encoding="utf-8", + ) + + tool = SetTodoList(subagent_runtime) + result = await tool(Params(todos=None)) + assert not result.is_error + assert "Valid task" in result.output + assert "Also valid" in result.output + # The malformed item should be silently skipped + assert "bad" not in result.output diff --git a/tests/tools/test_tool_descriptions.py b/tests/tools/test_tool_descriptions.py index 90d7128e9..398ef7bc6 100644 --- a/tests/tools/test_tool_descriptions.py +++ b/tests/tools/test_tool_descriptions.py @@ -107,11 +107,17 @@ def test_set_todo_list_description(set_todo_list_tool: SetTodoList): """Test the description of SetTodoList tool.""" assert set_todo_list_tool.base.description == snapshot( """\ -Update the whole todo list. +Manage your todo list for tracking task progress. Todo list is a simple yet powerful tool to help you get things done. You typically want to use this tool when the given task involves multiple subtasks/milestones, or, multiple tasks are given in a single request. This tool can help you to break down the task and track the progress. -This is the only todo list tool available to you. That said, each time you want to operate on the todo list, you need to update the whole. Make sure to maintain the todo items and their statuses properly. +**Usage modes:** + +- **Update mode**: Pass `todos` to set the entire todo list. The previous list is replaced. +- **Query mode**: Omit `todos` (or pass null) to retrieve the current todo list without changes. +- **Clear mode**: Pass an empty array `[]` to clear all todos. + +This is the only todo list tool available to you. That said, each time you want to update the todo list, you need to provide the whole list. Make sure to maintain the todo items and their statuses properly. Once you finished a subtask/milestone, remember to update the todo list to reflect the progress. Also, you can give yourself a self-encouragement to keep you motivated. @@ -122,6 +128,8 @@ def test_set_todo_list_description(set_todo_list_tool: SetTodoList): - When the user prompt is very specific and the only thing you need to do is brainlessly following the instructions. E.g. "Replace xxx to yyy in the file zzz", "Create a file xxx with content yyy." However, do not get stuck in a rut. Be flexible. Sometimes, you may try to use todo list at first, then realize the task is too simple and you can simply stop using it; or, sometimes, you may realize the task is complex after a few steps and then you can start using todo list to break it down. + +IMPORTANT: Do not call this tool repeatedly without making real progress on at least one task between calls. If you are unsure about the current state, use Query mode (omit `todos`) to check before updating. If you find yourself unable to advance any task with your available tools, inform the user about what is blocking you instead of replanning. Repeatedly updating the todo list without doing actual work is counterproductive. """ ) diff --git a/tests/tools/test_tool_schemas.py b/tests/tools/test_tool_schemas.py index 82852e5ea..32c736ec4 100644 --- a/tests/tools/test_tool_schemas.py +++ b/tests/tools/test_tool_schemas.py @@ -108,27 +108,32 @@ def test_set_todo_list_params_schema(set_todo_list_tool: SetTodoList): { "properties": { "todos": { - "description": "The updated todo list", - "items": { - "properties": { - "title": { - "description": "The title of the todo", - "minLength": 1, - "type": "string", - }, - "status": { - "description": "The status of the todo", - "enum": ["pending", "in_progress", "done"], - "type": "string", + "anyOf": [ + { + "items": { + "properties": { + "title": { + "description": "The title of the todo", + "minLength": 1, + "type": "string", + }, + "status": { + "description": "The status of the todo", + "enum": ["pending", "in_progress", "done"], + "type": "string", + }, + }, + "required": ["title", "status"], + "type": "object", }, + "type": "array", }, - "required": ["title", "status"], - "type": "object", - }, - "type": "array", + {"type": "null"}, + ], + "default": None, + "description": "The updated todo list. If not provided, returns the current todo list without making changes.", } }, - "required": ["todos"], "type": "object", } ) diff --git a/tests_e2e/test_wire_approvals_tools.py b/tests_e2e/test_wire_approvals_tools.py index b2b5df9c8..19a41d3c6 100644 --- a/tests_e2e/test_wire_approvals_tools.py +++ b/tests_e2e/test_wire_approvals_tools.py @@ -894,7 +894,7 @@ def test_display_block_todo(tmp_path) -> None: "tool_call_id": "tc-1", "return_value": { "is_error": False, - "output": "", + "output": "Todo list updated", "message": "Todo list updated", "display": [ {"type": "todo", "items": [{"title": "one", "status": "pending"}]} @@ -1012,7 +1012,7 @@ def test_tool_call_part_streaming(tmp_path) -> None: "tool_call_id": "tc-1", "return_value": { "is_error": False, - "output": "", + "output": "Todo list updated", "message": "Todo list updated", "display": [ {"type": "todo", "items": [{"title": "a", "status": "pending"}]} diff --git a/tests_e2e/test_wire_prompt.py b/tests_e2e/test_wire_prompt.py index 047139856..d6058717a 100644 --- a/tests_e2e/test_wire_prompt.py +++ b/tests_e2e/test_wire_prompt.py @@ -304,7 +304,7 @@ def test_max_steps_reached(tmp_path) -> None: "tool_call_id": "tc-1", "return_value": { "is_error": False, - "output": "", + "output": "Todo list updated", "message": "Todo list updated", "display": [ { From c2e201553e3d6221819a63e60ea6288a97cd07c7 Mon Sep 17 00:00:00 2001 From: Kaiyi Date: Fri, 3 Apr 2026 15:10:22 +0800 Subject: [PATCH 2/7] fix(todo): load root todos from disk and harden subagent state parsing - Load root todos from disk in query mode to ensure freshness - Catch UnicodeDecodeError in subagent state file reading - Validate that parsed JSON is a dict before using it - Add cast for pyright type narrowing after isinstance check Addresses PR review feedback from #1742 --- src/kimi_cli/tools/todo/__init__.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/kimi_cli/tools/todo/__init__.py b/src/kimi_cli/tools/todo/__init__.py index 5310d706a..ef85911f7 100644 --- a/src/kimi_cli/tools/todo/__init__.py +++ b/src/kimi_cli/tools/todo/__init__.py @@ -1,6 +1,6 @@ import json from pathlib import Path -from typing import Any, Literal, override +from typing import Any, Literal, cast, override from kosong.tooling import CallableTool2, ToolReturnValue from pydantic import BaseModel, Field @@ -108,8 +108,12 @@ def _save_root_todos(self, items: list[TodoItemState]) -> None: session.state.todos = items def _load_root_todos(self) -> list[Todo]: - todos = self._runtime.session.state.todos - return [Todo(title=t.title, status=t.status) for t in todos] + from kimi_cli.session_state import load_session_state + + session = self._runtime.session + fresh = load_session_state(session.dir) + session.state.todos = fresh.todos + return [Todo(title=t.title, status=t.status) for t in fresh.todos] def _save_subagent_todos(self, items: list[TodoItemState]) -> None: state_file = self._subagent_state_file() @@ -145,10 +149,14 @@ def _read_subagent_state(path: Path) -> dict[str, Any]: if not path.exists(): return {} try: - return json.loads(path.read_text(encoding="utf-8")) - except (json.JSONDecodeError, OSError): + data = json.loads(path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError, UnicodeDecodeError): logger.warning("Corrupted subagent todo state, using defaults: {path}", path=path) return {} + if not isinstance(data, dict): + logger.warning("Invalid subagent todo state type, using defaults: {path}", path=path) + return {} + return cast(dict[str, Any], data) @staticmethod def _write_subagent_state(path: Path, data: dict[str, Any]) -> None: From 08336f47178203d34f9d37e6c45764f106428785 Mon Sep 17 00:00:00 2001 From: Kaiyi Date: Fri, 3 Apr 2026 15:29:38 +0800 Subject: [PATCH 3/7] fix(todo): validate todos value is a list before iteration Guard against non-list values in subagent state.json (e.g. {"todos": null}) that would cause TypeError during iteration. Addresses PR review feedback from #1742 --- src/kimi_cli/tools/todo/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/kimi_cli/tools/todo/__init__.py b/src/kimi_cli/tools/todo/__init__.py index ef85911f7..8e7f5679f 100644 --- a/src/kimi_cli/tools/todo/__init__.py +++ b/src/kimi_cli/tools/todo/__init__.py @@ -128,7 +128,8 @@ def _load_subagent_todos(self) -> list[Todo]: if state_file is None: return [] data = self._read_subagent_state(state_file) - raw_todos = data.get("todos", []) + raw_todos_val = data.get("todos", []) + raw_todos = cast(list[Any], raw_todos_val) if isinstance(raw_todos_val, list) else [] result: list[Todo] = [] for item in raw_todos: try: From 6bd46e7a31b0dabbdcb4cbe8dbbac4095fc59aa8 Mon Sep 17 00:00:00 2001 From: Kaiyi Date: Fri, 3 Apr 2026 16:04:19 +0800 Subject: [PATCH 4/7] fix(todo): use Session.save_state() for root todos persistence Replace standalone read-modify-write in _save_root_todos with session.save_state() to reuse existing merge semantics and avoid overwriting worker-owned fields. Remove todos from externally-mutable field refresh in save_state() since todos are worker-owned, not externally-mutable. Addresses PR review feedback from #1742 --- src/kimi_cli/session.py | 1 - src/kimi_cli/tools/todo/__init__.py | 7 +------ tests/core/test_session.py | 20 +++++++------------- 3 files changed, 8 insertions(+), 20 deletions(-) diff --git a/src/kimi_cli/session.py b/src/kimi_cli/session.py index a0ab72dee..cdef6c0e4 100644 --- a/src/kimi_cli/session.py +++ b/src/kimi_cli/session.py @@ -94,7 +94,6 @@ def save_state(self) -> None: self.state.archived = fresh.archived self.state.archived_at = fresh.archived_at self.state.auto_archive_exempt = fresh.auto_archive_exempt - self.state.todos = fresh.todos save_session_state(self.state, self.dir) async def delete(self) -> None: diff --git a/src/kimi_cli/tools/todo/__init__.py b/src/kimi_cli/tools/todo/__init__.py index 8e7f5679f..af6323338 100644 --- a/src/kimi_cli/tools/todo/__init__.py +++ b/src/kimi_cli/tools/todo/__init__.py @@ -98,14 +98,9 @@ def _load_todos(self) -> list[Todo]: return self._load_subagent_todos() def _save_root_todos(self, items: list[TodoItemState]) -> None: - from kimi_cli.session_state import load_session_state, save_session_state - session = self._runtime.session - # Read-modify-write to avoid overwriting concurrent changes - fresh = load_session_state(session.dir) - fresh.todos = items - save_session_state(fresh, session.dir) session.state.todos = items + session.save_state() def _load_root_todos(self) -> list[Todo]: from kimi_cli.session_state import load_session_state diff --git a/tests/core/test_session.py b/tests/core/test_session.py index dc45fb9a9..0bda4794a 100644 --- a/tests/core/test_session.py +++ b/tests/core/test_session.py @@ -359,26 +359,20 @@ async def test_save_state_reload_does_not_lose_worker_fields( assert result.custom_title == "External Title" -async def test_save_state_preserves_external_todos(isolated_share_dir: Path, work_dir: KaosPath): - """save_state() should not overwrite todos written externally (e.g., by SetTodoList).""" - from kimi_cli.session_state import TodoItemState, load_session_state, save_session_state +async def test_save_state_preserves_in_memory_todos(isolated_share_dir: Path, work_dir: KaosPath): + """save_state() should persist in-memory todos (worker-owned) to disk.""" + from kimi_cli.session_state import TodoItemState, load_session_state session = await Session.create(work_dir) - # Simulate SetTodoList writing todos directly to disk (as _save_root_todos does) - fresh = load_session_state(session.dir) - fresh.todos = [TodoItemState(title="External todo", status="pending")] - save_session_state(fresh, session.dir) - - # Worker's in-memory state still has empty todos - assert session.state.todos == [] - - # Worker saves — should reload todos from disk, not overwrite with empty list + # Simulate SetTodoList setting todos in memory before calling save_state() + session.state.todos = [TodoItemState(title="Worker todo", status="pending")] session.save_state() + # Verify todos were persisted to disk result = load_session_state(session.dir) assert len(result.todos) == 1 - assert result.todos[0].title == "External todo" + assert result.todos[0].title == "Worker todo" assert result.todos[0].status == "pending" From 2acdcac3f34f59395297cb9df57d1623f42b741a Mon Sep 17 00:00:00 2001 From: Kaiyi Date: Fri, 3 Apr 2026 16:12:11 +0800 Subject: [PATCH 5/7] docs: add changelog entry for SetTodoList refactoring (#1710) --- CHANGELOG.md | 2 ++ docs/en/release-notes/changelog.md | 2 ++ docs/zh/release-notes/changelog.md | 2 ++ 3 files changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50f93a4ea..5e5255830 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ Only write entries that are worth mentioning to users. ## Unreleased +- Todo: Refactor SetTodoList to persist state and prevent tool call storms — todos are now persisted to session state (root agent) and independent state files (sub-agents); adds query mode (omit `todos` to read current state) and clear mode (pass `[]`); includes anti-storm guidance in tool description to prevent repeated calls without progress (fixes #1710) + ## 1.30.0 (2026-04-02) - Shell: Refine idle background completion auto-trigger — resumed shell sessions no longer auto-start a foreground turn from stale pending background notifications before the user sends a message, and fresh background completions now wait briefly while the user is actively typing to avoid stealing the prompt or breaking CJK IME composition diff --git a/docs/en/release-notes/changelog.md b/docs/en/release-notes/changelog.md index 4b0e3a2be..074f38571 100644 --- a/docs/en/release-notes/changelog.md +++ b/docs/en/release-notes/changelog.md @@ -4,6 +4,8 @@ This page documents the changes in each Kimi Code CLI release. ## Unreleased +- Todo: Refactor SetTodoList to persist state and prevent tool call storms — todos are now persisted to session state (root agent) and independent state files (sub-agents); adds query mode (omit `todos` to read current state) and clear mode (pass `[]`); includes anti-storm guidance in tool description to prevent repeated calls without progress (fixes #1710) + ## 1.30.0 (2026-04-02) - Shell: Refine idle background completion auto-trigger — resumed shell sessions no longer auto-start a foreground turn from stale pending background notifications before the user sends a message, and fresh background completions now wait briefly while the user is actively typing to avoid stealing the prompt or breaking CJK IME composition diff --git a/docs/zh/release-notes/changelog.md b/docs/zh/release-notes/changelog.md index e88ceba79..f92da1646 100644 --- a/docs/zh/release-notes/changelog.md +++ b/docs/zh/release-notes/changelog.md @@ -4,6 +4,8 @@ ## 未发布 +- Todo:重构 `SetTodoList` 工具,支持状态持久化并防止工具调用风暴——待办事项现在会持久化到会话状态(主 Agent)和独立状态文件(子 Agent);新增查询模式(省略 `todos` 参数可读取当前状态)和清空模式(传 `[]` 清空);工具描述中增加了防风暴指导,防止在没有实际进展的情况下反复调用(修复 #1710) + ## 1.30.0 (2026-04-02) - Shell:细化空闲时后台完成的自动触发行为——恢复的 Shell 会话在用户发送消息前,不会因为历史遗留的后台通知而自动启动新的前景轮次;当用户正在输入时,新的后台完成事件也会短暂延后触发,避免抢占提示符或打断 CJK 输入法组合态 From cfe8c25f0a4d351a023806f1c317b10ddbb04249 Mon Sep 17 00:00:00 2001 From: Kaiyi Date: Fri, 3 Apr 2026 16:14:44 +0800 Subject: [PATCH 6/7] fix(todo): handle malformed root todo items gracefully in query mode Add per-item try/except in _load_root_todos to skip invalid items (e.g. empty titles) instead of crashing query mode, consistent with the subagent loading path. Addresses PR review feedback from #1742 --- src/kimi_cli/tools/todo/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/kimi_cli/tools/todo/__init__.py b/src/kimi_cli/tools/todo/__init__.py index af6323338..2d5575cc9 100644 --- a/src/kimi_cli/tools/todo/__init__.py +++ b/src/kimi_cli/tools/todo/__init__.py @@ -108,7 +108,13 @@ def _load_root_todos(self) -> list[Todo]: session = self._runtime.session fresh = load_session_state(session.dir) session.state.todos = fresh.todos - return [Todo(title=t.title, status=t.status) for t in fresh.todos] + result: list[Todo] = [] + for t in fresh.todos: + try: + result.append(Todo(title=t.title, status=t.status)) + except Exception: + logger.warning("Skipping malformed todo item in root state: {t}", t=t) + return result def _save_subagent_todos(self, items: list[TodoItemState]) -> None: state_file = self._subagent_state_file() From 2e14f61d328cd8db795475b984b7ad1426c542c4 Mon Sep 17 00:00:00 2001 From: Kaiyi Date: Fri, 3 Apr 2026 16:28:36 +0800 Subject: [PATCH 7/7] style(todo): simplify TodoItemState default_factory to plain list Replace misleading lambda: list[TodoItemState]() with default_factory=list for consistency with other list fields in SessionState. --- src/kimi_cli/session_state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kimi_cli/session_state.py b/src/kimi_cli/session_state.py index 8cc4ba3b4..b8f031cde 100644 --- a/src/kimi_cli/session_state.py +++ b/src/kimi_cli/session_state.py @@ -40,7 +40,7 @@ class SessionState(BaseModel): archived_at: float | None = None auto_archive_exempt: bool = False # Todo list state - todos: list[TodoItemState] = Field(default_factory=lambda: list[TodoItemState]()) + todos: list[TodoItemState] = Field(default_factory=list) # pyright: ignore[reportUnknownVariableType] _LEGACY_METADATA_FILENAME = "metadata.json"