From ecd6f638a87fd4e90a8dec95d492ff0f356295f1 Mon Sep 17 00:00:00 2001 From: Kaiyi Date: Fri, 3 Apr 2026 14:38:01 +0800 Subject: [PATCH 1/4] feat(tools): add totalLines and tail mode to ReadFile tool Return total line count in ReadFile response message so the model knows file length on every read. Support negative line_offset for tail mode (e.g. line_offset=-100 reads last 100 lines) using a deque sliding window. Cap abs(line_offset) at MAX_LINES to prevent unbounded memory. --- src/kimi_cli/tools/file/read.md | 2 + src/kimi_cli/tools/file/read.py | 196 +++++++++++++++++++------- tests/tools/test_read_file.py | 182 +++++++++++++++++++++--- tests/tools/test_tool_descriptions.py | 2 + tests/tools/test_tool_schemas.py | 3 +- 5 files changed, 314 insertions(+), 71 deletions(-) diff --git a/src/kimi_cli/tools/file/read.md b/src/kimi_cli/tools/file/read.md index 57e08a24c..4ea7be59b 100644 --- a/src/kimi_cli/tools/file/read.md +++ b/src/kimi_cli/tools/file/read.md @@ -10,5 +10,7 @@ Read text content from a file. - If you want to search for a certain content/pattern, prefer Grep tool over ReadFile. - Content will be returned with a line number before each line like `cat -n` format. - Use `line_offset` and `n_lines` parameters when you only need to read a part of the file. +- Use negative `line_offset` to read from the end of the file (e.g. `line_offset=-100` reads the last 100 lines). This is useful for viewing the tail of log files. The absolute value cannot exceed ${MAX_LINES}. +- The tool always returns the total number of lines in the file in its message, which you can use to plan subsequent reads. - The maximum number of lines that can be read at once is ${MAX_LINES}. - Any lines longer than ${MAX_LINE_LENGTH} characters will be truncated, ending with "...". diff --git a/src/kimi_cli/tools/file/read.py b/src/kimi_cli/tools/file/read.py index eef3a8acc..f840fb67c 100644 --- a/src/kimi_cli/tools/file/read.py +++ b/src/kimi_cli/tools/file/read.py @@ -1,9 +1,10 @@ +from collections import deque from pathlib import Path from typing import override from kaos.path import KaosPath from kosong.tooling import CallableTool2, ToolError, ToolOk, ToolReturnValue -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator from kimi_cli.soul.agent import Runtime from kimi_cli.tools.file.utils import MEDIA_SNIFF_BYTES, detect_file_type @@ -27,10 +28,11 @@ class Params(BaseModel): description=( "The line number to start reading from. " "By default read from the beginning of the file. " - "Set this when the file is too large to read at once." + "Set this when the file is too large to read at once. " + "Negative values read from the end of the file (e.g. -100 reads the last 100 lines). " + f"The absolute value of negative offset cannot exceed {MAX_LINES}." ), default=1, - ge=1, ) n_lines: int = Field( description=( @@ -42,6 +44,20 @@ class Params(BaseModel): ge=1, ) + @model_validator(mode="after") + def _validate_line_offset(self) -> "Params": + if self.line_offset == 0: + raise ValueError( + "line_offset cannot be 0; use 1 for the first line or -1 for the last line" + ) + if self.line_offset < -MAX_LINES: + raise ValueError( + f"line_offset cannot be less than -{MAX_LINES}. " + "Use a positive line_offset with the total line count " + "to read from a specific position." + ) + return self + class ReadFile(CallableTool2[Params]): name: str = "ReadFile" @@ -138,60 +154,134 @@ async def __call__(self, params: Params) -> ToolReturnValue: brief="File not readable", ) - assert params.line_offset >= 1 assert params.n_lines >= 1 + assert params.line_offset != 0 - lines: list[str] = [] - n_bytes = 0 - truncated_line_numbers: list[int] = [] - max_lines_reached = False - max_bytes_reached = False - current_line_no = 0 - async for line in p.read_lines(errors="replace"): - current_line_no += 1 - if current_line_no < params.line_offset: - continue - truncated = truncate_line(line, MAX_LINE_LENGTH) - if truncated != line: - truncated_line_numbers.append(current_line_no) - lines.append(truncated) - n_bytes += len(truncated.encode("utf-8")) - if len(lines) >= params.n_lines: - break - if len(lines) >= MAX_LINES: - max_lines_reached = True - break - if n_bytes >= MAX_BYTES: - max_bytes_reached = True - break - - # Format output with line numbers like `cat -n` - lines_with_no: list[str] = [] - for line_num, line in zip( - range(params.line_offset, params.line_offset + len(lines)), lines, strict=True - ): - # Use 6-digit line number width, right-aligned, with tab separator - lines_with_no.append(f"{line_num:6d}\t{line}") - - message = ( - f"{len(lines)} lines read from file starting from line {params.line_offset}." - if len(lines) > 0 - else "No lines read from file." - ) - if max_lines_reached: - message += f" Max {MAX_LINES} lines reached." - elif max_bytes_reached: - message += f" Max {MAX_BYTES} bytes reached." - elif len(lines) < params.n_lines: - message += " End of file reached." - if truncated_line_numbers: - message += f" Lines {truncated_line_numbers} were truncated." - return ToolOk( - output="".join(lines_with_no), # lines already contain \n, just join them - message=message, - ) + if params.line_offset < 0: + return await self._read_tail(p, params) + else: + return await self._read_forward(p, params) except Exception as e: return ToolError( message=f"Failed to read {params.path}. Error: {e}", brief="Failed to read file", ) + + async def _read_forward(self, p: KaosPath, params: Params) -> ToolReturnValue: + """Read file from a positive line_offset, counting total lines.""" + lines: list[str] = [] + n_bytes = 0 + truncated_line_numbers: list[int] = [] + max_lines_reached = False + max_bytes_reached = False + collecting = True # False once we've collected enough lines + current_line_no = 0 + async for line in p.read_lines(errors="replace"): + current_line_no += 1 + if not collecting: + continue + if current_line_no < params.line_offset: + continue + truncated = truncate_line(line, MAX_LINE_LENGTH) + if truncated != line: + truncated_line_numbers.append(current_line_no) + lines.append(truncated) + n_bytes += len(truncated.encode("utf-8")) + if len(lines) >= params.n_lines: + collecting = False + elif len(lines) >= MAX_LINES: + max_lines_reached = True + collecting = False + elif n_bytes >= MAX_BYTES: + max_bytes_reached = True + collecting = False + + total_lines = current_line_no + + # Format output with line numbers like `cat -n` + start_line = params.line_offset + lines_with_no: list[str] = [] + for line_num, line in zip(range(start_line, start_line + len(lines)), lines, strict=True): + lines_with_no.append(f"{line_num:6d}\t{line}") + + message = ( + f"{len(lines)} lines read from file starting from line {start_line}." + if len(lines) > 0 + else "No lines read from file." + ) + message += f" Total lines in file: {total_lines}." + if max_lines_reached: + message += f" Max {MAX_LINES} lines reached." + elif max_bytes_reached: + message += f" Max {MAX_BYTES} bytes reached." + elif len(lines) < params.n_lines: + message += " End of file reached." + if truncated_line_numbers: + message += f" Lines {truncated_line_numbers} were truncated." + return ToolOk( + output="".join(lines_with_no), + message=message, + ) + + async def _read_tail(self, p: KaosPath, params: Params) -> ToolReturnValue: + """Read file from a negative line_offset (tail mode).""" + tail_count = abs(params.line_offset) + + # Use a deque to keep the last `tail_count` lines with their line numbers + # Each entry: (line_no, truncated_line, was_truncated) + tail_buf: deque[tuple[int, str, bool]] = deque(maxlen=tail_count) + current_line_no = 0 + async for line in p.read_lines(errors="replace"): + current_line_no += 1 + truncated = truncate_line(line, MAX_LINE_LENGTH) + tail_buf.append((current_line_no, truncated, truncated != line)) + + total_lines = current_line_no + + # Apply n_lines limit and collect results + lines: list[str] = [] + line_numbers: list[int] = [] + truncated_line_numbers: list[int] = [] + n_bytes = 0 + max_lines_reached = False + max_bytes_reached = False + + for line_no, truncated, was_truncated in tail_buf: + if was_truncated: + truncated_line_numbers.append(line_no) + lines.append(truncated) + line_numbers.append(line_no) + n_bytes += len(truncated.encode("utf-8")) + if len(lines) >= params.n_lines: + break + if len(lines) >= MAX_LINES: + max_lines_reached = True + break + if n_bytes >= MAX_BYTES: + max_bytes_reached = True + break + + # Format output with absolute line numbers + lines_with_no: list[str] = [] + for line_num, line in zip(line_numbers, lines, strict=True): + lines_with_no.append(f"{line_num:6d}\t{line}") + + start_line = line_numbers[0] if line_numbers else total_lines + 1 + message = ( + f"{len(lines)} lines read from file starting from line {start_line}." + if len(lines) > 0 + else "No lines read from file." + ) + message += f" Total lines in file: {total_lines}." + if max_lines_reached: + message += f" Max {MAX_LINES} lines reached." + elif max_bytes_reached: + message += f" Max {MAX_BYTES} bytes reached." + elif len(lines) < params.n_lines: + message += " End of file reached." + if truncated_line_numbers: + message += f" Lines {truncated_line_numbers} were truncated." + return ToolOk( + output="".join(lines_with_no), + message=message, + ) diff --git a/tests/tools/test_read_file.py b/tests/tools/test_read_file.py index 0aa1b6a0d..1ab660bca 100644 --- a/tests/tools/test_read_file.py +++ b/tests/tools/test_read_file.py @@ -44,7 +44,7 @@ async def test_read_entire_file(read_file_tool: ReadFile, sample_file: KaosPath) """ ) assert result.message == snapshot( - "5 lines read from file starting from line 1. End of file reached." + "5 lines read from file starting from line 1. Total lines in file: 5. End of file reached." ) @@ -60,7 +60,7 @@ async def test_read_with_line_offset(read_file_tool: ReadFile, sample_file: Kaos """ ) assert result.message == snapshot( - "3 lines read from file starting from line 3. End of file reached." + "3 lines read from file starting from line 3. Total lines in file: 5. End of file reached." ) @@ -74,7 +74,9 @@ async def test_read_with_n_lines(read_file_tool: ReadFile, sample_file: KaosPath 2 Line 2: This is a test file """ ) - assert result.message == snapshot("2 lines read from file starting from line 1.") + assert result.message == snapshot( + "2 lines read from file starting from line 1. Total lines in file: 5." + ) async def test_read_with_line_offset_and_n_lines(read_file_tool: ReadFile, sample_file: KaosPath): @@ -87,7 +89,9 @@ async def test_read_with_line_offset_and_n_lines(read_file_tool: ReadFile, sampl 3 Line 3: With multiple lines """ ) - assert result.message == snapshot("2 lines read from file starting from line 2.") + assert result.message == snapshot( + "2 lines read from file starting from line 2. Total lines in file: 5." + ) async def test_read_nonexistent_file(read_file_tool: ReadFile, temp_work_dir: KaosPath): @@ -114,7 +118,7 @@ async def test_read_with_relative_path( result = await read_file_tool(Params(path=str(sample_file.relative_to(temp_work_dir)))) assert not result.is_error assert result.message == snapshot( - "5 lines read from file starting from line 1. End of file reached." + "5 lines read from file starting from line 1. Total lines in file: 5. End of file reached." ) assert result.output == snapshot("""\ 1 Line 1: Hello World @@ -147,7 +151,9 @@ async def test_read_empty_file(read_file_tool: ReadFile, temp_work_dir: KaosPath result = await read_file_tool(Params(path=str(empty_file))) assert not result.is_error assert result.output == snapshot("") - assert result.message == snapshot("No lines read from file. End of file reached.") + assert result.message == snapshot( + "No lines read from file. Total lines in file: 0. End of file reached." + ) async def test_read_image_file(read_file_tool: ReadFile, temp_work_dir: KaosPath): @@ -200,7 +206,9 @@ async def test_read_line_offset_beyond_file_length(read_file_tool: ReadFile, sam result = await read_file_tool(Params(path=str(sample_file), line_offset=10)) assert not result.is_error assert result.output == snapshot("") - assert result.message == snapshot("No lines read from file. End of file reached.") + assert result.message == snapshot( + "No lines read from file. Total lines in file: 5. End of file reached." + ) async def test_read_unicode_file(read_file_tool: ReadFile, temp_work_dir: KaosPath): @@ -218,7 +226,7 @@ async def test_read_unicode_file(read_file_tool: ReadFile, temp_work_dir: KaosPa """ ) assert result.message == snapshot( - "2 lines read from file starting from line 1. End of file reached." + "2 lines read from file starting from line 1. Total lines in file: 2. End of file reached." ) @@ -237,7 +245,7 @@ async def test_read_edge_cases(read_file_tool: ReadFile, sample_file: KaosPath): """ ) assert result.message == snapshot( - "5 lines read from file starting from line 1. End of file reached." + "5 lines read from file starting from line 1. Total lines in file: 5. End of file reached." ) # Test reading from line 5 (last line) @@ -245,14 +253,16 @@ async def test_read_edge_cases(read_file_tool: ReadFile, sample_file: KaosPath): assert not result.is_error assert result.output == snapshot(" 5\tLine 5: End of file") assert result.message == snapshot( - "1 lines read from file starting from line 5. End of file reached." + "1 lines read from file starting from line 5. Total lines in file: 5. End of file reached." ) # Test reading with offset and n_lines combined result = await read_file_tool(Params(path=str(sample_file), line_offset=2, n_lines=1)) assert not result.is_error assert result.output == snapshot(" 2\tLine 2: This is a test file\n") - assert result.message == snapshot("1 lines read from file starting from line 2.") + assert result.message == snapshot( + "1 lines read from file starting from line 2. Total lines in file: 5." + ) async def test_line_truncation_and_messaging(read_file_tool: ReadFile, temp_work_dir: KaosPath): @@ -288,8 +298,7 @@ async def test_line_truncation_and_messaging(read_file_tool: ReadFile, temp_work assert not result.is_error assert isinstance(result.output, str) assert result.message == snapshot( - "3 lines read from file starting from line 1. End of file reached. " - "Lines [1, 3] were truncated." + "3 lines read from file starting from line 1. Total lines in file: 3. End of file reached. Lines [1, 3] were truncated." ) # Verify truncation actually happened for specific lines @@ -306,12 +315,21 @@ async def test_line_truncation_and_messaging(read_file_tool: ReadFile, temp_work async def test_parameter_validation_line_offset(read_file_tool: ReadFile, sample_file: KaosPath): """Test that line_offset parameter validation works correctly.""" - # Test line_offset < 1 should be rejected by Pydantic validation + # line_offset=0 is invalid (must be positive or negative, not zero) with pytest.raises(ValueError, match="line_offset"): Params(path=str(sample_file), line_offset=0) + # Negative values are now valid (tail mode) + params = Params(path=str(sample_file), line_offset=-1) + assert params.line_offset == -1 + + # Negative offset exceeding MAX_LINES should be rejected with pytest.raises(ValueError, match="line_offset"): - Params(path=str(sample_file), line_offset=-1) + Params(path=str(sample_file), line_offset=-(MAX_LINES + 1)) + + # Exactly -MAX_LINES should be accepted + params = Params(path=str(sample_file), line_offset=-MAX_LINES) + assert params.line_offset == -MAX_LINES async def test_parameter_validation_n_lines(read_file_tool: ReadFile, sample_file: KaosPath): @@ -377,7 +395,7 @@ async def test_read_with_tilde_path_expansion(read_file_tool: ReadFile, temp_wor assert not result.is_error assert "Test content for tilde expansion" in result.output assert result.message == snapshot( - "1 lines read from file starting from line 1. End of file reached." + "1 lines read from file starting from line 1. Total lines in file: 1. End of file reached." ) finally: # Clean up @@ -406,3 +424,135 @@ async def test_read_allows_non_sensitive_dotfile(read_file_tool: ReadFile, temp_ assert not result.is_error assert "node_modules" in result.output + + +# ── Tests for totalLines and tail (negative offset) ────────────────────────── + + +async def test_read_tail_basic(read_file_tool: ReadFile, sample_file: KaosPath): + """Negative line_offset=-3 on a 5-line file should return the last 3 lines.""" + result = await read_file_tool(Params(path=str(sample_file), line_offset=-3)) + assert not result.is_error + # Should return lines 3, 4, 5 with absolute line numbers + assert " 3\tLine 3: With multiple lines\n" in result.output + assert " 4\tLine 4: For testing purposes\n" in result.output + assert " 5\tLine 5: End of file" in result.output + # Should NOT contain lines 1 or 2 + assert "Line 1:" not in result.output + assert "Line 2:" not in result.output + # Message must include total lines info + assert "Total lines in file: 5." in result.message + + +async def test_read_tail_with_n_lines(read_file_tool: ReadFile, sample_file: KaosPath): + """Negative offset=-5 with n_lines=2 should return 2 lines starting from the tail position.""" + result = await read_file_tool(Params(path=str(sample_file), line_offset=-5, n_lines=2)) + assert not result.is_error + # -5 on a 5-line file means start from line 1, then n_lines=2 limits to lines 1-2 + assert " 1\tLine 1: Hello World\n" in result.output + assert " 2\tLine 2: This is a test file\n" in result.output + assert "Line 3:" not in result.output + assert "Total lines in file: 5." in result.message + + +async def test_read_tail_exceeds_file(read_file_tool: ReadFile, sample_file: KaosPath): + """Negative offset exceeding file length should return the entire file.""" + result = await read_file_tool(Params(path=str(sample_file), line_offset=-100)) + assert not result.is_error + # Should return all 5 lines + assert " 1\tLine 1: Hello World\n" in result.output + assert " 5\tLine 5: End of file" in result.output + assert "Total lines in file: 5." in result.message + + +async def test_read_tail_empty_file(read_file_tool: ReadFile, temp_work_dir: KaosPath): + """Negative offset on an empty file should return nothing with totalLines=0.""" + empty_file = temp_work_dir / "empty_tail.txt" + await empty_file.write_text("") + + result = await read_file_tool(Params(path=str(empty_file), line_offset=-10)) + assert not result.is_error + assert result.output == "" + assert "Total lines in file: 0." in result.message + + +async def test_read_total_lines_with_positive_offset( + read_file_tool: ReadFile, sample_file: KaosPath +): + """Positive offset should also include totalLines in the message.""" + result = await read_file_tool(Params(path=str(sample_file), line_offset=3, n_lines=1)) + assert not result.is_error + # Should return only line 3 + assert " 3\tLine 3: With multiple lines" in result.output + assert "Line 1:" not in result.output + assert "Line 4:" not in result.output + # Message must include total lines even for positive offset + assert "Total lines in file: 5." in result.message + + +async def test_read_tail_last_line(read_file_tool: ReadFile, sample_file: KaosPath): + """line_offset=-1 should return only the last line with correct absolute line number.""" + result = await read_file_tool(Params(path=str(sample_file), line_offset=-1)) + assert not result.is_error + assert result.output == " 5\tLine 5: End of file" + assert "1 lines read from file starting from line 5." in result.message + assert "Total lines in file: 5." in result.message + assert "End of file reached." in result.message + + +async def test_read_tail_max_lines(read_file_tool: ReadFile, temp_work_dir: KaosPath): + """Tail mode with -MAX_LINES on a file larger than MAX_LINES should return MAX_LINES lines.""" + # Create a file with more than MAX_LINES lines + large_file = temp_work_dir / "tail_large.txt" + total = MAX_LINES + 500 # 1500 lines + content = "\n".join([f"Line {i}" for i in range(1, total + 1)]) + await large_file.write_text(content) + + # Use -MAX_LINES (the maximum allowed negative offset) + result = await read_file_tool(Params(path=str(large_file), line_offset=-MAX_LINES)) + assert not result.is_error + assert f"Total lines in file: {total}." in result.message + # deque captures last 1000 lines (501-1500), n_lines defaults to MAX_LINES so all 1000 are output + assert isinstance(result.output, str) + output_lines = [line for line in result.output.split("\n") if line.strip()] + assert len(output_lines) == MAX_LINES + # First line should be line 501 (total - MAX_LINES + 1) + assert output_lines[0].endswith(f"Line {total - MAX_LINES + 1}") + + +async def test_read_tail_max_bytes(read_file_tool: ReadFile, temp_work_dir: KaosPath): + """Tail mode should respect MAX_BYTES boundary.""" + large_file = temp_work_dir / "tail_bytes.txt" + # Each line ~1000 bytes, need > 100KB = ~105 lines to exceed MAX_BYTES + line_content = "B" * 1000 + num_lines = (MAX_BYTES // 1000) + 20 + content = "\n".join([line_content] * num_lines) + await large_file.write_text(content) + + result = await read_file_tool(Params(path=str(large_file), line_offset=-(num_lines))) + assert not result.is_error + assert f"Max {MAX_BYTES} bytes reached" in result.message + assert f"Total lines in file: {num_lines}." in result.message + + +async def test_read_tail_line_truncation(read_file_tool: ReadFile, temp_work_dir: KaosPath): + """Tail mode should correctly report truncated lines via was_truncated flag in deque.""" + trunc_file = temp_work_dir / "tail_truncation.txt" + short_line = "Short line" + long_line = "X" * 2500 # Exceeds MAX_LINE_LENGTH=2000 + # 5 lines: short, long, short, long, short + content = f"{short_line}\n{long_line}\n{short_line}\n{long_line}\n{short_line}" + await trunc_file.write_text(content) + + # Read last 3 lines (lines 3, 4, 5) + result = await read_file_tool(Params(path=str(trunc_file), line_offset=-3)) + assert not result.is_error + assert "Total lines in file: 5." in result.message + # Line 4 is a long line that should be truncated + assert "Lines [4] were truncated." in result.message + # Verify the truncated line ends with "..." + assert isinstance(result.output, str) + output_lines = result.output.split("\n") + line_4 = [x for x in output_lines if x.strip().startswith("4")][0] + actual_content = line_4.split("\t", 1)[1] + assert actual_content.endswith("...") diff --git a/tests/tools/test_tool_descriptions.py b/tests/tools/test_tool_descriptions.py index 90d7128e9..cf7c689e0 100644 --- a/tests/tools/test_tool_descriptions.py +++ b/tests/tools/test_tool_descriptions.py @@ -236,6 +236,8 @@ def test_read_file_description(read_file_tool: ReadFile): - If you want to search for a certain content/pattern, prefer Grep tool over ReadFile. - Content will be returned with a line number before each line like `cat -n` format. - Use `line_offset` and `n_lines` parameters when you only need to read a part of the file. +- Use negative `line_offset` to read from the end of the file (e.g. `line_offset=-100` reads the last 100 lines). This is useful for viewing the tail of log files. The absolute value cannot exceed 1000. +- The tool always returns the total number of lines in the file in its message, which you can use to plan subsequent reads. - The maximum number of lines that can be read at once is 1000. - Any lines longer than 2000 characters will be truncated, ending with "...". """ diff --git a/tests/tools/test_tool_schemas.py b/tests/tools/test_tool_schemas.py index 82852e5ea..41a2fcf73 100644 --- a/tests/tools/test_tool_schemas.py +++ b/tests/tools/test_tool_schemas.py @@ -247,8 +247,7 @@ def test_read_file_params_schema(read_file_tool: ReadFile): }, "line_offset": { "default": 1, - "description": "The line number to start reading from. By default read from the beginning of the file. Set this when the file is too large to read at once.", - "minimum": 1, + "description": "The line number to start reading from. By default read from the beginning of the file. Set this when the file is too large to read at once. Negative values read from the end of the file (e.g. -100 reads the last 100 lines). The absolute value of negative offset cannot exceed 1000.", "type": "integer", }, "n_lines": { From 0e9cdc893375f662215b87cfe15bbc54b07f7411 Mon Sep 17 00:00:00 2001 From: Kaiyi Date: Fri, 3 Apr 2026 15:03:37 +0800 Subject: [PATCH 2/4] fix(tools): keep newest lines when tail mode hits MAX_BYTES When tail mode exceeds MAX_BYTES, reverse-scan from EOF to determine the byte-safe range so the lines closest to end-of-file are preserved instead of the oldest lines in the tail window. --- src/kimi_cli/tools/file/read.py | 25 +++++++++++++++++-------- tests/tools/test_read_file.py | 22 +++++++++++++++++----- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/kimi_cli/tools/file/read.py b/src/kimi_cli/tools/file/read.py index f840fb67c..3482e85e6 100644 --- a/src/kimi_cli/tools/file/read.py +++ b/src/kimi_cli/tools/file/read.py @@ -238,28 +238,37 @@ async def _read_tail(self, p: KaosPath, params: Params) -> ToolReturnValue: total_lines = current_line_no - # Apply n_lines limit and collect results + # Determine byte-safe range by scanning from the newest (end) of tail_buf. + # This ensures MAX_BYTES truncation keeps the lines closest to EOF. + byte_safe_count = 0 + n_bytes = 0 + for _, truncated, _ in reversed(tail_buf): + n_bytes += len(truncated.encode("utf-8")) + if n_bytes > MAX_BYTES: + break + byte_safe_count += 1 + max_bytes_reached = byte_safe_count < len(tail_buf) + + # Take only the byte-safe suffix of tail_buf + byte_safe_start = len(tail_buf) - byte_safe_count + safe_entries = list(tail_buf)[byte_safe_start:] + + # Apply n_lines / MAX_LINES limits (forward from the start of safe range) lines: list[str] = [] line_numbers: list[int] = [] truncated_line_numbers: list[int] = [] - n_bytes = 0 max_lines_reached = False - max_bytes_reached = False - for line_no, truncated, was_truncated in tail_buf: + for line_no, truncated, was_truncated in safe_entries: if was_truncated: truncated_line_numbers.append(line_no) lines.append(truncated) line_numbers.append(line_no) - n_bytes += len(truncated.encode("utf-8")) if len(lines) >= params.n_lines: break if len(lines) >= MAX_LINES: max_lines_reached = True break - if n_bytes >= MAX_BYTES: - max_bytes_reached = True - break # Format output with absolute line numbers lines_with_no: list[str] = [] diff --git a/tests/tools/test_read_file.py b/tests/tools/test_read_file.py index 1ab660bca..e97312fe4 100644 --- a/tests/tools/test_read_file.py +++ b/tests/tools/test_read_file.py @@ -521,12 +521,13 @@ async def test_read_tail_max_lines(read_file_tool: ReadFile, temp_work_dir: Kaos async def test_read_tail_max_bytes(read_file_tool: ReadFile, temp_work_dir: KaosPath): - """Tail mode should respect MAX_BYTES boundary.""" + """Tail mode MAX_BYTES truncation should keep newest lines (closest to EOF).""" large_file = temp_work_dir / "tail_bytes.txt" - # Each line ~1000 bytes, need > 100KB = ~105 lines to exceed MAX_BYTES - line_content = "B" * 1000 - num_lines = (MAX_BYTES // 1000) + 20 - content = "\n".join([line_content] * num_lines) + # Each line ~1001 bytes (1000 chars + \n), need > 100KB to exceed MAX_BYTES + num_lines = (MAX_BYTES // 1001) + 20 + # Tag each line with its number so we can verify which lines are kept + lines_data = [f"{i:04d}{'B' * 996}" for i in range(1, num_lines + 1)] + content = "\n".join(lines_data) await large_file.write_text(content) result = await read_file_tool(Params(path=str(large_file), line_offset=-(num_lines))) @@ -534,6 +535,17 @@ async def test_read_tail_max_bytes(read_file_tool: ReadFile, temp_work_dir: Kaos assert f"Max {MAX_BYTES} bytes reached" in result.message assert f"Total lines in file: {num_lines}." in result.message + # Verify that the LAST line of the file is included (newest lines kept) + assert isinstance(result.output, str) + output_lines = [x for x in result.output.split("\n") if x.strip()] + last_output = output_lines[-1].split("\t", 1)[1] + assert last_output.startswith(f"{num_lines:04d}"), ( + "MAX_BYTES truncation should keep newest lines closest to EOF" + ) + # Verify that the first output line is NOT line 1 (oldest lines trimmed) + first_output = output_lines[0].split("\t", 1)[1] + assert not first_output.startswith("0001"), "MAX_BYTES truncation should trim oldest lines" + async def test_read_tail_line_truncation(read_file_tool: ReadFile, temp_work_dir: KaosPath): """Tail mode should correctly report truncated lines via was_truncated flag in deque.""" From 396155d21c1193321ab2f88c87c58fe26fa8dfae Mon Sep 17 00:00:00 2001 From: Kaiyi Date: Fri, 3 Apr 2026 15:31:09 +0800 Subject: [PATCH 3/4] fix(tools): apply n_lines before byte-cap in tail mode Reorder tail mode limits so n_lines/MAX_LINES is applied first (from head of tail buffer), then MAX_BYTES reverse-scans the selected range. This prevents byte-cap from shifting the start position when n_lines is small enough to fit within the byte budget. --- src/kimi_cli/tools/file/read.py | 48 +++++++++++++++++---------------- tests/tools/test_read_file.py | 32 ++++++++++++++++++++++ 2 files changed, 57 insertions(+), 23 deletions(-) diff --git a/src/kimi_cli/tools/file/read.py b/src/kimi_cli/tools/file/read.py index 3482e85e6..9e4e12fee 100644 --- a/src/kimi_cli/tools/file/read.py +++ b/src/kimi_cli/tools/file/read.py @@ -238,37 +238,39 @@ async def _read_tail(self, p: KaosPath, params: Params) -> ToolReturnValue: total_lines = current_line_no - # Determine byte-safe range by scanning from the newest (end) of tail_buf. - # This ensures MAX_BYTES truncation keeps the lines closest to EOF. - byte_safe_count = 0 - n_bytes = 0 - for _, truncated, _ in reversed(tail_buf): - n_bytes += len(truncated.encode("utf-8")) - if n_bytes > MAX_BYTES: - break - byte_safe_count += 1 - max_bytes_reached = byte_safe_count < len(tail_buf) - - # Take only the byte-safe suffix of tail_buf - byte_safe_start = len(tail_buf) - byte_safe_count - safe_entries = list(tail_buf)[byte_safe_start:] - - # Apply n_lines / MAX_LINES limits (forward from the start of safe range) + # Step 1: Apply n_lines / MAX_LINES from head of tail_buf. + # This preserves the user's requested start position. + all_entries = list(tail_buf) + line_limit = min(params.n_lines, MAX_LINES) + candidates = all_entries[:line_limit] + max_lines_reached = len(all_entries) > MAX_LINES and len(candidates) == MAX_LINES + + # Step 2: Apply MAX_BYTES — if candidates exceed the byte budget, + # reverse-scan to keep the newest (closest to EOF) lines that fit. + total_candidate_bytes = sum(len(entry[1].encode("utf-8")) for entry in candidates) + if total_candidate_bytes > MAX_BYTES: + max_bytes_reached = True + kept = 0 + n_bytes = 0 + for entry in reversed(candidates): + n_bytes += len(entry[1].encode("utf-8")) + if n_bytes > MAX_BYTES: + break + kept += 1 + candidates = candidates[len(candidates) - kept :] + else: + max_bytes_reached = False + + # Step 3: Collect results from candidates lines: list[str] = [] line_numbers: list[int] = [] truncated_line_numbers: list[int] = [] - max_lines_reached = False - for line_no, truncated, was_truncated in safe_entries: + for line_no, truncated, was_truncated in candidates: if was_truncated: truncated_line_numbers.append(line_no) lines.append(truncated) line_numbers.append(line_no) - if len(lines) >= params.n_lines: - break - if len(lines) >= MAX_LINES: - max_lines_reached = True - break # Format output with absolute line numbers lines_with_no: list[str] = [] diff --git a/tests/tools/test_read_file.py b/tests/tools/test_read_file.py index e97312fe4..26b0772f0 100644 --- a/tests/tools/test_read_file.py +++ b/tests/tools/test_read_file.py @@ -547,6 +547,38 @@ async def test_read_tail_max_bytes(read_file_tool: ReadFile, temp_work_dir: Kaos assert not first_output.startswith("0001"), "MAX_BYTES truncation should trim oldest lines" +async def test_read_tail_n_lines_not_affected_by_byte_cap( + read_file_tool: ReadFile, temp_work_dir: KaosPath +): + """Small n_lines should not be affected by MAX_BYTES truncation. + + Regression test: line_offset=-N, n_lines=1 on a file with long lines + should return the first line of the tail window, not a line shifted by byte-cap. + """ + large_file = temp_work_dir / "tail_nlines_bytecap.txt" + # Create a file where tail_buf total bytes >> MAX_BYTES but n_lines=1 is fine. + # Each line ~2000 bytes (after truncation), 500 lines total. + num_lines = 500 + lines_data = [f"{i:04d}{'X' * 1996}" for i in range(1, num_lines + 1)] + content = "\n".join(lines_data) + await large_file.write_text(content) + + # Request tail window of 200 lines but only read 1 + result = await read_file_tool(Params(path=str(large_file), line_offset=-200, n_lines=1)) + assert not result.is_error + assert isinstance(result.output, str) + + # The first line of the tail window (last 200 lines) is line 301 + output_lines = [x for x in result.output.split("\n") if x.strip()] + assert len(output_lines) == 1 + line_content = output_lines[0].split("\t", 1)[1] + assert line_content.startswith("0301"), ( + f"Expected line 301 (start of tail window), got content starting with: {line_content[:10]}" + ) + # Should NOT report MAX_BYTES since 1 line is well within budget + assert "Max" not in result.message + + async def test_read_tail_line_truncation(read_file_tool: ReadFile, temp_work_dir: KaosPath): """Tail mode should correctly report truncated lines via was_truncated flag in deque.""" trunc_file = temp_work_dir / "tail_truncation.txt" From 50b3585b85b61d9f4327d95ecbd5b60604c32401 Mon Sep 17 00:00:00 2001 From: Kaiyi Date: Fri, 3 Apr 2026 16:06:45 +0800 Subject: [PATCH 4/4] docs: add changelog and update ReadFile tool documentation Add changelog entry for ReadFile totalLines and tail mode features. Update tool parameter docs in both English and Chinese to document negative line_offset and total line count behavior. --- CHANGELOG.md | 2 ++ docs/en/customization/agents.md | 4 ++-- docs/en/release-notes/changelog.md | 2 ++ docs/zh/customization/agents.md | 4 ++-- docs/zh/release-notes/changelog.md | 2 ++ 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50f93a4ea..4d1e07133 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ Only write entries that are worth mentioning to users. ## Unreleased +- ReadFile: Add total line count to every read response and support negative `line_offset` for tail mode — the tool now reports `Total lines in file: N.` in its message so the model can plan subsequent reads; negative `line_offset` (e.g. `-100`) reads the last N lines using a sliding window, useful for viewing recent log output without shell commands; the absolute value is capped at 1000 (MAX_LINES) + ## 1.30.0 (2026-04-02) - Shell: Refine idle background completion auto-trigger — resumed shell sessions no longer auto-start a foreground turn from stale pending background notifications before the user sends a message, and fresh background completions now wait briefly while the user is actively typing to avoid stealing the prompt or breaking CJK IME composition diff --git a/docs/en/customization/agents.md b/docs/en/customization/agents.md index d20231796..26431883a 100644 --- a/docs/en/customization/agents.md +++ b/docs/en/customization/agents.md @@ -215,12 +215,12 @@ When `run_in_background=true`, the command is launched as a background task and ### `ReadFile` - **Path**: `kimi_cli.tools.file:ReadFile` -- **Description**: Read text file content. Max 1000 lines per read, max 2000 characters per line. Files outside working directory require absolute paths. +- **Description**: Read text file content. Max 1000 lines per read, max 2000 characters per line. Files outside working directory require absolute paths. Every read returns the total number of lines in the file. | Parameter | Type | Description | |-----------|------|-------------| | `path` | string | File path | -| `line_offset` | int | Starting line number, default 1 | +| `line_offset` | int | Starting line number, default 1. Supports negative values to read from the end of the file (e.g. `-100` reads the last 100 lines); absolute value cannot exceed 1000 | | `n_lines` | int | Number of lines to read, default/max 1000 | ### `ReadMediaFile` diff --git a/docs/en/release-notes/changelog.md b/docs/en/release-notes/changelog.md index 4b0e3a2be..3bda4bc87 100644 --- a/docs/en/release-notes/changelog.md +++ b/docs/en/release-notes/changelog.md @@ -4,6 +4,8 @@ This page documents the changes in each Kimi Code CLI release. ## Unreleased +- ReadFile: Add total line count to every read response and support negative `line_offset` for tail mode — the tool now reports `Total lines in file: N.` in its message so the model can plan subsequent reads; negative `line_offset` (e.g. `-100`) reads the last N lines using a sliding window, useful for viewing recent log output without shell commands; the absolute value is capped at 1000 (MAX_LINES) + ## 1.30.0 (2026-04-02) - Shell: Refine idle background completion auto-trigger — resumed shell sessions no longer auto-start a foreground turn from stale pending background notifications before the user sends a message, and fresh background completions now wait briefly while the user is actively typing to avoid stealing the prompt or breaking CJK IME composition diff --git a/docs/zh/customization/agents.md b/docs/zh/customization/agents.md index d237b09f5..19c924881 100644 --- a/docs/zh/customization/agents.md +++ b/docs/zh/customization/agents.md @@ -215,12 +215,12 @@ agent: ### `ReadFile` - **路径**:`kimi_cli.tools.file:ReadFile` -- **描述**:读取文本文件内容。单次最多读取 1000 行,每行最多 2000 字符。工作目录外的文件需使用绝对路径。 +- **描述**:读取文本文件内容。单次最多读取 1000 行,每行最多 2000 字符。工作目录外的文件需使用绝对路径。每次读取都会在消息中返回文件总行数。 | 参数 | 类型 | 说明 | |------|------|------| | `path` | string | 文件路径 | -| `line_offset` | int | 起始行号,默认 1 | +| `line_offset` | int | 起始行号,默认 1。支持负数表示从文件末尾读取(如 `-100` 读取最后 100 行),绝对值不超过 1000 | | `n_lines` | int | 读取行数,默认/最大 1000 | ### `ReadMediaFile` diff --git a/docs/zh/release-notes/changelog.md b/docs/zh/release-notes/changelog.md index e88ceba79..2082c4f0e 100644 --- a/docs/zh/release-notes/changelog.md +++ b/docs/zh/release-notes/changelog.md @@ -4,6 +4,8 @@ ## 未发布 +- ReadFile:每次读取返回文件总行数,并支持负数 `line_offset` 实现 tail 模式——工具现在会在消息中报告 `Total lines in file: N.`,方便模型规划后续读取;负数 `line_offset`(如 `-100`)通过滑动窗口读取文件末尾 N 行,适用于无需 Shell 命令即可查看最新日志输出的场景;绝对值上限为 1000(MAX_LINES) + ## 1.30.0 (2026-04-02) - Shell:细化空闲时后台完成的自动触发行为——恢复的 Shell 会话在用户发送消息前,不会因为历史遗留的后台通知而自动启动新的前景轮次;当用户正在输入时,新的后台完成事件也会短暂延后触发,避免抢占提示符或打断 CJK 输入法组合态