From 66f97cebabafe43a8bbecbe58b0e8ef5860399ce Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Thu, 26 Mar 2026 08:06:53 +0800 Subject: [PATCH 01/15] fix(shell): use git ls-files for @ file mention discovery (#1375) Replace os.walk with git ls-files as the primary file discovery mechanism for the @ file mention completer. This fixes large repos (e.g. apache/superset with 65k+ files) where the 1000-file limit caused late-alphabetical directories to be unreachable. Changes: - Add _get_git_paths(): runs `git ls-files --recurse-submodules` with 5s timeout, extracts directory entries from file paths - Add _walk_paths(): refactored os.walk fallback for non-git repos - Add scoped search: when fragment contains "/" (e.g. @src/utils/), both git and walk paths restrict to that subtree - Lazy git detection with _detect_git() (cached) - Scope-aware caching: cache invalidates when search scope changes Fixes #1375, helps #1276 --- src/kimi_cli/ui/shell/prompt.py | 105 +++++++++++++++++++++-- tests/ui_and_conv/test_file_completer.py | 28 ++++++ 2 files changed, 127 insertions(+), 6 deletions(-) diff --git a/src/kimi_cli/ui/shell/prompt.py b/src/kimi_cli/ui/shell/prompt.py index ede338e05..29bab53c5 100644 --- a/src/kimi_cli/ui/shell/prompt.py +++ b/src/kimi_cli/ui/shell/prompt.py @@ -688,6 +688,9 @@ class LocalFileMentionCompleter(Completer): re.IGNORECASE, ) + _GIT_LS_FILES_TIMEOUT = 5 + """Seconds to wait for ``git ls-files`` before falling back to ``os.walk``.""" + def __init__( self, root: Path, @@ -700,9 +703,11 @@ def __init__( self._limit = limit self._cache_time: float = 0.0 self._cached_paths: list[str] = [] + self._cache_scope: str | None = None self._top_cache_time: float = 0.0 self._top_cached_paths: list[str] = [] self._fragment_hint: str | None = None + self._is_git: bool | None = None # lazily detected self._word_completer = WordCompleter( self._get_paths, @@ -752,13 +757,103 @@ def _get_top_level_paths(self) -> list[str]: return 
self._top_cached_paths def _get_deep_paths(self) -> list[str]: + fragment = self._fragment_hint or "" + + # Determine scope: if fragment contains "/", restrict to that subtree. + scope: str | None = None + if "/" in fragment: + scope = fragment.rsplit("/", 1)[0] + now = time.monotonic() - if now - self._cache_time <= self._refresh_interval: + if ( + now - self._cache_time <= self._refresh_interval + and self._cache_scope == scope + ): return self._cached_paths + # Try git ls-files first (fast, respects .gitignore, no file-count limit). + paths = self._get_git_paths(scope) + if paths is None: + # Fallback to os.walk for non-git repos. + paths = self._walk_paths(scope) + + self._cached_paths = paths + self._cache_scope = scope + self._cache_time = now + return self._cached_paths + + # ------------------------------------------------------------------ + # git ls-files based discovery (preferred) + # ------------------------------------------------------------------ + + def _detect_git(self) -> bool: + if self._is_git is not None: + return self._is_git + try: + result = subprocess.run( + ["git", "rev-parse", "--git-dir"], + cwd=self._root, + capture_output=True, + timeout=2, + ) + self._is_git = result.returncode == 0 + except Exception: + self._is_git = False + return self._is_git + + def _get_git_paths(self, scope: str | None) -> list[str] | None: + """Return workspace paths via ``git ls-files``, or *None* to fall back.""" + if not self._detect_git(): + return None + + try: + cmd = [ + "git", + "-c", "core.quotepath=false", + "ls-files", + "--recurse-submodules", + ] + if scope: + cmd.append(scope + "/") + result = subprocess.run( + cmd, + cwd=self._root, + capture_output=True, + text=True, + timeout=self._GIT_LS_FILES_TIMEOUT, + ) + if result.returncode != 0: + return None + except Exception: + return None + + paths: list[str] = [] + seen_dirs: set[str] = set() + for line in result.stdout.splitlines(): + line = line.strip() + if not line: + continue + # Add parent 
directories as navigable entries. + parts = line.split("/") + for i in range(1, len(parts)): + dir_path = "/".join(parts[:i]) + "/" + if dir_path not in seen_dirs: + seen_dirs.add(dir_path) + paths.append(dir_path) + paths.append(line) + + return paths + + # ------------------------------------------------------------------ + # os.walk based discovery (fallback for non-git repos) + # ------------------------------------------------------------------ + + def _walk_paths(self, scope: str | None) -> list[str]: + walk_root = self._root / scope if scope else self._root + paths: list[str] = [] try: - for current_root, dirs, files in os.walk(self._root): + for current_root, dirs, files in os.walk(walk_root): relative_root = Path(current_root).relative_to(self._root) # Prevent descending into ignored directories. @@ -788,11 +883,9 @@ def _get_deep_paths(self) -> list[str]: if len(paths) >= self._limit: break except OSError: - return self._cached_paths + pass - self._cached_paths = paths - self._cache_time = now - return self._cached_paths + return paths @staticmethod def _extract_fragment(text: str) -> str | None: diff --git a/tests/ui_and_conv/test_file_completer.py b/tests/ui_and_conv/test_file_completer.py index 08e5fc54e..f9557db60 100644 --- a/tests/ui_and_conv/test_file_completer.py +++ b/tests/ui_and_conv/test_file_completer.py @@ -91,6 +91,34 @@ def test_at_guard_prevents_email_like_fragments(tmp_path: Path): assert not texts +def test_scoped_walk_finds_late_alphabetical_dirs(tmp_path: Path): + """Directories that sort late alphabetically must still be reachable. + + Regression test for #1375: in large repos, ``os.walk`` exhausted the + 1000-file limit on early directories, making later ones (like ``src/``) + invisible. With scoped search (fragment contains ``/``), the walk starts + at the target subtree. + """ + # Create many early-alphabetical directories with files to exhaust a small limit. 
+ for i in range(20): + d = tmp_path / f"aaa_{i:03d}" + d.mkdir() + for j in range(10): + (d / f"file_{j}.txt").write_text("") + + # The target directory sorts late. + target = tmp_path / "zzz_target" + target.mkdir() + (target / "important.py").write_text("# find me") + + # With a low limit, the old os.walk approach would never reach zzz_target. + completer = LocalFileMentionCompleter(tmp_path, limit=50) + + texts = _completion_texts(completer, "@zzz_target/") + + assert "zzz_target/important.py" in texts + + def test_basename_prefix_is_ranked_first(tmp_path: Path): """Prefer basename prefix matches over cross-segment fuzzy matches. From 1d087614c3e7cf1977f2aa78f86b96d8c6dd94df Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:36:45 +0800 Subject: [PATCH 02/15] style: ruff format prompt.py --- src/kimi_cli/ui/shell/prompt.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/kimi_cli/ui/shell/prompt.py b/src/kimi_cli/ui/shell/prompt.py index 29bab53c5..0979ed336 100644 --- a/src/kimi_cli/ui/shell/prompt.py +++ b/src/kimi_cli/ui/shell/prompt.py @@ -765,10 +765,7 @@ def _get_deep_paths(self) -> list[str]: scope = fragment.rsplit("/", 1)[0] now = time.monotonic() - if ( - now - self._cache_time <= self._refresh_interval - and self._cache_scope == scope - ): + if now - self._cache_time <= self._refresh_interval and self._cache_scope == scope: return self._cached_paths # Try git ls-files first (fast, respects .gitignore, no file-count limit). 
@@ -809,7 +806,8 @@ def _get_git_paths(self, scope: str | None) -> list[str] | None: try: cmd = [ "git", - "-c", "core.quotepath=false", + "-c", + "core.quotepath=false", "ls-files", "--recurse-submodules", ] From 1117293e7002dd496556736feaea7b5939d1f335 Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:46:13 +0800 Subject: [PATCH 03/15] refactor: unify web/shell file mention logic via shared file_filter module Extract ignore rules, git ls-files integration, and directory listing into kimi_cli/utils/file_filter.py. Both the shell completer and the web backend API now use the same filtering logic. Changes: - New kimi_cli/utils/file_filter.py: shared is_ignored(), detect_git(), list_files_git(), list_files_walk(), list_directory_filtered() - Shell prompt.py: delegate to file_filter instead of inline rules - Web sessions.py: use list_directory_filtered() which filters .git, node_modules, etc. (fixes #1339) - New e2e tests (4): @ trigger, scoped search, git ls-files deep files, gitignore filtering --- src/kimi_cli/ui/shell/prompt.py | 212 +++---------------------- src/kimi_cli/utils/file_filter.py | 247 +++++++++++++++++++++++++++++ src/kimi_cli/web/api/sessions.py | 27 ++-- tests/e2e/test_file_mention_e2e.py | 184 +++++++++++++++++++++ 4 files changed, 459 insertions(+), 211 deletions(-) create mode 100644 src/kimi_cli/utils/file_filter.py create mode 100644 tests/e2e/test_file_mention_e2e.py diff --git a/src/kimi_cli/ui/shell/prompt.py b/src/kimi_cli/ui/shell/prompt.py index 0979ed336..57f03cd66 100644 --- a/src/kimi_cli/ui/shell/prompt.py +++ b/src/kimi_cli/ui/shell/prompt.py @@ -611,85 +611,15 @@ def _render_selected_item_lines( class LocalFileMentionCompleter(Completer): - """Offer fuzzy `@` path completion by indexing workspace files.""" + """Offer fuzzy `@` path completion by indexing workspace files. 
+ + File discovery and ignore rules are delegated to + :mod:`kimi_cli.utils.file_filter` so that the web backend can reuse + them. + """ _FRAGMENT_PATTERN = re.compile(r"[^\s@]+") _TRIGGER_GUARDS = frozenset((".", "-", "_", "`", "'", '"', ":", "@", "#", "~")) - _IGNORED_NAME_GROUPS: dict[str, tuple[str, ...]] = { - "vcs_metadata": (".DS_Store", ".bzr", ".git", ".hg", ".svn"), - "tooling_caches": ( - ".build", - ".cache", - ".coverage", - ".fleet", - ".gradle", - ".idea", - ".ipynb_checkpoints", - ".pnpm-store", - ".pytest_cache", - ".pub-cache", - ".ruff_cache", - ".swiftpm", - ".tox", - ".venv", - ".vs", - ".vscode", - ".yarn", - ".yarn-cache", - ), - "js_frontend": ( - ".next", - ".nuxt", - ".parcel-cache", - ".svelte-kit", - ".turbo", - ".vercel", - "node_modules", - ), - "python_packaging": ( - "__pycache__", - "build", - "coverage", - "dist", - "htmlcov", - "pip-wheel-metadata", - "venv", - ), - "java_jvm": (".mvn", "out", "target"), - "dotnet_native": ("bin", "cmake-build-debug", "cmake-build-release", "obj"), - "bazel_buck": ("bazel-bin", "bazel-out", "bazel-testlogs", "buck-out"), - "misc_artifacts": ( - ".dart_tool", - ".serverless", - ".stack-work", - ".terraform", - ".terragrunt-cache", - "DerivedData", - "Pods", - "deps", - "tmp", - "vendor", - ), - } - _IGNORED_NAMES = frozenset(name for group in _IGNORED_NAME_GROUPS.values() for name in group) - _IGNORED_PATTERN_PARTS: tuple[str, ...] 
= ( - r".*_cache$", - r".*-cache$", - r".*\.egg-info$", - r".*\.dist-info$", - r".*\.py[co]$", - r".*\.class$", - r".*\.sw[po]$", - r".*~$", - r".*\.(?:tmp|bak)$", - ) - _IGNORED_PATTERNS = re.compile( - "|".join(f"(?:{part})" for part in _IGNORED_PATTERN_PARTS), - re.IGNORECASE, - ) - - _GIT_LS_FILES_TIMEOUT = 5 - """Seconds to wait for ``git ls-files`` before falling back to ``os.walk``.""" def __init__( self, @@ -721,14 +651,6 @@ def __init__( pattern=r"^[^\s@]*", ) - @classmethod - def _is_ignored(cls, name: str) -> bool: - if not name: - return True - if name in cls._IGNORED_NAMES: - return True - return bool(cls._IGNORED_PATTERNS.fullmatch(name)) - def _get_paths(self) -> list[str]: fragment = self._fragment_hint or "" if "/" not in fragment and len(fragment) < 3: @@ -736,6 +658,8 @@ def _get_paths(self) -> list[str]: return self._get_deep_paths() def _get_top_level_paths(self) -> list[str]: + from kimi_cli.utils.file_filter import is_ignored + now = time.monotonic() if now - self._top_cache_time <= self._refresh_interval: return self._top_cached_paths @@ -744,7 +668,7 @@ def _get_top_level_paths(self) -> list[str]: try: for entry in sorted(self._root.iterdir(), key=lambda p: p.name): name = entry.name - if self._is_ignored(name): + if is_ignored(name): continue entries.append(f"{name}/" if entry.is_dir() else name) if len(entries) >= self._limit: @@ -757,6 +681,8 @@ def _get_top_level_paths(self) -> list[str]: return self._top_cached_paths def _get_deep_paths(self) -> list[str]: + from kimi_cli.utils.file_filter import detect_git, list_files_git, list_files_walk + fragment = self._fragment_hint or "" # Determine scope: if fragment contains "/", restrict to that subtree. @@ -769,122 +695,20 @@ def _get_deep_paths(self) -> list[str]: return self._cached_paths # Try git ls-files first (fast, respects .gitignore, no file-count limit). 
- paths = self._get_git_paths(scope) + if self._is_git is None: + self._is_git = detect_git(self._root) + + paths: list[str] | None = None + if self._is_git: + paths = list_files_git(self._root, scope) if paths is None: - # Fallback to os.walk for non-git repos. - paths = self._walk_paths(scope) + paths = list_files_walk(self._root, scope, limit=self._limit) self._cached_paths = paths self._cache_scope = scope self._cache_time = now return self._cached_paths - # ------------------------------------------------------------------ - # git ls-files based discovery (preferred) - # ------------------------------------------------------------------ - - def _detect_git(self) -> bool: - if self._is_git is not None: - return self._is_git - try: - result = subprocess.run( - ["git", "rev-parse", "--git-dir"], - cwd=self._root, - capture_output=True, - timeout=2, - ) - self._is_git = result.returncode == 0 - except Exception: - self._is_git = False - return self._is_git - - def _get_git_paths(self, scope: str | None) -> list[str] | None: - """Return workspace paths via ``git ls-files``, or *None* to fall back.""" - if not self._detect_git(): - return None - - try: - cmd = [ - "git", - "-c", - "core.quotepath=false", - "ls-files", - "--recurse-submodules", - ] - if scope: - cmd.append(scope + "/") - result = subprocess.run( - cmd, - cwd=self._root, - capture_output=True, - text=True, - timeout=self._GIT_LS_FILES_TIMEOUT, - ) - if result.returncode != 0: - return None - except Exception: - return None - - paths: list[str] = [] - seen_dirs: set[str] = set() - for line in result.stdout.splitlines(): - line = line.strip() - if not line: - continue - # Add parent directories as navigable entries. 
- parts = line.split("/") - for i in range(1, len(parts)): - dir_path = "/".join(parts[:i]) + "/" - if dir_path not in seen_dirs: - seen_dirs.add(dir_path) - paths.append(dir_path) - paths.append(line) - - return paths - - # ------------------------------------------------------------------ - # os.walk based discovery (fallback for non-git repos) - # ------------------------------------------------------------------ - - def _walk_paths(self, scope: str | None) -> list[str]: - walk_root = self._root / scope if scope else self._root - - paths: list[str] = [] - try: - for current_root, dirs, files in os.walk(walk_root): - relative_root = Path(current_root).relative_to(self._root) - - # Prevent descending into ignored directories. - dirs[:] = sorted(d for d in dirs if not self._is_ignored(d)) - - if relative_root.parts and any( - self._is_ignored(part) for part in relative_root.parts - ): - dirs[:] = [] - continue - - if relative_root.parts: - paths.append(relative_root.as_posix() + "/") - if len(paths) >= self._limit: - break - - for file_name in sorted(files): - if self._is_ignored(file_name): - continue - relative = (relative_root / file_name).as_posix() - if not relative: - continue - paths.append(relative) - if len(paths) >= self._limit: - break - - if len(paths) >= self._limit: - break - except OSError: - pass - - return paths - @staticmethod def _extract_fragment(text: str) -> str | None: index = text.rfind("@") diff --git a/src/kimi_cli/utils/file_filter.py b/src/kimi_cli/utils/file_filter.py new file mode 100644 index 000000000..fa3869ee1 --- /dev/null +++ b/src/kimi_cli/utils/file_filter.py @@ -0,0 +1,247 @@ +"""Shared file-discovery utilities for ``@`` file mentions. + +Both the shell completer (``prompt.py``) and the web backend +(``web/api/sessions.py``) use these functions so ignore rules, git +integration, and limits are maintained in one place. 
+""" + +from __future__ import annotations + +import os +import re +import subprocess +from pathlib import Path + +# --------------------------------------------------------------------------- +# Ignore rules +# --------------------------------------------------------------------------- + +IGNORED_NAME_GROUPS: dict[str, tuple[str, ...]] = { + "vcs_metadata": (".DS_Store", ".bzr", ".git", ".hg", ".svn"), + "tooling_caches": ( + ".build", + ".cache", + ".coverage", + ".fleet", + ".gradle", + ".idea", + ".ipynb_checkpoints", + ".pnpm-store", + ".pytest_cache", + ".pub-cache", + ".ruff_cache", + ".swiftpm", + ".tox", + ".venv", + ".vs", + ".vscode", + ".yarn", + ".yarn-cache", + ), + "js_frontend": ( + ".next", + ".nuxt", + ".parcel-cache", + ".svelte-kit", + ".turbo", + ".vercel", + "node_modules", + ), + "python_packaging": ( + "__pycache__", + "build", + "coverage", + "dist", + "htmlcov", + "pip-wheel-metadata", + "venv", + ), + "java_jvm": (".mvn", "out", "target"), + "dotnet_native": ("bin", "cmake-build-debug", "cmake-build-release", "obj"), + "bazel_buck": ("bazel-bin", "bazel-out", "bazel-testlogs", "buck-out"), + "misc_artifacts": ( + ".dart_tool", + ".serverless", + ".stack-work", + ".terraform", + ".terragrunt-cache", + "DerivedData", + "Pods", + "deps", + "tmp", + "vendor", + ), +} + +IGNORED_NAMES: frozenset[str] = frozenset( + name for group in IGNORED_NAME_GROUPS.values() for name in group +) + +_IGNORED_PATTERN_PARTS: tuple[str, ...] 
= ( + r".*_cache$", + r".*-cache$", + r".*\.egg-info$", + r".*\.dist-info$", + r".*\.py[co]$", + r".*\.class$", + r".*\.sw[po]$", + r".*~$", + r".*\.(?:tmp|bak)$", +) + +IGNORED_PATTERNS: re.Pattern[str] = re.compile( + "|".join(f"(?:{part})" for part in _IGNORED_PATTERN_PARTS), + re.IGNORECASE, +) + + +def is_ignored(name: str) -> bool: + """Return *True* if *name* should be excluded from file mention results.""" + if not name: + return True + if name in IGNORED_NAMES: + return True + return bool(IGNORED_PATTERNS.fullmatch(name)) + + +# --------------------------------------------------------------------------- +# Git detection +# --------------------------------------------------------------------------- + +_GIT_LS_FILES_TIMEOUT = 5 + + +def detect_git(root: Path) -> bool: + """Return *True* if *root* is inside a git work tree.""" + try: + result = subprocess.run( + ["git", "rev-parse", "--git-dir"], + cwd=root, + capture_output=True, + timeout=2, + ) + return result.returncode == 0 + except Exception: + return False + + +# --------------------------------------------------------------------------- +# File listing +# --------------------------------------------------------------------------- + + +def list_files_git(root: Path, scope: str | None = None) -> list[str] | None: + """List workspace paths via ``git ls-files``, or *None* on failure. + + When *scope* is given (e.g. ``"src/utils"``), only files under that + subtree are returned. 
+ """ + try: + cmd = [ + "git", + "-c", + "core.quotepath=false", + "ls-files", + "--recurse-submodules", + ] + if scope: + cmd.append(scope + "/") + result = subprocess.run( + cmd, + cwd=root, + capture_output=True, + text=True, + timeout=_GIT_LS_FILES_TIMEOUT, + ) + if result.returncode != 0: + return None + except Exception: + return None + + paths: list[str] = [] + seen_dirs: set[str] = set() + for line in result.stdout.splitlines(): + line = line.strip() + if not line: + continue + # Add parent directories as navigable entries. + parts = line.split("/") + for i in range(1, len(parts)): + dir_path = "/".join(parts[:i]) + "/" + if dir_path not in seen_dirs: + seen_dirs.add(dir_path) + paths.append(dir_path) + paths.append(line) + + return paths + + +def list_files_walk( + root: Path, + scope: str | None = None, + *, + limit: int = 1000, +) -> list[str]: + """List workspace paths via ``os.walk`` (fallback for non-git repos). + + When *scope* is given, the walk starts from that subdirectory. + """ + walk_root = root / scope if scope else root + + paths: list[str] = [] + try: + for current_root, dirs, files in os.walk(walk_root): + relative_root = Path(current_root).relative_to(root) + + dirs[:] = sorted(d for d in dirs if not is_ignored(d)) + + if relative_root.parts and any(is_ignored(part) for part in relative_root.parts): + dirs[:] = [] + continue + + if relative_root.parts: + paths.append(relative_root.as_posix() + "/") + if len(paths) >= limit: + break + + for file_name in sorted(files): + if is_ignored(file_name): + continue + relative = (relative_root / file_name).as_posix() + if not relative: + continue + paths.append(relative) + if len(paths) >= limit: + break + + if len(paths) >= limit: + break + except OSError: + pass + + return paths + + +def list_directory_filtered(directory: Path) -> list[dict[str, str | int]]: + """List immediate children of *directory*, filtering ignored entries. 
+ + Returns dicts with ``name``, ``type`` (``"file"``/``"directory"``), and + optionally ``size``. Suitable for the web API response. + """ + result: list[dict[str, str | int]] = [] + try: + for subpath in directory.iterdir(): + if is_ignored(subpath.name): + continue + if subpath.is_dir(): + result.append({"name": subpath.name, "type": "directory"}) + else: + try: + size = subpath.stat().st_size + except OSError: + size = 0 + result.append({"name": subpath.name, "type": "file", "size": size}) + except OSError: + pass + result.sort(key=lambda x: (str(x["type"]), str(x["name"]))) + return result diff --git a/src/kimi_cli/web/api/sessions.py b/src/kimi_cli/web/api/sessions.py index 36cd42507..7ba0334d2 100644 --- a/src/kimi_cli/web/api/sessions.py +++ b/src/kimi_cli/web/api/sessions.py @@ -524,23 +524,16 @@ async def get_session_file( ) if file_path.is_dir(): - result: list[dict[str, str | int]] = [] - for subpath in file_path.iterdir(): - if restrict_sensitive_apis: - rel_subpath = rel_path / subpath.name - if _is_sensitive_relative_path(rel_subpath): - continue - if subpath.is_dir(): - result.append({"name": subpath.name, "type": "directory"}) - else: - result.append( - { - "name": subpath.name, - "type": "file", - "size": subpath.stat().st_size, - } - ) - result.sort(key=lambda x: (cast(str, x["type"]), cast(str, x["name"]))) + from kimi_cli.utils.file_filter import list_directory_filtered + + result = list_directory_filtered(file_path) + # Apply additional sensitive-path filtering when in public mode. 
+ if restrict_sensitive_apis: + result = [ + entry + for entry in result + if not _is_sensitive_relative_path(rel_path / str(entry["name"])) + ] return Response(content=json.dumps(result), media_type="application/json") content = file_path.read_bytes() diff --git a/tests/e2e/test_file_mention_e2e.py b/tests/e2e/test_file_mention_e2e.py new file mode 100644 index 000000000..cae3e86e5 --- /dev/null +++ b/tests/e2e/test_file_mention_e2e.py @@ -0,0 +1,184 @@ +"""E2E tests for ``@`` file mention auto-completion. + +These tests verify that the file mention completer discovers files +correctly in a real PTY environment, including: +- Basic @ trigger and completion popup +- Scoped search with ``/`` prefix +- git ls-files integration (large repo simulation) +- Ignored directories (.git, node_modules) are filtered +""" + +from __future__ import annotations + +import subprocess +import sys +import time +from pathlib import Path + +import pytest + +from tests.e2e.shell_pty_helpers import ( + make_home_dir, + make_work_dir, + read_until_prompt_ready, + start_shell_pty, + write_scripted_config, +) + +pytestmark = pytest.mark.skipif( + sys.platform == "win32", + reason="Shell PTY E2E tests require a Unix-like PTY.", +) + + +def _init_git_repo(work_dir: Path) -> None: + """Initialise a git repo, stage all files, and commit.""" + subprocess.run(["git", "init"], cwd=work_dir, capture_output=True, check=True) + subprocess.run( + ["git", "config", "user.email", "test@test.com"], + cwd=work_dir, + capture_output=True, + check=True, + ) + subprocess.run( + ["git", "config", "user.name", "Test"], + cwd=work_dir, + capture_output=True, + check=True, + ) + subprocess.run(["git", "add", "-A"], cwd=work_dir, capture_output=True, check=True) + subprocess.run( + ["git", "commit", "-m", "init"], + cwd=work_dir, + capture_output=True, + check=True, + ) + + +def _setup_shell(tmp_path: Path, work_dir: Path): + """Start a kimi-cli shell in PTY with a scripted (no-op) model.""" + home_dir = 
make_home_dir(tmp_path) + config_path = write_scripted_config(tmp_path, scripts=["Hello!"]) + shell = start_shell_pty( + config_path=config_path, + work_dir=work_dir, + home_dir=home_dir, + yolo=False, + ) + # Wait for the welcome prompt + read_until_prompt_ready(shell, after=0, timeout=20.0) + return shell + + +def test_at_trigger_shows_top_level_entries(tmp_path: Path): + """Typing ``@`` shows top-level files/directories.""" + work_dir = make_work_dir(tmp_path) + (work_dir / "README.md").write_text("# Hello") + (work_dir / "src").mkdir() + (work_dir / "src" / "main.py").write_text("print('hi')") + # Ignored dir — should NOT appear + (work_dir / "node_modules").mkdir() + (work_dir / "node_modules" / "junk.js").write_text("") + + shell = _setup_shell(tmp_path, work_dir) + try: + mark = shell.mark() + shell.send_text("@") + time.sleep(1.0) + output = shell.wait_for_quiet(timeout=3.0, after=mark) + + # Should show real files + assert "README.md" in output or "src/" in output, ( + f"Expected top-level entries in output, got:\n{output}" + ) + # Should NOT show ignored dirs + assert "node_modules" not in output, f"node_modules should be filtered, got:\n{output}" + finally: + shell.send_key("escape") + shell.send_key("ctrl_c") + shell.close() + + +def test_at_scoped_search_with_slash(tmp_path: Path): + """Typing ``@src/`` shows files inside ``src/`` directory.""" + work_dir = make_work_dir(tmp_path) + src = work_dir / "src" + src.mkdir() + (src / "app.py").write_text("# app") + (src / "utils.py").write_text("# utils") + # Another top-level dir + (work_dir / "docs").mkdir() + (work_dir / "docs" / "readme.md").write_text("") + + shell = _setup_shell(tmp_path, work_dir) + try: + mark = shell.mark() + shell.send_text("@src/") + time.sleep(1.0) + output = shell.wait_for_quiet(timeout=3.0, after=mark) + + # Should show src/ contents + assert "app.py" in output or "utils.py" in output, ( + f"Expected src/ contents in output, got:\n{output}" + ) + finally: + 
shell.send_key("escape") + shell.send_key("ctrl_c") + shell.close() + + +def test_git_ls_files_finds_deep_files(tmp_path: Path): + """In a git repo, deep files are discoverable even with many early dirs.""" + work_dir = make_work_dir(tmp_path) + + # Create many early-alphabetical directories (would exhaust os.walk limit) + for i in range(30): + d = work_dir / f"aaa_{i:03d}" + d.mkdir() + for j in range(20): + (d / f"file_{j}.txt").write_text(f"content {i}/{j}") + + # The target — late alphabetically + target = work_dir / "zzz_target" + target.mkdir() + (target / "important.py").write_text("# find me") + + # Init git repo so git ls-files is used (files already created above). + _init_git_repo(work_dir) + + shell = _setup_shell(tmp_path, work_dir) + try: + mark = shell.mark() + shell.send_text("@zzz_target/") + time.sleep(1.5) + output = shell.wait_for_quiet(timeout=5.0, after=mark) + + assert "important.py" in output, f"Expected important.py via git ls-files, got:\n{output}" + finally: + shell.send_key("escape") + shell.send_key("ctrl_c") + shell.close() + + +def test_git_ignores_are_respected(tmp_path: Path): + """Files in .gitignore should not appear in @ completion.""" + work_dir = make_work_dir(tmp_path) + (work_dir / "visible.py").write_text("# visible") + (work_dir / "secret.log").write_text("secret stuff") + (work_dir / ".gitignore").write_text("*.log\n") + + _init_git_repo(work_dir) + + shell = _setup_shell(tmp_path, work_dir) + try: + mark = shell.mark() + shell.send_text("@sec") + time.sleep(1.5) + output = shell.wait_for_quiet(timeout=3.0, after=mark) + + # secret.log is gitignored — should NOT appear + assert "secret.log" not in output, f"secret.log should be gitignored, got:\n{output}" + finally: + shell.send_key("escape") + shell.send_key("ctrl_c") + shell.close() From 38324b1d0ec61649eea7ae11ed957183205287bd Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Fri, 27 Mar 2026 16:25:29 +0800 Subject: [PATCH 04/15] 
refactor: align file_filter style with codebase conventions - Remove section separator banners and module docstring (match stdrc style) - Make all constants private (_IGNORED_NAMES, _IGNORED_PATTERNS) - Add untracked file discovery via git ls-files --others --exclude-standard - Add .git/index mtime-based cache invalidation (vs pure TTL) - Extract _parse_ls_files_output helper to reduce duplication - Add git_index_mtime() utility for cache staleness detection --- src/kimi_cli/ui/shell/prompt.py | 23 +++- src/kimi_cli/utils/file_filter.py | 199 +++++++++++++++++++----------- 2 files changed, 143 insertions(+), 79 deletions(-) diff --git a/src/kimi_cli/ui/shell/prompt.py b/src/kimi_cli/ui/shell/prompt.py index 57f03cd66..e1afb11d4 100644 --- a/src/kimi_cli/ui/shell/prompt.py +++ b/src/kimi_cli/ui/shell/prompt.py @@ -638,6 +638,7 @@ def __init__( self._top_cached_paths: list[str] = [] self._fragment_hint: str | None = None self._is_git: bool | None = None # lazily detected + self._git_index_mtime: float | None = None self._word_completer = WordCompleter( self._get_paths, @@ -681,26 +682,40 @@ def _get_top_level_paths(self) -> list[str]: return self._top_cached_paths def _get_deep_paths(self) -> list[str]: - from kimi_cli.utils.file_filter import detect_git, list_files_git, list_files_walk + from kimi_cli.utils.file_filter import ( + detect_git, + git_index_mtime, + list_files_git, + list_files_walk, + ) fragment = self._fragment_hint or "" - # Determine scope: if fragment contains "/", restrict to that subtree. scope: str | None = None if "/" in fragment: scope = fragment.rsplit("/", 1)[0] now = time.monotonic() - if now - self._cache_time <= self._refresh_interval and self._cache_scope == scope: + cache_valid = ( + now - self._cache_time <= self._refresh_interval and self._cache_scope == scope + ) + + # Invalidate on .git/index mtime change (like Claude Code). 
+ if cache_valid and self._is_git: + mtime = git_index_mtime(self._root) + if mtime != self._git_index_mtime: + cache_valid = False + + if cache_valid: return self._cached_paths - # Try git ls-files first (fast, respects .gitignore, no file-count limit). if self._is_git is None: self._is_git = detect_git(self._root) paths: list[str] | None = None if self._is_git: paths = list_files_git(self._root, scope) + self._git_index_mtime = git_index_mtime(self._root) if paths is None: paths = list_files_walk(self._root, scope, limit=self._limit) diff --git a/src/kimi_cli/utils/file_filter.py b/src/kimi_cli/utils/file_filter.py index fa3869ee1..c23bd901f 100644 --- a/src/kimi_cli/utils/file_filter.py +++ b/src/kimi_cli/utils/file_filter.py @@ -1,10 +1,3 @@ -"""Shared file-discovery utilities for ``@`` file mentions. - -Both the shell completer (``prompt.py``) and the web backend -(``web/api/sessions.py``) use these functions so ignore rules, git -integration, and limits are maintained in one place. 
-""" - from __future__ import annotations import os @@ -12,13 +5,15 @@ import subprocess from pathlib import Path -# --------------------------------------------------------------------------- -# Ignore rules -# --------------------------------------------------------------------------- - -IGNORED_NAME_GROUPS: dict[str, tuple[str, ...]] = { - "vcs_metadata": (".DS_Store", ".bzr", ".git", ".hg", ".svn"), - "tooling_caches": ( +_IGNORED_NAMES: frozenset[str] = frozenset( + ( + # vcs metadata + ".DS_Store", + ".bzr", + ".git", + ".hg", + ".svn", + # tooling caches ".build", ".cache", ".coverage", @@ -37,8 +32,7 @@ ".vscode", ".yarn", ".yarn-cache", - ), - "js_frontend": ( + # js / frontend ".next", ".nuxt", ".parcel-cache", @@ -46,8 +40,7 @@ ".turbo", ".vercel", "node_modules", - ), - "python_packaging": ( + # python packaging "__pycache__", "build", "coverage", @@ -55,11 +48,21 @@ "htmlcov", "pip-wheel-metadata", "venv", - ), - "java_jvm": (".mvn", "out", "target"), - "dotnet_native": ("bin", "cmake-build-debug", "cmake-build-release", "obj"), - "bazel_buck": ("bazel-bin", "bazel-out", "bazel-testlogs", "buck-out"), - "misc_artifacts": ( + # java / jvm + ".mvn", + "out", + "target", + # dotnet / native + "bin", + "cmake-build-debug", + "cmake-build-release", + "obj", + # bazel / buck + "bazel-bin", + "bazel-out", + "bazel-testlogs", + "buck-out", + # misc artifacts ".dart_tool", ".serverless", ".stack-work", @@ -70,45 +73,36 @@ "deps", "tmp", "vendor", - ), -} - -IGNORED_NAMES: frozenset[str] = frozenset( - name for group in IGNORED_NAME_GROUPS.values() for name in group + ) ) -_IGNORED_PATTERN_PARTS: tuple[str, ...] 
= ( - r".*_cache$", - r".*-cache$", - r".*\.egg-info$", - r".*\.dist-info$", - r".*\.py[co]$", - r".*\.class$", - r".*\.sw[po]$", - r".*~$", - r".*\.(?:tmp|bak)$", -) - -IGNORED_PATTERNS: re.Pattern[str] = re.compile( - "|".join(f"(?:{part})" for part in _IGNORED_PATTERN_PARTS), +_IGNORED_PATTERNS: re.Pattern[str] = re.compile( + r"|".join( + ( + r".*_cache$", + r".*-cache$", + r".*\.egg-info$", + r".*\.dist-info$", + r".*\.py[co]$", + r".*\.class$", + r".*\.sw[po]$", + r".*~$", + r".*\.(?:tmp|bak)$", + ) + ), re.IGNORECASE, ) +_GIT_LS_FILES_TIMEOUT = 5 + def is_ignored(name: str) -> bool: """Return *True* if *name* should be excluded from file mention results.""" if not name: return True - if name in IGNORED_NAMES: + if name in _IGNORED_NAMES: return True - return bool(IGNORED_PATTERNS.fullmatch(name)) - - -# --------------------------------------------------------------------------- -# Git detection -# --------------------------------------------------------------------------- - -_GIT_LS_FILES_TIMEOUT = 5 + return bool(_IGNORED_PATTERNS.fullmatch(name)) def detect_git(root: Path) -> bool: @@ -125,46 +119,35 @@ def detect_git(root: Path) -> bool: return False -# --------------------------------------------------------------------------- -# File listing -# --------------------------------------------------------------------------- - - -def list_files_git(root: Path, scope: str | None = None) -> list[str] | None: - """List workspace paths via ``git ls-files``, or *None* on failure. - - When *scope* is given (e.g. ``"src/utils"``), only files under that - subtree are returned. 
- """ +def git_index_mtime(root: Path) -> float | None: + """Return the mtime of ``.git/index``, or *None* if unavailable.""" try: - cmd = [ - "git", - "-c", - "core.quotepath=false", - "ls-files", - "--recurse-submodules", - ] - if scope: - cmd.append(scope + "/") result = subprocess.run( - cmd, + ["git", "rev-parse", "--git-dir"], cwd=root, capture_output=True, text=True, - timeout=_GIT_LS_FILES_TIMEOUT, + timeout=2, ) if result.returncode != 0: return None + git_dir = Path(result.stdout.strip()) + if not git_dir.is_absolute(): + git_dir = root / git_dir + index = git_dir / "index" + return index.stat().st_mtime except Exception: return None + +def _parse_ls_files_output(stdout: str) -> list[str]: + """Parse ``git ls-files`` output into paths with synthesised directory entries.""" paths: list[str] = [] seen_dirs: set[str] = set() - for line in result.stdout.splitlines(): + for line in stdout.splitlines(): line = line.strip() if not line: continue - # Add parent directories as navigable entries. parts = line.split("/") for i in range(1, len(parts)): dir_path = "/".join(parts[:i]) + "/" @@ -172,6 +155,72 @@ def list_files_git(root: Path, scope: str | None = None) -> list[str] | None: seen_dirs.add(dir_path) paths.append(dir_path) paths.append(line) + return paths + + +def list_files_git( + root: Path, + scope: str | None = None, + *, + include_untracked: bool = True, +) -> list[str] | None: + """List workspace paths via ``git ls-files``, or *None* on failure. + + When *scope* is given (e.g. ``"src/utils"``), only files under that + subtree are returned. When *include_untracked* is *True*, untracked + files (respecting ``.gitignore``) are appended via + ``--others --exclude-standard``. 
+ """ + cmd = [ + "git", + "-c", + "core.quotepath=false", + "ls-files", + "--recurse-submodules", + ] + if scope: + cmd.append(scope + "/") + try: + result = subprocess.run( + cmd, + cwd=root, + capture_output=True, + text=True, + timeout=_GIT_LS_FILES_TIMEOUT, + ) + if result.returncode != 0: + return None + except Exception: + return None + + paths = _parse_ls_files_output(result.stdout) + + if include_untracked: + others_cmd = [ + "git", + "-c", + "core.quotepath=false", + "ls-files", + "--others", + "--exclude-standard", + ] + if scope: + others_cmd.append(scope + "/") + try: + others = subprocess.run( + others_cmd, + cwd=root, + capture_output=True, + text=True, + timeout=_GIT_LS_FILES_TIMEOUT, + ) + if others.returncode == 0: + tracked = set(paths) + for p in _parse_ls_files_output(others.stdout): + if p not in tracked: + paths.append(p) + except Exception: + pass return paths From 28a6a54a122a1b32b1f6f078c7192704e2ea234e Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Fri, 27 Mar 2026 16:43:14 +0800 Subject: [PATCH 05/15] fix: prevent path traversal in file mention scope parameter Reject scope values containing '..' to prevent @../ from escaping the workspace root in both git ls-files and os.walk code paths. --- src/kimi_cli/utils/file_filter.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/kimi_cli/utils/file_filter.py b/src/kimi_cli/utils/file_filter.py index c23bd901f..e283653be 100644 --- a/src/kimi_cli/utils/file_filter.py +++ b/src/kimi_cli/utils/file_filter.py @@ -171,6 +171,9 @@ def list_files_git( files (respecting ``.gitignore``) are appended via ``--others --exclude-standard``. """ + if scope and ".." in scope.split("/"): + return None + cmd = [ "git", "-c", @@ -235,12 +238,20 @@ def list_files_walk( When *scope* is given, the walk starts from that subdirectory. 
""" - walk_root = root / scope if scope else root + resolved_root = root.resolve() + walk_root = (root / scope).resolve() if scope else resolved_root + + # Prevent path traversal outside the workspace (e.g. scope="../"). + try: + if not walk_root.is_relative_to(resolved_root): + return [] + except (OSError, ValueError): + return [] paths: list[str] = [] try: for current_root, dirs, files in os.walk(walk_root): - relative_root = Path(current_root).relative_to(root) + relative_root = Path(current_root).resolve().relative_to(resolved_root) dirs[:] = sorted(d for d in dirs if not is_ignored(d)) From b7736ef5d350398338879ae7859d07fa1858dd32 Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Fri, 27 Mar 2026 17:25:49 +0800 Subject: [PATCH 06/15] fix(web): restore unfiltered directory listing in file browser API The get_session_file endpoint is a general-purpose file browser, not an @ mention completer. Revert to listing all directory entries without is_ignored() filtering so web UI users can still see node_modules, build, dist, etc. when browsing their project. --- src/kimi_cli/web/api/sessions.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/kimi_cli/web/api/sessions.py b/src/kimi_cli/web/api/sessions.py index 7ba0334d2..7c791ff58 100644 --- a/src/kimi_cli/web/api/sessions.py +++ b/src/kimi_cli/web/api/sessions.py @@ -524,16 +524,21 @@ async def get_session_file( ) if file_path.is_dir(): - from kimi_cli.utils.file_filter import list_directory_filtered - - result = list_directory_filtered(file_path) - # Apply additional sensitive-path filtering when in public mode. 
- if restrict_sensitive_apis: - result = [ - entry - for entry in result - if not _is_sensitive_relative_path(rel_path / str(entry["name"])) - ] + result: list[dict[str, str | int]] = [] + for subpath in file_path.iterdir(): + if restrict_sensitive_apis: + rel_subpath = rel_path / subpath.name + if _is_sensitive_relative_path(rel_subpath): + continue + if subpath.is_dir(): + result.append({"name": subpath.name, "type": "directory"}) + else: + try: + size = subpath.stat().st_size + except OSError: + size = 0 + result.append({"name": subpath.name, "type": "file", "size": size}) + result.sort(key=lambda x: (cast(str, x["type"]), cast(str, x["name"]))) return Response(content=json.dumps(result), media_type="application/json") content = file_path.read_bytes() From 9c5e334b741d63350794381b8dea65fb32dcfabd Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Fri, 27 Mar 2026 17:39:56 +0800 Subject: [PATCH 07/15] fix: preserve filename whitespace in git ls-files output parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unnecessary strip() from _parse_ls_files_output — splitlines() already handles line endings, and strip() would silently mangle filenames with leading/trailing spaces. 
--- src/kimi_cli/utils/file_filter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/kimi_cli/utils/file_filter.py b/src/kimi_cli/utils/file_filter.py index e283653be..aa1ca8ab7 100644 --- a/src/kimi_cli/utils/file_filter.py +++ b/src/kimi_cli/utils/file_filter.py @@ -145,7 +145,6 @@ def _parse_ls_files_output(stdout: str) -> list[str]: paths: list[str] = [] seen_dirs: set[str] = set() for line in stdout.splitlines(): - line = line.strip() if not line: continue parts = line.split("/") From 88230bcb2d4f7a68ec0ed6c82073e5f5110a38d9 Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Fri, 27 Mar 2026 19:31:47 +0800 Subject: [PATCH 08/15] fix: filter ignored dirs and stale paths from git ls-files results Two regressions fixed: 1. Tracked ignored directories (node_modules/, vendor/, etc.) were bypassing is_ignored() when discovered via git ls-files, making completion inconsistent with top-level and os.walk fallback paths. Now _parse_ls_files_output applies is_ignored() to each path segment. 2. Files renamed/deleted on disk but not staged (e.g. mv old.py new.py without git add) remained as stale candidates because git ls-files reads the index. Now git ls-files --deleted is used to exclude working-tree deletions. Add regression tests for both scenarios. 
--- src/kimi_cli/utils/file_filter.py | 52 ++++++++++++++++- tests/ui_and_conv/test_file_completer.py | 72 ++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 2 deletions(-) diff --git a/src/kimi_cli/utils/file_filter.py b/src/kimi_cli/utils/file_filter.py index aa1ca8ab7..fb99f6e23 100644 --- a/src/kimi_cli/utils/file_filter.py +++ b/src/kimi_cli/utils/file_filter.py @@ -140,14 +140,36 @@ def git_index_mtime(root: Path) -> float | None: return None -def _parse_ls_files_output(stdout: str) -> list[str]: - """Parse ``git ls-files`` output into paths with synthesised directory entries.""" +def _parse_ls_files_output(stdout: str, *, filter_ignored: bool = True) -> list[str]: + """Parse ``git ls-files`` output into paths with synthesised directory entries. + + When *filter_ignored* is *True*, paths whose segments match + ``is_ignored()`` are excluded so that tracked ``node_modules/``, + ``vendor/``, etc. do not pollute completion candidates. + """ paths: list[str] = [] seen_dirs: set[str] = set() + ignored_prefixes: set[str] = set() for line in stdout.splitlines(): if not line: continue + parts = line.split("/") + + if filter_ignored: + skip = False + for i, part in enumerate(parts): + prefix = "/".join(parts[: i + 1]) + "/" + if prefix in ignored_prefixes: + skip = True + break + if is_ignored(part): + ignored_prefixes.add(prefix) + skip = True + break + if skip: + continue + for i in range(1, len(parts)): dir_path = "/".join(parts[:i]) + "/" if dir_path not in seen_dirs: @@ -157,6 +179,26 @@ def _parse_ls_files_output(stdout: str) -> list[str]: return paths +def _git_deleted_files(root: Path, scope: str | None = None) -> set[str]: + """Return the set of tracked files deleted from the working tree.""" + cmd = ["git", "-c", "core.quotepath=false", "ls-files", "--deleted"] + if scope: + cmd.append(scope + "/") + try: + result = subprocess.run( + cmd, + cwd=root, + capture_output=True, + text=True, + timeout=_GIT_LS_FILES_TIMEOUT, + ) + if result.returncode == 
0: + return {line for line in result.stdout.splitlines() if line} + except Exception: + pass + return set() + + def list_files_git( root: Path, scope: str | None = None, @@ -169,6 +211,9 @@ def list_files_git( subtree are returned. When *include_untracked* is *True*, untracked files (respecting ``.gitignore``) are appended via ``--others --exclude-standard``. + + Deleted working-tree files (``git ls-files --deleted``) are excluded + so that renamed / removed files do not appear as stale candidates. """ if scope and ".." in scope.split("/"): return None @@ -195,7 +240,10 @@ def list_files_git( except Exception: return None + deleted = _git_deleted_files(root, scope) paths = _parse_ls_files_output(result.stdout) + if deleted: + paths = [p for p in paths if p.endswith("/") or p not in deleted] if include_untracked: others_cmd = [ diff --git a/tests/ui_and_conv/test_file_completer.py b/tests/ui_and_conv/test_file_completer.py index f9557db60..9df041ee4 100644 --- a/tests/ui_and_conv/test_file_completer.py +++ b/tests/ui_and_conv/test_file_completer.py @@ -2,6 +2,7 @@ from __future__ import annotations +import subprocess from pathlib import Path from inline_snapshot import snapshot @@ -145,3 +146,74 @@ def test_basename_prefix_is_ranked_first(tmp_path: Path): "src/kimi_cli/tools/file/patch.py", ] ) + + +def _init_git_repo(work_dir: Path) -> None: + """Initialise a git repo, stage all files, and commit.""" + for cmd in ( + ["git", "init"], + ["git", "config", "user.email", "test@test.com"], + ["git", "config", "user.name", "Test"], + ["git", "add", "-A"], + ["git", "commit", "-m", "init"], + ): + subprocess.run(cmd, cwd=work_dir, capture_output=True, check=True) + + +def test_tracked_ignored_dirs_filtered_in_git_mode(tmp_path: Path): + """Tracked ``node_modules/`` and ``vendor/`` must still be filtered. + + Regression test: ``git ls-files`` returns all tracked paths, so + directories in ``_IGNORED_NAMES`` were surfacing in completion when + they happened to be committed. 
+ """ + (tmp_path / "src").mkdir() + (tmp_path / "src" / "app.py").write_text("# app") + nm = tmp_path / "node_modules" / "pkg" + nm.mkdir(parents=True) + (nm / "index.js").write_text("module.exports = {}") + vendor = tmp_path / "vendor" + vendor.mkdir() + (vendor / "dep.py").write_text("# dep") + + _init_git_repo(tmp_path) + + completer = LocalFileMentionCompleter(tmp_path) + + texts = _completion_texts(completer, "@nod") + assert not any("node_modules" in t for t in texts), ( + f"node_modules should be filtered even if tracked, got: {texts}" + ) + + texts = _completion_texts(completer, "@ven") + assert not any("vendor" in t for t in texts), ( + f"vendor should be filtered even if tracked, got: {texts}" + ) + + +def test_unstaged_rename_hides_deleted_path(tmp_path: Path): + """After ``mv old.py new.py`` without staging, old.py must not appear. + + Regression test: ``git ls-files`` reads the index, so a file that was + moved on disk (but not staged) would still show up as a stale + candidate. + """ + (tmp_path / "src").mkdir() + (tmp_path / "src" / "old.py").write_text("# original") + + _init_git_repo(tmp_path) + + # Rename without staging. 
+ (tmp_path / "src" / "old.py").rename(tmp_path / "src" / "new.py") + + completer = LocalFileMentionCompleter(tmp_path) + + texts = _completion_texts(completer, "@old") + assert not any("old.py" in t for t in texts), ( + f"Deleted old.py should not appear in completion, got: {texts}" + ) + + texts = _completion_texts(completer, "@new") + assert any("new.py" in t for t in texts), ( + f"Renamed new.py should appear via --others, got: {texts}" + ) From 246052c3f64aa13f9926a3fabbd89022cd551917 Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Fri, 27 Mar 2026 19:40:55 +0800 Subject: [PATCH 09/15] fix: prune empty directory entries after deleted-file filtering When all files under a directory are deleted from the working tree (but not staged), the synthesized directory entry was left as a stale candidate. Now directory entries are pruned if no surviving file children exist after deleted-file exclusion. --- src/kimi_cli/utils/file_filter.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/kimi_cli/utils/file_filter.py b/src/kimi_cli/utils/file_filter.py index fb99f6e23..1349f1be6 100644 --- a/src/kimi_cli/utils/file_filter.py +++ b/src/kimi_cli/utils/file_filter.py @@ -272,6 +272,16 @@ def list_files_git( except Exception: pass + # Prune directory entries that have no surviving file children. 
+ if deleted: + live_dirs: set[str] = set() + for p in paths: + if not p.endswith("/"): + parts = p.split("/") + for i in range(1, len(parts)): + live_dirs.add("/".join(parts[:i]) + "/") + paths = [p for p in paths if not p.endswith("/") or p in live_dirs] + return paths From 570f9259d83598cb5d7687568d6b70e3bf7336ac Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Fri, 27 Mar 2026 19:45:28 +0800 Subject: [PATCH 10/15] test: add file_filter cross-validation tests (git vs walk parity) 43 tests covering: - Git vs walk parity on clean repos (flat, nested, scoped, gitignored) - Tracked ignored dir filtering (node_modules, vendor, __pycache__, etc.) - Deleted/renamed file exclusion and empty dir pruning - Untracked file discovery and gitignore respect - Path traversal prevention - is_ignored unit tests - Non-git fallback behaviour --- tests/utils/test_file_filter.py | 291 ++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 tests/utils/test_file_filter.py diff --git a/tests/utils/test_file_filter.py b/tests/utils/test_file_filter.py new file mode 100644 index 000000000..b2974c083 --- /dev/null +++ b/tests/utils/test_file_filter.py @@ -0,0 +1,291 @@ +"""Tests for file_filter: git vs walk cross-validation and edge cases.""" + +from __future__ import annotations + +import os +import subprocess +from pathlib import Path + +import pytest + +from kimi_cli.utils.file_filter import ( + is_ignored, + list_files_git, + list_files_walk, +) + + +def _init_git(root: Path) -> None: + for cmd in ( + ["git", "init"], + ["git", "config", "user.email", "t@t.com"], + ["git", "config", "user.name", "T"], + ["git", "add", "-A"], + ["git", "commit", "-m", "init"], + ): + subprocess.run(cmd, cwd=root, capture_output=True, check=True) + + +# --------------------------------------------------------------------------- +# Cross-validation: git vs walk must agree on a clean working tree +# 
--------------------------------------------------------------------------- + + +class TestGitWalkParity: + """On a clean git repo the two backends must return the same path set.""" + + def test_flat_repo(self, tmp_path: Path) -> None: + (tmp_path / "README.md").write_text("hi") + (tmp_path / "main.py").write_text("print(1)") + _init_git(tmp_path) + + git = set(list_files_git(tmp_path) or []) + walk = set(list_files_walk(tmp_path)) + assert git == walk + + def test_nested_dirs(self, tmp_path: Path) -> None: + (tmp_path / "src" / "pkg").mkdir(parents=True) + (tmp_path / "src" / "pkg" / "mod.py").write_text("") + (tmp_path / "src" / "app.py").write_text("") + (tmp_path / "docs").mkdir() + (tmp_path / "docs" / "guide.md").write_text("") + _init_git(tmp_path) + + git = set(list_files_git(tmp_path) or []) + walk = set(list_files_walk(tmp_path)) + assert git == walk + + def test_with_gitignore(self, tmp_path: Path) -> None: + """Gitignored files excluded from both paths.""" + (tmp_path / "app.py").write_text("") + (tmp_path / "debug.log").write_text("log") + (tmp_path / ".gitignore").write_text("*.log\n") + _init_git(tmp_path) + + git = set(list_files_git(tmp_path) or []) + walk = set(list_files_walk(tmp_path)) + + assert "debug.log" not in git + # walk doesn't read .gitignore, so it may include debug.log. + # The key invariant: git is a subset of walk for non-gitignored files. 
+ assert git <= walk | {"debug.log"} + + def test_scoped_search_parity(self, tmp_path: Path) -> None: + (tmp_path / "src" / "core").mkdir(parents=True) + (tmp_path / "src" / "core" / "engine.py").write_text("") + (tmp_path / "src" / "util.py").write_text("") + (tmp_path / "docs").mkdir() + (tmp_path / "docs" / "api.md").write_text("") + _init_git(tmp_path) + + git = set(list_files_git(tmp_path, "src") or []) + walk = set(list_files_walk(tmp_path, "src")) + assert git == walk + + # No docs contamination + assert not any("docs" in p for p in git) + + +# --------------------------------------------------------------------------- +# Ignored directory filtering (tracked content must still be hidden) +# --------------------------------------------------------------------------- + + +class TestIgnoredDirFiltering: + """Tracked ignored dirs must not leak into git results.""" + + @pytest.mark.parametrize( + "dirname", ["node_modules", "vendor", "__pycache__", ".vscode", "dist"] + ) + def test_tracked_ignored_dir_filtered(self, tmp_path: Path, dirname: str) -> None: + (tmp_path / "keep.py").write_text("") + d = tmp_path / dirname + d.mkdir() + (d / "stuff.js").write_text("") + _init_git(tmp_path) + + git = list_files_git(tmp_path) or [] + walk = list_files_walk(tmp_path) + + assert not any(dirname in p for p in git), f"{dirname} leaked via git" + assert not any(dirname in p for p in walk), f"{dirname} leaked via walk" + + def test_nested_ignored_dir(self, tmp_path: Path) -> None: + """Ignored dir deep inside tree must also be filtered.""" + (tmp_path / "src" / "lib" / "node_modules" / "pkg").mkdir(parents=True) + (tmp_path / "src" / "lib" / "node_modules" / "pkg" / "index.js").write_text("") + (tmp_path / "src" / "lib" / "real.py").write_text("") + _init_git(tmp_path) + + git = list_files_git(tmp_path) or [] + assert "src/lib/real.py" in git + assert not any("node_modules" in p for p in git) + + +# --------------------------------------------------------------------------- 
+# Deleted / renamed file handling +# --------------------------------------------------------------------------- + + +class TestDeletedFileHandling: + """Stale index entries must not appear in results.""" + + def test_deleted_file_excluded(self, tmp_path: Path) -> None: + (tmp_path / "a.py").write_text("") + (tmp_path / "b.py").write_text("") + _init_git(tmp_path) + + os.remove(tmp_path / "a.py") + + git = list_files_git(tmp_path) or [] + assert "a.py" not in git + assert "b.py" in git + + def test_renamed_file_old_excluded_new_included(self, tmp_path: Path) -> None: + (tmp_path / "old.py").write_text("# old") + _init_git(tmp_path) + + (tmp_path / "old.py").rename(tmp_path / "new.py") + + git = list_files_git(tmp_path) or [] + assert "old.py" not in git + assert "new.py" in git + + def test_empty_dir_pruned_after_delete(self, tmp_path: Path) -> None: + """Deleting the only file under a dir must also remove the dir entry.""" + (tmp_path / "solo").mkdir() + (tmp_path / "solo" / "only.py").write_text("") + (tmp_path / "keep.py").write_text("") + _init_git(tmp_path) + + os.remove(tmp_path / "solo" / "only.py") + os.rmdir(tmp_path / "solo") + + git = list_files_git(tmp_path) or [] + assert "solo/" not in git + assert "solo/only.py" not in git + assert "keep.py" in git + + def test_partial_delete_preserves_dir(self, tmp_path: Path) -> None: + """Deleting one of two files keeps the dir entry.""" + (tmp_path / "pkg").mkdir() + (tmp_path / "pkg" / "a.py").write_text("") + (tmp_path / "pkg" / "b.py").write_text("") + _init_git(tmp_path) + + os.remove(tmp_path / "pkg" / "a.py") + + git = list_files_git(tmp_path) or [] + assert "pkg/" in git + assert "pkg/a.py" not in git + assert "pkg/b.py" in git + + +# --------------------------------------------------------------------------- +# Untracked file discovery +# --------------------------------------------------------------------------- + + +class TestUntrackedFiles: + """New untracked files (respecting .gitignore) must be 
discovered.""" + + def test_untracked_file_included(self, tmp_path: Path) -> None: + (tmp_path / "tracked.py").write_text("") + _init_git(tmp_path) + + (tmp_path / "untracked.py").write_text("# new") + + git = list_files_git(tmp_path) or [] + assert "tracked.py" in git + assert "untracked.py" in git + + def test_gitignored_untracked_excluded(self, tmp_path: Path) -> None: + (tmp_path / "app.py").write_text("") + (tmp_path / ".gitignore").write_text("*.log\n") + _init_git(tmp_path) + + (tmp_path / "debug.log").write_text("noise") + + git = list_files_git(tmp_path) or [] + assert "debug.log" not in git + + def test_untracked_without_flag(self, tmp_path: Path) -> None: + (tmp_path / "tracked.py").write_text("") + _init_git(tmp_path) + (tmp_path / "untracked.py").write_text("") + + git = list_files_git(tmp_path, include_untracked=False) or [] + assert "tracked.py" in git + assert "untracked.py" not in git + + +# --------------------------------------------------------------------------- +# Path traversal prevention +# --------------------------------------------------------------------------- + + +class TestPathTraversal: + """Scope containing ``..`` must be rejected.""" + + def test_git_rejects_dotdot(self, tmp_path: Path) -> None: + (tmp_path / "a.py").write_text("") + _init_git(tmp_path) + assert list_files_git(tmp_path, "..") is None + + def test_walk_rejects_dotdot(self, tmp_path: Path) -> None: + (tmp_path / "a.py").write_text("") + assert list_files_walk(tmp_path, "..") == [] + + def test_nested_dotdot_rejected(self, tmp_path: Path) -> None: + (tmp_path / "a.py").write_text("") + _init_git(tmp_path) + assert list_files_git(tmp_path, "src/../../etc") is None + + +# --------------------------------------------------------------------------- +# is_ignored unit tests +# --------------------------------------------------------------------------- + + +class TestIsIgnored: + @pytest.mark.parametrize( + "name", + ["node_modules", "__pycache__", ".git", ".DS_Store", 
"vendor", "dist", ".vscode"], + ) + def test_ignored_names(self, name: str) -> None: + assert is_ignored(name) + + @pytest.mark.parametrize( + "name", + ["foo_cache", "bar-cache", "pkg.egg-info", "lib.dist-info", "mod.pyc", "A.class", "f.swp"], + ) + def test_ignored_patterns(self, name: str) -> None: + assert is_ignored(name) + + @pytest.mark.parametrize( + "name", + ["src", "main.py", "README.md", "package.json", ".gitignore", "Makefile"], + ) + def test_not_ignored(self, name: str) -> None: + assert not is_ignored(name) + + def test_empty_is_ignored(self) -> None: + assert is_ignored("") + + +# --------------------------------------------------------------------------- +# Fallback behaviour +# --------------------------------------------------------------------------- + + +class TestFallback: + """list_files_git returns None for non-git dirs; walk always works.""" + + def test_non_git_returns_none(self, tmp_path: Path) -> None: + (tmp_path / "a.py").write_text("") + assert list_files_git(tmp_path) is None + + def test_walk_works_without_git(self, tmp_path: Path) -> None: + (tmp_path / "a.py").write_text("") + result = list_files_walk(tmp_path) + assert "a.py" in result From 4f2b1f96ced34bf17081d75e276e95aa40d283e6 Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Fri, 27 Mar 2026 20:58:46 +0800 Subject: [PATCH 11/15] fix: use git ls-files -z for NUL-delimited output parsing Filenames containing tab, quotes, or backslash were C-style quoted by git even with core.quotepath=false, producing escaped strings like "tab\there.py" that don't match real paths on disk. Switch all three git ls-files invocations to -z (NUL-delimited) mode and parse with split('\0') instead of splitlines(). Add regression tests for tab, quote, and backslash in filenames. 
--- src/kimi_cli/utils/file_filter.py | 16 +++++++++------- tests/utils/test_file_filter.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/src/kimi_cli/utils/file_filter.py b/src/kimi_cli/utils/file_filter.py index 1349f1be6..29ce540a6 100644 --- a/src/kimi_cli/utils/file_filter.py +++ b/src/kimi_cli/utils/file_filter.py @@ -141,7 +141,7 @@ def git_index_mtime(root: Path) -> float | None: def _parse_ls_files_output(stdout: str, *, filter_ignored: bool = True) -> list[str]: - """Parse ``git ls-files`` output into paths with synthesised directory entries. + """Parse NUL-delimited ``git ls-files -z`` output into paths with synthesised dirs. When *filter_ignored* is *True*, paths whose segments match ``is_ignored()`` are excluded so that tracked ``node_modules/``, @@ -150,11 +150,11 @@ def _parse_ls_files_output(stdout: str, *, filter_ignored: bool = True) -> list[ paths: list[str] = [] seen_dirs: set[str] = set() ignored_prefixes: set[str] = set() - for line in stdout.splitlines(): - if not line: + for entry in stdout.split("\0"): + if not entry: continue - parts = line.split("/") + parts = entry.split("/") if filter_ignored: skip = False @@ -175,13 +175,13 @@ def _parse_ls_files_output(stdout: str, *, filter_ignored: bool = True) -> list[ if dir_path not in seen_dirs: seen_dirs.add(dir_path) paths.append(dir_path) - paths.append(line) + paths.append(entry) return paths def _git_deleted_files(root: Path, scope: str | None = None) -> set[str]: """Return the set of tracked files deleted from the working tree.""" - cmd = ["git", "-c", "core.quotepath=false", "ls-files", "--deleted"] + cmd = ["git", "-c", "core.quotepath=false", "ls-files", "-z", "--deleted"] if scope: cmd.append(scope + "/") try: @@ -193,7 +193,7 @@ def _git_deleted_files(root: Path, scope: str | None = None) -> set[str]: timeout=_GIT_LS_FILES_TIMEOUT, ) if result.returncode == 0: - return {line for line in result.stdout.splitlines() if line} + return {e 
for e in result.stdout.split("\0") if e} except Exception: pass return set() @@ -223,6 +223,7 @@ def list_files_git( "-c", "core.quotepath=false", "ls-files", + "-z", "--recurse-submodules", ] if scope: @@ -251,6 +252,7 @@ def list_files_git( "-c", "core.quotepath=false", "ls-files", + "-z", "--others", "--exclude-standard", ] diff --git a/tests/utils/test_file_filter.py b/tests/utils/test_file_filter.py index b2974c083..0ca8dd517 100644 --- a/tests/utils/test_file_filter.py +++ b/tests/utils/test_file_filter.py @@ -224,6 +224,34 @@ def test_untracked_without_flag(self, tmp_path: Path) -> None: # --------------------------------------------------------------------------- +class TestSpecialCharFilenames: + """Filenames with tab, quotes, or backslash must be handled correctly.""" + + def test_tab_in_filename(self, tmp_path: Path) -> None: + p = tmp_path / "tab\there.py" + p.write_text("") + _init_git(tmp_path) + + git = list_files_git(tmp_path) or [] + assert "tab\there.py" in git + + def test_quote_in_filename(self, tmp_path: Path) -> None: + p = tmp_path / 'quote"name.py' + p.write_text("") + _init_git(tmp_path) + + git = list_files_git(tmp_path) or [] + assert 'quote"name.py' in git + + def test_backslash_in_filename(self, tmp_path: Path) -> None: + p = tmp_path / "back\\slash.py" + p.write_text("") + _init_git(tmp_path) + + git = list_files_git(tmp_path) or [] + assert "back\\slash.py" in git + + class TestPathTraversal: """Scope containing ``..`` must be rejected.""" From 74165219e4d7ae2aa7736e4972eff4de5377af96 Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Fri, 27 Mar 2026 22:07:01 +0800 Subject: [PATCH 12/15] fix: add -- separator before scope path in git ls-files commands Directory names starting with `-` (e.g. `-docs/`) are misinterpreted as git options without `--`. Extract `_scope_args()` helper to ensure all three git ls-files invocations use the end-of-options marker. 
--- src/kimi_cli/utils/file_filter.py | 15 +++++------ tests/utils/test_file_filter.py | 41 +++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 7 deletions(-) diff --git a/src/kimi_cli/utils/file_filter.py b/src/kimi_cli/utils/file_filter.py index 29ce540a6..64ac12772 100644 --- a/src/kimi_cli/utils/file_filter.py +++ b/src/kimi_cli/utils/file_filter.py @@ -96,6 +96,11 @@ _GIT_LS_FILES_TIMEOUT = 5 +def _scope_args(scope: str | None) -> list[str]: + """Return ``["--", "<scope>/"]`` if *scope* is given, else ``[]``.""" + return ["--", scope + "/"] if scope else [] + + def is_ignored(name: str) -> bool: """Return *True* if *name* should be excluded from file mention results.""" if not name: @@ -181,9 +186,7 @@ def _parse_ls_files_output(stdout: str, *, filter_ignored: bool = True) -> list[ def _git_deleted_files(root: Path, scope: str | None = None) -> set[str]: """Return the set of tracked files deleted from the working tree.""" - cmd = ["git", "-c", "core.quotepath=false", "ls-files", "-z", "--deleted"] - if scope: - cmd.append(scope + "/") + cmd = ["git", "-c", "core.quotepath=false", "ls-files", "-z", "--deleted", *_scope_args(scope)] try: result = subprocess.run( cmd, @@ -225,9 +228,8 @@ def list_files_git( "ls-files", "-z", "--recurse-submodules", + *_scope_args(scope), ] - if scope: - cmd.append(scope + "/") try: result = subprocess.run( cmd, @@ -255,9 +257,8 @@ def list_files_git( "-z", "--others", "--exclude-standard", + *_scope_args(scope), ] - if scope: - others_cmd.append(scope + "/") try: others = subprocess.run( others_cmd, diff --git a/tests/utils/test_file_filter.py b/tests/utils/test_file_filter.py index 0ca8dd517..a5fd2fbc8 100644 --- a/tests/utils/test_file_filter.py +++ b/tests/utils/test_file_filter.py @@ -270,6 +270,47 @@ def test_nested_dotdot_rejected(self, tmp_path: Path) -> None: assert list_files_git(tmp_path, "src/../../etc") is None +# --------------------------------------------------------------------------- +# Dash-prefixed
directory names (must not be parsed as git options) +# --------------------------------------------------------------------------- + + +class TestDashPrefixScope: + """Directory names starting with ``-`` must not be misinterpreted as git options.""" + + def test_git_scoped_dash_prefix(self, tmp_path: Path) -> None: + d = tmp_path / "-docs" + d.mkdir() + (d / "guide.md").write_text("# guide") + _init_git(tmp_path) + + result = list_files_git(tmp_path, "-docs") + assert result is not None + assert "-docs/guide.md" in result + + def test_git_deleted_with_dash_prefix(self, tmp_path: Path) -> None: + d = tmp_path / "-data" + d.mkdir() + (d / "old.csv").write_text("a,b") + _init_git(tmp_path) + (d / "old.csv").unlink() + + result = list_files_git(tmp_path, "-data") + assert result is not None + assert not any("old.csv" in p for p in result) + + def test_git_untracked_with_dash_prefix(self, tmp_path: Path) -> None: + d = tmp_path / "-src" + d.mkdir() + (d / "tracked.py").write_text("# tracked") + _init_git(tmp_path) + (d / "new.py").write_text("# new") + + result = list_files_git(tmp_path, "-src") + assert result is not None + assert "-src/new.py" in result + + # --------------------------------------------------------------------------- # is_ignored unit tests # --------------------------------------------------------------------------- From 89450c48f2b08152254b42b88889ae17e183bfbf Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Fri, 27 Mar 2026 22:11:30 +0800 Subject: [PATCH 13/15] fix: handle submodule dirty state in deleted/untracked file discovery git ls-files --deleted and --others do not support --recurse-submodules (fatal: unsupported mode). Enumerate submodules via `git submodule status` and run per-submodule queries, prefixing results back to the root repo namespace. Also extract _git_ls_files_z() helper to reduce subprocess boilerplate. 
--- src/kimi_cli/utils/file_filter.py | 84 ++++++++++++++++++++----------- tests/utils/test_file_filter.py | 78 ++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 29 deletions(-) diff --git a/src/kimi_cli/utils/file_filter.py b/src/kimi_cli/utils/file_filter.py index 64ac12772..f3221849e 100644 --- a/src/kimi_cli/utils/file_filter.py +++ b/src/kimi_cli/utils/file_filter.py @@ -184,17 +184,36 @@ def _parse_ls_files_output(stdout: str, *, filter_ignored: bool = True) -> list[ return paths -def _git_deleted_files(root: Path, scope: str | None = None) -> set[str]: - """Return the set of tracked files deleted from the working tree.""" - cmd = ["git", "-c", "core.quotepath=false", "ls-files", "-z", "--deleted", *_scope_args(scope)] +def _git_submodule_paths(root: Path) -> list[str]: + """Return registered submodule paths, or ``[]`` on failure.""" try: result = subprocess.run( - cmd, + ["git", "submodule", "status"], cwd=root, capture_output=True, text=True, timeout=_GIT_LS_FILES_TIMEOUT, ) + if result.returncode != 0: + return [] + paths: list[str] = [] + for line in result.stdout.splitlines(): + # Format: " ()" or "+ ()" + parts = line.strip().split() + if len(parts) >= 2: + paths.append(parts[1]) + return paths + except Exception: + return [] + + +def _git_ls_files_z(root: Path, *extra: str) -> set[str]: + """Run ``git ls-files -z`` with *extra* flags and return the path set.""" + cmd = ["git", "-c", "core.quotepath=false", "ls-files", "-z", *extra] + try: + result = subprocess.run( + cmd, cwd=root, capture_output=True, text=True, timeout=_GIT_LS_FILES_TIMEOUT, + ) if result.returncode == 0: return {e for e in result.stdout.split("\0") if e} except Exception: @@ -202,6 +221,24 @@ def _git_deleted_files(root: Path, scope: str | None = None) -> set[str]: return set() +def _git_deleted_files(root: Path, scope: str | None = None) -> set[str]: + """Return the set of tracked files deleted from the working tree. 
+ + Includes deletions inside submodules, since ``--deleted`` does not + support ``--recurse-submodules``. + """ + deleted = _git_ls_files_z(root, "--deleted", *_scope_args(scope)) + for sub in _git_submodule_paths(root): + if scope and not sub.startswith(scope + "/") and scope != sub: + continue + sub_root = root / sub + if not sub_root.is_dir(): + continue + for entry in _git_ls_files_z(sub_root, "--deleted"): + deleted.add(f"{sub}/{entry}") + return deleted + + def list_files_git( root: Path, scope: str | None = None, @@ -249,31 +286,20 @@ def list_files_git( paths = [p for p in paths if p.endswith("/") or p not in deleted] if include_untracked: - others_cmd = [ - "git", - "-c", - "core.quotepath=false", - "ls-files", - "-z", - "--others", - "--exclude-standard", - *_scope_args(scope), - ] - try: - others = subprocess.run( - others_cmd, - cwd=root, - capture_output=True, - text=True, - timeout=_GIT_LS_FILES_TIMEOUT, - ) - if others.returncode == 0: - tracked = set(paths) - for p in _parse_ls_files_output(others.stdout): - if p not in tracked: - paths.append(p) - except Exception: - pass + untracked = _git_ls_files_z(root, "--others", "--exclude-standard", *_scope_args(scope)) + for sub in _git_submodule_paths(root): + if scope and not sub.startswith(scope + "/") and scope != sub: + continue + sub_root = root / sub + if not sub_root.is_dir(): + continue + for entry in _git_ls_files_z(sub_root, "--others", "--exclude-standard"): + untracked.add(f"{sub}/{entry}") + if untracked: + tracked = set(paths) + for p in _parse_ls_files_output("\0".join(sorted(untracked))): + if p not in tracked: + paths.append(p) # Prune directory entries that have no surviving file children. 
if deleted: diff --git a/tests/utils/test_file_filter.py b/tests/utils/test_file_filter.py index a5fd2fbc8..6b5621a80 100644 --- a/tests/utils/test_file_filter.py +++ b/tests/utils/test_file_filter.py @@ -311,6 +311,84 @@ def test_git_untracked_with_dash_prefix(self, tmp_path: Path) -> None: assert "-src/new.py" in result +# --------------------------------------------------------------------------- +# Submodule dirty state (--deleted/--others don't recurse into submodules) +# --------------------------------------------------------------------------- + + +def _init_git_with_submodule(tmp_path: Path, sub_name: str = "sub") -> tuple[Path, Path]: + """Create a repo with a submodule containing one tracked file. + + Returns ``(root, sub_path)`` where *root* is the main repo and + *sub_path* is the submodule working tree. The submodule origin is + placed outside *root* to avoid it being picked up by ``git add``. + """ + sub_origin = tmp_path / "_sub_origin" + sub_origin.mkdir() + (sub_origin / "pkg").mkdir() + (sub_origin / "pkg" / "a.py").write_text("# a") + for cmd in ( + ["git", "init"], + ["git", "config", "user.email", "t@t.com"], + ["git", "config", "user.name", "T"], + ["git", "add", "-A"], + ["git", "commit", "-m", "init"], + ): + subprocess.run(cmd, cwd=sub_origin, capture_output=True, check=True) + + root = tmp_path / "main_repo" + root.mkdir() + (root / "top.py").write_text("# top") + for cmd in ( + ["git", "init"], + ["git", "config", "user.email", "t@t.com"], + ["git", "config", "user.name", "T"], + ["git", "config", "protocol.file.allow", "always"], + ["git", "-c", "protocol.file.allow=always", "submodule", "add", str(sub_origin), sub_name], + ["git", "add", "-A"], + ["git", "commit", "-m", "init"], + ): + subprocess.run(cmd, cwd=root, capture_output=True, check=True) + return root, root / sub_name + + +class TestSubmoduleDirtyState: + """Deleted/new files in submodules must be handled correctly.""" + + def test_deleted_in_submodule_excluded(self, 
tmp_path: Path) -> None: + root, sub = _init_git_with_submodule(tmp_path) + (sub / "pkg" / "a.py").unlink() + + result = list_files_git(root) + assert result is not None + assert not any("a.py" in p for p in result), ( + f"Deleted sub/pkg/a.py should not appear, got: {result}" + ) + + def test_untracked_in_submodule_included(self, tmp_path: Path) -> None: + root, sub = _init_git_with_submodule(tmp_path) + (sub / "pkg" / "b.py").write_text("# new") + + result = list_files_git(root) + assert result is not None + assert "sub/pkg/b.py" in result, ( + f"Untracked sub/pkg/b.py should appear, got: {result}" + ) + + def test_renamed_in_submodule(self, tmp_path: Path) -> None: + root, sub = _init_git_with_submodule(tmp_path) + (sub / "pkg" / "a.py").rename(sub / "pkg" / "renamed.py") + + result = list_files_git(root) + assert result is not None + assert not any("a.py" in p for p in result), ( + f"Old sub/pkg/a.py should not appear, got: {result}" + ) + assert "sub/pkg/renamed.py" in result, ( + f"Renamed sub/pkg/renamed.py should appear, got: {result}" + ) + + # --------------------------------------------------------------------------- # is_ignored unit tests # --------------------------------------------------------------------------- From a57585589cfb2dc71da1b50051c4bd3e79d0820f Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Fri, 27 Mar 2026 22:25:20 +0800 Subject: [PATCH 14/15] Revert "fix: handle submodule dirty state in deleted/untracked file discovery" This reverts commit f7d394cfe8c036d2914a2e2ec43019bb6f12f27d. 
--- src/kimi_cli/utils/file_filter.py | 84 +++++++++++-------------------- tests/utils/test_file_filter.py | 78 ---------------------------- 2 files changed, 29 insertions(+), 133 deletions(-) diff --git a/src/kimi_cli/utils/file_filter.py b/src/kimi_cli/utils/file_filter.py index f3221849e..64ac12772 100644 --- a/src/kimi_cli/utils/file_filter.py +++ b/src/kimi_cli/utils/file_filter.py @@ -184,36 +184,17 @@ def _parse_ls_files_output(stdout: str, *, filter_ignored: bool = True) -> list[ return paths -def _git_submodule_paths(root: Path) -> list[str]: - """Return registered submodule paths, or ``[]`` on failure.""" +def _git_deleted_files(root: Path, scope: str | None = None) -> set[str]: + """Return the set of tracked files deleted from the working tree.""" + cmd = ["git", "-c", "core.quotepath=false", "ls-files", "-z", "--deleted", *_scope_args(scope)] try: result = subprocess.run( - ["git", "submodule", "status"], + cmd, cwd=root, capture_output=True, text=True, timeout=_GIT_LS_FILES_TIMEOUT, ) - if result.returncode != 0: - return [] - paths: list[str] = [] - for line in result.stdout.splitlines(): - # Format: " ()" or "+ ()" - parts = line.strip().split() - if len(parts) >= 2: - paths.append(parts[1]) - return paths - except Exception: - return [] - - -def _git_ls_files_z(root: Path, *extra: str) -> set[str]: - """Run ``git ls-files -z`` with *extra* flags and return the path set.""" - cmd = ["git", "-c", "core.quotepath=false", "ls-files", "-z", *extra] - try: - result = subprocess.run( - cmd, cwd=root, capture_output=True, text=True, timeout=_GIT_LS_FILES_TIMEOUT, - ) if result.returncode == 0: return {e for e in result.stdout.split("\0") if e} except Exception: @@ -221,24 +202,6 @@ def _git_ls_files_z(root: Path, *extra: str) -> set[str]: return set() -def _git_deleted_files(root: Path, scope: str | None = None) -> set[str]: - """Return the set of tracked files deleted from the working tree. 
- - Includes deletions inside submodules, since ``--deleted`` does not - support ``--recurse-submodules``. - """ - deleted = _git_ls_files_z(root, "--deleted", *_scope_args(scope)) - for sub in _git_submodule_paths(root): - if scope and not sub.startswith(scope + "/") and scope != sub: - continue - sub_root = root / sub - if not sub_root.is_dir(): - continue - for entry in _git_ls_files_z(sub_root, "--deleted"): - deleted.add(f"{sub}/{entry}") - return deleted - - def list_files_git( root: Path, scope: str | None = None, @@ -286,20 +249,31 @@ def list_files_git( paths = [p for p in paths if p.endswith("/") or p not in deleted] if include_untracked: - untracked = _git_ls_files_z(root, "--others", "--exclude-standard", *_scope_args(scope)) - for sub in _git_submodule_paths(root): - if scope and not sub.startswith(scope + "/") and scope != sub: - continue - sub_root = root / sub - if not sub_root.is_dir(): - continue - for entry in _git_ls_files_z(sub_root, "--others", "--exclude-standard"): - untracked.add(f"{sub}/{entry}") - if untracked: - tracked = set(paths) - for p in _parse_ls_files_output("\0".join(sorted(untracked))): - if p not in tracked: - paths.append(p) + others_cmd = [ + "git", + "-c", + "core.quotepath=false", + "ls-files", + "-z", + "--others", + "--exclude-standard", + *_scope_args(scope), + ] + try: + others = subprocess.run( + others_cmd, + cwd=root, + capture_output=True, + text=True, + timeout=_GIT_LS_FILES_TIMEOUT, + ) + if others.returncode == 0: + tracked = set(paths) + for p in _parse_ls_files_output(others.stdout): + if p not in tracked: + paths.append(p) + except Exception: + pass # Prune directory entries that have no surviving file children. 
if deleted: diff --git a/tests/utils/test_file_filter.py b/tests/utils/test_file_filter.py index 6b5621a80..a5fd2fbc8 100644 --- a/tests/utils/test_file_filter.py +++ b/tests/utils/test_file_filter.py @@ -311,84 +311,6 @@ def test_git_untracked_with_dash_prefix(self, tmp_path: Path) -> None: assert "-src/new.py" in result -# --------------------------------------------------------------------------- -# Submodule dirty state (--deleted/--others don't recurse into submodules) -# --------------------------------------------------------------------------- - - -def _init_git_with_submodule(tmp_path: Path, sub_name: str = "sub") -> tuple[Path, Path]: - """Create a repo with a submodule containing one tracked file. - - Returns ``(root, sub_path)`` where *root* is the main repo and - *sub_path* is the submodule working tree. The submodule origin is - placed outside *root* to avoid it being picked up by ``git add``. - """ - sub_origin = tmp_path / "_sub_origin" - sub_origin.mkdir() - (sub_origin / "pkg").mkdir() - (sub_origin / "pkg" / "a.py").write_text("# a") - for cmd in ( - ["git", "init"], - ["git", "config", "user.email", "t@t.com"], - ["git", "config", "user.name", "T"], - ["git", "add", "-A"], - ["git", "commit", "-m", "init"], - ): - subprocess.run(cmd, cwd=sub_origin, capture_output=True, check=True) - - root = tmp_path / "main_repo" - root.mkdir() - (root / "top.py").write_text("# top") - for cmd in ( - ["git", "init"], - ["git", "config", "user.email", "t@t.com"], - ["git", "config", "user.name", "T"], - ["git", "config", "protocol.file.allow", "always"], - ["git", "-c", "protocol.file.allow=always", "submodule", "add", str(sub_origin), sub_name], - ["git", "add", "-A"], - ["git", "commit", "-m", "init"], - ): - subprocess.run(cmd, cwd=root, capture_output=True, check=True) - return root, root / sub_name - - -class TestSubmoduleDirtyState: - """Deleted/new files in submodules must be handled correctly.""" - - def test_deleted_in_submodule_excluded(self, 
tmp_path: Path) -> None: - root, sub = _init_git_with_submodule(tmp_path) - (sub / "pkg" / "a.py").unlink() - - result = list_files_git(root) - assert result is not None - assert not any("a.py" in p for p in result), ( - f"Deleted sub/pkg/a.py should not appear, got: {result}" - ) - - def test_untracked_in_submodule_included(self, tmp_path: Path) -> None: - root, sub = _init_git_with_submodule(tmp_path) - (sub / "pkg" / "b.py").write_text("# new") - - result = list_files_git(root) - assert result is not None - assert "sub/pkg/b.py" in result, ( - f"Untracked sub/pkg/b.py should appear, got: {result}" - ) - - def test_renamed_in_submodule(self, tmp_path: Path) -> None: - root, sub = _init_git_with_submodule(tmp_path) - (sub / "pkg" / "a.py").rename(sub / "pkg" / "renamed.py") - - result = list_files_git(root) - assert result is not None - assert not any("a.py" in p for p in result), ( - f"Old sub/pkg/a.py should not appear, got: {result}" - ) - assert "sub/pkg/renamed.py" in result, ( - f"Renamed sub/pkg/renamed.py should appear, got: {result}" - ) - - # --------------------------------------------------------------------------- # is_ignored unit tests # --------------------------------------------------------------------------- From aaaa0cab80a8d78d068f498273c2d49bcd06d2f1 Mon Sep 17 00:00:00 2001 From: n-WN <30841158+n-WN@users.noreply.github.com> Date: Sat, 28 Mar 2026 01:24:07 +0800 Subject: [PATCH 15/15] docs: regenerate changelog for file mention and web panel changes --- CHANGELOG.md | 4 ++++ docs/en/release-notes/changelog.md | 4 ++++ docs/zh/release-notes/changelog.md | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c3da592f6..a99927ba0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ Only write entries that are worth mentioning to users. 
## Unreleased +- Shell: Improve @ file mention discovery with git integration — the shell now uses `git ls-files` as the primary file discovery mechanism, fixing large repositories (e.g., 65k+ files) where the previous 1000-file limit caused late-alphabetical directories to be unreachable; supports scoped search (e.g., `@src/utils/`) for both git and non-git repositories +- Shell: Prevent path traversal in file mention scope parameter — scope values containing `..` are now rejected to prevent `@../` from escaping the workspace root +- Web: Restore unfiltered directory listing in file browser API — the web file browser now shows all directory entries including `node_modules`, `build`, `dist`, etc. + ## 1.27.0 (2026-03-28) - Shell: Add `/feedback` command — submit feedback directly from the CLI session; the command falls back to opening GitHub Issues on network errors or timeouts diff --git a/docs/en/release-notes/changelog.md b/docs/en/release-notes/changelog.md index b71cac5a8..7863b1f6e 100644 --- a/docs/en/release-notes/changelog.md +++ b/docs/en/release-notes/changelog.md @@ -4,6 +4,10 @@ This page documents the changes in each Kimi Code CLI release. ## Unreleased +- Shell: Improve @ file mention discovery with git integration — the shell now uses `git ls-files` as the primary file discovery mechanism, fixing large repositories (e.g., 65k+ files) where the previous 1000-file limit caused late-alphabetical directories to be unreachable; supports scoped search (e.g., `@src/utils/`) for both git and non-git repositories +- Shell: Prevent path traversal in file mention scope parameter — scope values containing `..` are now rejected to prevent `@../` from escaping the workspace root +- Web: Restore unfiltered directory listing in file browser API — the web file browser now shows all directory entries including `node_modules`, `build`, `dist`, etc. 
+ ## 1.27.0 (2026-03-28) - Shell: Add `/feedback` command — submit feedback directly from the CLI session; the command falls back to opening GitHub Issues on network errors or timeouts diff --git a/docs/zh/release-notes/changelog.md b/docs/zh/release-notes/changelog.md index 1f8de646c..a2df1920c 100644 --- a/docs/zh/release-notes/changelog.md +++ b/docs/zh/release-notes/changelog.md @@ -4,6 +4,10 @@ ## 未发布 +- Shell:改进 @ 文件提及发现,集成 git 支持——Shell 现在使用 `git ls-files` 作为主要文件发现机制,修复大仓库(如 65k+ 文件)中之前 1000 文件限制导致靠后字母顺序目录无法访问的问题;支持范围搜索(如 `@src/utils/`),同时适用于 git 和非 git 仓库 +- Shell:防止文件提及范围参数的路径遍历——现在拒绝包含 `..` 的范围值,防止 `@../` 逃离工作区根目录 +- Web:恢复文件浏览器 API 的未过滤目录列表——Web 文件浏览器现在显示所有目录条目,包括 `node_modules`、`build`、`dist` 等 + ## 1.27.0 (2026-03-28) - Shell:新增 `/feedback` 命令——可直接在 CLI 会话中提交反馈,网络错误或超时时自动回退到打开 GitHub Issues 页面