diff --git a/CHANGELOG.md b/CHANGELOG.md index c3da592f6..a99927ba0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ Only write entries that are worth mentioning to users. ## Unreleased +- Shell: Improve @ file mention discovery with git integration — the shell now uses `git ls-files` as the primary file discovery mechanism, fixing large repositories (e.g., 65k+ files) where the previous 1000-file limit caused late-alphabetical directories to be unreachable; supports scoped search (e.g., `@src/utils/`) for both git and non-git repositories +- Shell: Prevent path traversal in file mention scope parameter — scope values containing `..` are now rejected to prevent `@../` from escaping the workspace root +- Web: Restore unfiltered directory listing in file browser API — the web file browser now shows all directory entries including `node_modules`, `build`, `dist`, etc. + ## 1.27.0 (2026-03-28) - Shell: Add `/feedback` command — submit feedback directly from the CLI session; the command falls back to opening GitHub Issues on network errors or timeouts diff --git a/docs/en/release-notes/changelog.md b/docs/en/release-notes/changelog.md index b71cac5a8..7863b1f6e 100644 --- a/docs/en/release-notes/changelog.md +++ b/docs/en/release-notes/changelog.md @@ -4,6 +4,10 @@ This page documents the changes in each Kimi Code CLI release. 
## Unreleased +- Shell: Improve @ file mention discovery with git integration — the shell now uses `git ls-files` as the primary file discovery mechanism, fixing large repositories (e.g., 65k+ files) where the previous 1000-file limit caused late-alphabetical directories to be unreachable; supports scoped search (e.g., `@src/utils/`) for both git and non-git repositories +- Shell: Prevent path traversal in file mention scope parameter — scope values containing `..` are now rejected to prevent `@../` from escaping the workspace root +- Web: Restore unfiltered directory listing in file browser API — the web file browser now shows all directory entries including `node_modules`, `build`, `dist`, etc. + ## 1.27.0 (2026-03-28) - Shell: Add `/feedback` command — submit feedback directly from the CLI session; the command falls back to opening GitHub Issues on network errors or timeouts diff --git a/docs/zh/release-notes/changelog.md b/docs/zh/release-notes/changelog.md index 1f8de646c..a2df1920c 100644 --- a/docs/zh/release-notes/changelog.md +++ b/docs/zh/release-notes/changelog.md @@ -4,6 +4,10 @@ ## 未发布 +- Shell:改进 @ 文件提及发现,集成 git 支持——Shell 现在使用 `git ls-files` 作为主要文件发现机制,修复大仓库(如 65k+ 文件)中之前 1000 文件限制导致靠后字母顺序目录无法访问的问题;支持范围搜索(如 `@src/utils/`),同时适用于 git 和非 git 仓库 +- Shell:防止文件提及范围参数的路径遍历——现在拒绝包含 `..` 的范围值,防止 `@../` 逃离工作区根目录 +- Web:恢复文件浏览器 API 的未过滤目录列表——Web 文件浏览器现在显示所有目录条目,包括 `node_modules`、`build`、`dist` 等 + ## 1.27.0 (2026-03-28) - Shell:新增 `/feedback` 命令——可直接在 CLI 会话中提交反馈,网络错误或超时时自动回退到打开 GitHub Issues 页面 diff --git a/src/kimi_cli/ui/shell/prompt.py b/src/kimi_cli/ui/shell/prompt.py index ede338e05..e1afb11d4 100644 --- a/src/kimi_cli/ui/shell/prompt.py +++ b/src/kimi_cli/ui/shell/prompt.py @@ -611,82 +611,15 @@ def _render_selected_item_lines( class LocalFileMentionCompleter(Completer): - """Offer fuzzy `@` path completion by indexing workspace files.""" + """Offer fuzzy `@` path completion by indexing workspace files. 
+ + File discovery and ignore rules are delegated to + :mod:`kimi_cli.utils.file_filter` so that the web backend can reuse + them. + """ _FRAGMENT_PATTERN = re.compile(r"[^\s@]+") _TRIGGER_GUARDS = frozenset((".", "-", "_", "`", "'", '"', ":", "@", "#", "~")) - _IGNORED_NAME_GROUPS: dict[str, tuple[str, ...]] = { - "vcs_metadata": (".DS_Store", ".bzr", ".git", ".hg", ".svn"), - "tooling_caches": ( - ".build", - ".cache", - ".coverage", - ".fleet", - ".gradle", - ".idea", - ".ipynb_checkpoints", - ".pnpm-store", - ".pytest_cache", - ".pub-cache", - ".ruff_cache", - ".swiftpm", - ".tox", - ".venv", - ".vs", - ".vscode", - ".yarn", - ".yarn-cache", - ), - "js_frontend": ( - ".next", - ".nuxt", - ".parcel-cache", - ".svelte-kit", - ".turbo", - ".vercel", - "node_modules", - ), - "python_packaging": ( - "__pycache__", - "build", - "coverage", - "dist", - "htmlcov", - "pip-wheel-metadata", - "venv", - ), - "java_jvm": (".mvn", "out", "target"), - "dotnet_native": ("bin", "cmake-build-debug", "cmake-build-release", "obj"), - "bazel_buck": ("bazel-bin", "bazel-out", "bazel-testlogs", "buck-out"), - "misc_artifacts": ( - ".dart_tool", - ".serverless", - ".stack-work", - ".terraform", - ".terragrunt-cache", - "DerivedData", - "Pods", - "deps", - "tmp", - "vendor", - ), - } - _IGNORED_NAMES = frozenset(name for group in _IGNORED_NAME_GROUPS.values() for name in group) - _IGNORED_PATTERN_PARTS: tuple[str, ...] 
= ( - r".*_cache$", - r".*-cache$", - r".*\.egg-info$", - r".*\.dist-info$", - r".*\.py[co]$", - r".*\.class$", - r".*\.sw[po]$", - r".*~$", - r".*\.(?:tmp|bak)$", - ) - _IGNORED_PATTERNS = re.compile( - "|".join(f"(?:{part})" for part in _IGNORED_PATTERN_PARTS), - re.IGNORECASE, - ) def __init__( self, @@ -700,9 +633,12 @@ def __init__( self._limit = limit self._cache_time: float = 0.0 self._cached_paths: list[str] = [] + self._cache_scope: str | None = None self._top_cache_time: float = 0.0 self._top_cached_paths: list[str] = [] self._fragment_hint: str | None = None + self._is_git: bool | None = None # lazily detected + self._git_index_mtime: float | None = None self._word_completer = WordCompleter( self._get_paths, @@ -716,14 +652,6 @@ def __init__( pattern=r"^[^\s@]*", ) - @classmethod - def _is_ignored(cls, name: str) -> bool: - if not name: - return True - if name in cls._IGNORED_NAMES: - return True - return bool(cls._IGNORED_PATTERNS.fullmatch(name)) - def _get_paths(self) -> list[str]: fragment = self._fragment_hint or "" if "/" not in fragment and len(fragment) < 3: @@ -731,6 +659,8 @@ def _get_paths(self) -> list[str]: return self._get_deep_paths() def _get_top_level_paths(self) -> list[str]: + from kimi_cli.utils.file_filter import is_ignored + now = time.monotonic() if now - self._top_cache_time <= self._refresh_interval: return self._top_cached_paths @@ -739,7 +669,7 @@ def _get_top_level_paths(self) -> list[str]: try: for entry in sorted(self._root.iterdir(), key=lambda p: p.name): name = entry.name - if self._is_ignored(name): + if is_ignored(name): continue entries.append(f"{name}/" if entry.is_dir() else name) if len(entries) >= self._limit: @@ -752,45 +682,45 @@ def _get_top_level_paths(self) -> list[str]: return self._top_cached_paths def _get_deep_paths(self) -> list[str]: - now = time.monotonic() - if now - self._cache_time <= self._refresh_interval: - return self._cached_paths - - paths: list[str] = [] - try: - for current_root, dirs, files 
in os.walk(self._root): - relative_root = Path(current_root).relative_to(self._root) + from kimi_cli.utils.file_filter import ( + detect_git, + git_index_mtime, + list_files_git, + list_files_walk, + ) - # Prevent descending into ignored directories. - dirs[:] = sorted(d for d in dirs if not self._is_ignored(d)) + fragment = self._fragment_hint or "" - if relative_root.parts and any( - self._is_ignored(part) for part in relative_root.parts - ): - dirs[:] = [] - continue + scope: str | None = None + if "/" in fragment: + scope = fragment.rsplit("/", 1)[0] - if relative_root.parts: - paths.append(relative_root.as_posix() + "/") - if len(paths) >= self._limit: - break + now = time.monotonic() + cache_valid = ( + now - self._cache_time <= self._refresh_interval and self._cache_scope == scope + ) - for file_name in sorted(files): - if self._is_ignored(file_name): - continue - relative = (relative_root / file_name).as_posix() - if not relative: - continue - paths.append(relative) - if len(paths) >= self._limit: - break + # Invalidate on .git/index mtime change (like Claude Code). 
+ if cache_valid and self._is_git: + mtime = git_index_mtime(self._root) + if mtime != self._git_index_mtime: + cache_valid = False - if len(paths) >= self._limit: - break - except OSError: + if cache_valid: return self._cached_paths + if self._is_git is None: + self._is_git = detect_git(self._root) + + paths: list[str] | None = None + if self._is_git: + paths = list_files_git(self._root, scope) + self._git_index_mtime = git_index_mtime(self._root) + if paths is None: + paths = list_files_walk(self._root, scope, limit=self._limit) + self._cached_paths = paths + self._cache_scope = scope self._cache_time = now return self._cached_paths diff --git a/src/kimi_cli/utils/file_filter.py b/src/kimi_cli/utils/file_filter.py new file mode 100644 index 000000000..64ac12772 --- /dev/null +++ b/src/kimi_cli/utils/file_filter.py @@ -0,0 +1,367 @@ +from __future__ import annotations + +import os +import re +import subprocess +from pathlib import Path + +_IGNORED_NAMES: frozenset[str] = frozenset( + ( + # vcs metadata + ".DS_Store", + ".bzr", + ".git", + ".hg", + ".svn", + # tooling caches + ".build", + ".cache", + ".coverage", + ".fleet", + ".gradle", + ".idea", + ".ipynb_checkpoints", + ".pnpm-store", + ".pytest_cache", + ".pub-cache", + ".ruff_cache", + ".swiftpm", + ".tox", + ".venv", + ".vs", + ".vscode", + ".yarn", + ".yarn-cache", + # js / frontend + ".next", + ".nuxt", + ".parcel-cache", + ".svelte-kit", + ".turbo", + ".vercel", + "node_modules", + # python packaging + "__pycache__", + "build", + "coverage", + "dist", + "htmlcov", + "pip-wheel-metadata", + "venv", + # java / jvm + ".mvn", + "out", + "target", + # dotnet / native + "bin", + "cmake-build-debug", + "cmake-build-release", + "obj", + # bazel / buck + "bazel-bin", + "bazel-out", + "bazel-testlogs", + "buck-out", + # misc artifacts + ".dart_tool", + ".serverless", + ".stack-work", + ".terraform", + ".terragrunt-cache", + "DerivedData", + "Pods", + "deps", + "tmp", + "vendor", + ) +) + +_IGNORED_PATTERNS: 
re.Pattern[str] = re.compile( + r"|".join( + ( + r".*_cache$", + r".*-cache$", + r".*\.egg-info$", + r".*\.dist-info$", + r".*\.py[co]$", + r".*\.class$", + r".*\.sw[po]$", + r".*~$", + r".*\.(?:tmp|bak)$", + ) + ), + re.IGNORECASE, +) + +_GIT_LS_FILES_TIMEOUT = 5 + + +def _scope_args(scope: str | None) -> list[str]: + """Return ``["--", scope + "/"]`` if *scope* is given, else ``[]``.""" + return ["--", scope + "/"] if scope else [] + + +def is_ignored(name: str) -> bool: + """Return *True* if *name* should be excluded from file mention results.""" + if not name: + return True + if name in _IGNORED_NAMES: + return True + return bool(_IGNORED_PATTERNS.fullmatch(name)) + + +def detect_git(root: Path) -> bool: + """Return *True* if *root* is inside a git work tree.""" + try: + result = subprocess.run( + ["git", "rev-parse", "--git-dir"], + cwd=root, + capture_output=True, + timeout=2, + ) + return result.returncode == 0 + except Exception: + return False + + +def git_index_mtime(root: Path) -> float | None: + """Return the mtime of ``.git/index``, or *None* if unavailable.""" + try: + result = subprocess.run( + ["git", "rev-parse", "--git-dir"], + cwd=root, + capture_output=True, + text=True, + timeout=2, + ) + if result.returncode != 0: + return None + git_dir = Path(result.stdout.strip()) + if not git_dir.is_absolute(): + git_dir = root / git_dir + index = git_dir / "index" + return index.stat().st_mtime + except Exception: + return None + + +def _parse_ls_files_output(stdout: str, *, filter_ignored: bool = True) -> list[str]: + """Parse NUL-delimited ``git ls-files -z`` output into paths with synthesised dirs. + + When *filter_ignored* is *True*, paths whose segments match + ``is_ignored()`` are excluded so that tracked ``node_modules/``, + ``vendor/``, etc. do not pollute completion candidates. 
+ """ + paths: list[str] = [] + seen_dirs: set[str] = set() + ignored_prefixes: set[str] = set() + for entry in stdout.split("\0"): + if not entry: + continue + + parts = entry.split("/") + + if filter_ignored: + skip = False + for i, part in enumerate(parts): + prefix = "/".join(parts[: i + 1]) + "/" + if prefix in ignored_prefixes: + skip = True + break + if is_ignored(part): + ignored_prefixes.add(prefix) + skip = True + break + if skip: + continue + + for i in range(1, len(parts)): + dir_path = "/".join(parts[:i]) + "/" + if dir_path not in seen_dirs: + seen_dirs.add(dir_path) + paths.append(dir_path) + paths.append(entry) + return paths + + +def _git_deleted_files(root: Path, scope: str | None = None) -> set[str]: + """Return the set of tracked files deleted from the working tree.""" + cmd = ["git", "-c", "core.quotepath=false", "ls-files", "-z", "--deleted", *_scope_args(scope)] + try: + result = subprocess.run( + cmd, + cwd=root, + capture_output=True, + text=True, + timeout=_GIT_LS_FILES_TIMEOUT, + ) + if result.returncode == 0: + return {e for e in result.stdout.split("\0") if e} + except Exception: + pass + return set() + + +def list_files_git( + root: Path, + scope: str | None = None, + *, + include_untracked: bool = True, +) -> list[str] | None: + """List workspace paths via ``git ls-files``, or *None* on failure. + + When *scope* is given (e.g. ``"src/utils"``), only files under that + subtree are returned. When *include_untracked* is *True*, untracked + files (respecting ``.gitignore``) are appended via + ``--others --exclude-standard``. + + Deleted working-tree files (``git ls-files --deleted``) are excluded + so that renamed / removed files do not appear as stale candidates. + """ + if scope and ".." 
in scope.split("/"): + return None + + cmd = [ + "git", + "-c", + "core.quotepath=false", + "ls-files", + "-z", + "--recurse-submodules", + *_scope_args(scope), + ] + try: + result = subprocess.run( + cmd, + cwd=root, + capture_output=True, + text=True, + timeout=_GIT_LS_FILES_TIMEOUT, + ) + if result.returncode != 0: + return None + except Exception: + return None + + deleted = _git_deleted_files(root, scope) + paths = _parse_ls_files_output(result.stdout) + if deleted: + paths = [p for p in paths if p.endswith("/") or p not in deleted] + + if include_untracked: + others_cmd = [ + "git", + "-c", + "core.quotepath=false", + "ls-files", + "-z", + "--others", + "--exclude-standard", + *_scope_args(scope), + ] + try: + others = subprocess.run( + others_cmd, + cwd=root, + capture_output=True, + text=True, + timeout=_GIT_LS_FILES_TIMEOUT, + ) + if others.returncode == 0: + tracked = set(paths) + for p in _parse_ls_files_output(others.stdout): + if p not in tracked: + paths.append(p) + except Exception: + pass + + # Prune directory entries that have no surviving file children. + if deleted: + live_dirs: set[str] = set() + for p in paths: + if not p.endswith("/"): + parts = p.split("/") + for i in range(1, len(parts)): + live_dirs.add("/".join(parts[:i]) + "/") + paths = [p for p in paths if not p.endswith("/") or p in live_dirs] + + return paths + + +def list_files_walk( + root: Path, + scope: str | None = None, + *, + limit: int = 1000, +) -> list[str]: + """List workspace paths via ``os.walk`` (fallback for non-git repos). + + When *scope* is given, the walk starts from that subdirectory. + """ + resolved_root = root.resolve() + walk_root = (root / scope).resolve() if scope else resolved_root + + # Prevent path traversal outside the workspace (e.g. scope="../"). 
+ try: + if not walk_root.is_relative_to(resolved_root): + return [] + except (OSError, ValueError): + return [] + + paths: list[str] = [] + try: + for current_root, dirs, files in os.walk(walk_root): + relative_root = Path(current_root).resolve().relative_to(resolved_root) + + dirs[:] = sorted(d for d in dirs if not is_ignored(d)) + + if relative_root.parts and any(is_ignored(part) for part in relative_root.parts): + dirs[:] = [] + continue + + if relative_root.parts: + paths.append(relative_root.as_posix() + "/") + if len(paths) >= limit: + break + + for file_name in sorted(files): + if is_ignored(file_name): + continue + relative = (relative_root / file_name).as_posix() + if not relative: + continue + paths.append(relative) + if len(paths) >= limit: + break + + if len(paths) >= limit: + break + except OSError: + pass + + return paths + + +def list_directory_filtered(directory: Path) -> list[dict[str, str | int]]: + """List immediate children of *directory*, filtering ignored entries. + + Returns dicts with ``name``, ``type`` (``"file"``/``"directory"``), and + optionally ``size``. Suitable for the web API response. 
+ """ + result: list[dict[str, str | int]] = [] + try: + for subpath in directory.iterdir(): + if is_ignored(subpath.name): + continue + if subpath.is_dir(): + result.append({"name": subpath.name, "type": "directory"}) + else: + try: + size = subpath.stat().st_size + except OSError: + size = 0 + result.append({"name": subpath.name, "type": "file", "size": size}) + except OSError: + pass + result.sort(key=lambda x: (str(x["type"]), str(x["name"]))) + return result diff --git a/src/kimi_cli/web/api/sessions.py b/src/kimi_cli/web/api/sessions.py index 36cd42507..7c791ff58 100644 --- a/src/kimi_cli/web/api/sessions.py +++ b/src/kimi_cli/web/api/sessions.py @@ -533,13 +533,11 @@ async def get_session_file( if subpath.is_dir(): result.append({"name": subpath.name, "type": "directory"}) else: - result.append( - { - "name": subpath.name, - "type": "file", - "size": subpath.stat().st_size, - } - ) + try: + size = subpath.stat().st_size + except OSError: + size = 0 + result.append({"name": subpath.name, "type": "file", "size": size}) result.sort(key=lambda x: (cast(str, x["type"]), cast(str, x["name"]))) return Response(content=json.dumps(result), media_type="application/json") diff --git a/tests/e2e/test_file_mention_e2e.py b/tests/e2e/test_file_mention_e2e.py new file mode 100644 index 000000000..cae3e86e5 --- /dev/null +++ b/tests/e2e/test_file_mention_e2e.py @@ -0,0 +1,184 @@ +"""E2E tests for ``@`` file mention auto-completion. 
+ +These tests verify that the file mention completer discovers files +correctly in a real PTY environment, including: +- Basic @ trigger and completion popup +- Scoped search with a ``dir/`` prefix (e.g. ``@src/``) +- git ls-files integration (large repo simulation) +- Ignored directories (.git, node_modules) are filtered +""" + +from __future__ import annotations + +import subprocess +import sys +import time +from pathlib import Path + +import pytest + +from tests.e2e.shell_pty_helpers import ( + make_home_dir, + make_work_dir, + read_until_prompt_ready, + start_shell_pty, + write_scripted_config, +) + +pytestmark = pytest.mark.skipif( + sys.platform == "win32", + reason="Shell PTY E2E tests require a Unix-like PTY.", +) + + +def _init_git_repo(work_dir: Path) -> None: + """Initialise a git repo, stage all files, and commit.""" + subprocess.run(["git", "init"], cwd=work_dir, capture_output=True, check=True) + subprocess.run( + ["git", "config", "user.email", "test@test.com"], + cwd=work_dir, + capture_output=True, + check=True, + ) + subprocess.run( + ["git", "config", "user.name", "Test"], + cwd=work_dir, + capture_output=True, + check=True, + ) + subprocess.run(["git", "add", "-A"], cwd=work_dir, capture_output=True, check=True) + subprocess.run( + ["git", "commit", "-m", "init"], + cwd=work_dir, + capture_output=True, + check=True, + ) + + +def _setup_shell(tmp_path: Path, work_dir: Path): + """Start a kimi-cli shell in PTY with a scripted (no-op) model.""" + home_dir = make_home_dir(tmp_path) + config_path = write_scripted_config(tmp_path, scripts=["Hello!"]) + shell = start_shell_pty( + config_path=config_path, + work_dir=work_dir, + home_dir=home_dir, + yolo=False, + ) + # Wait for the welcome prompt + read_until_prompt_ready(shell, after=0, timeout=20.0) + return shell + + +def test_at_trigger_shows_top_level_entries(tmp_path: Path): + """Typing ``@`` shows top-level files/directories.""" + work_dir = make_work_dir(tmp_path) + (work_dir / "README.md").write_text("# Hello") +
(work_dir / "src").mkdir() + (work_dir / "src" / "main.py").write_text("print('hi')") + # Ignored dir — should NOT appear + (work_dir / "node_modules").mkdir() + (work_dir / "node_modules" / "junk.js").write_text("") + + shell = _setup_shell(tmp_path, work_dir) + try: + mark = shell.mark() + shell.send_text("@") + time.sleep(1.0) + output = shell.wait_for_quiet(timeout=3.0, after=mark) + + # Should show real files + assert "README.md" in output or "src/" in output, ( + f"Expected top-level entries in output, got:\n{output}" + ) + # Should NOT show ignored dirs + assert "node_modules" not in output, f"node_modules should be filtered, got:\n{output}" + finally: + shell.send_key("escape") + shell.send_key("ctrl_c") + shell.close() + + +def test_at_scoped_search_with_slash(tmp_path: Path): + """Typing ``@src/`` shows files inside ``src/`` directory.""" + work_dir = make_work_dir(tmp_path) + src = work_dir / "src" + src.mkdir() + (src / "app.py").write_text("# app") + (src / "utils.py").write_text("# utils") + # Another top-level dir + (work_dir / "docs").mkdir() + (work_dir / "docs" / "readme.md").write_text("") + + shell = _setup_shell(tmp_path, work_dir) + try: + mark = shell.mark() + shell.send_text("@src/") + time.sleep(1.0) + output = shell.wait_for_quiet(timeout=3.0, after=mark) + + # Should show src/ contents + assert "app.py" in output or "utils.py" in output, ( + f"Expected src/ contents in output, got:\n{output}" + ) + finally: + shell.send_key("escape") + shell.send_key("ctrl_c") + shell.close() + + +def test_git_ls_files_finds_deep_files(tmp_path: Path): + """In a git repo, deep files are discoverable even with many early dirs.""" + work_dir = make_work_dir(tmp_path) + + # Create many early-alphabetical directories (would exhaust os.walk limit) + for i in range(30): + d = work_dir / f"aaa_{i:03d}" + d.mkdir() + for j in range(20): + (d / f"file_{j}.txt").write_text(f"content {i}/{j}") + + # The target — late alphabetically + target = work_dir / "zzz_target" 
+ target.mkdir() + (target / "important.py").write_text("# find me") + + # Init git repo so git ls-files is used (files already created above). + _init_git_repo(work_dir) + + shell = _setup_shell(tmp_path, work_dir) + try: + mark = shell.mark() + shell.send_text("@zzz_target/") + time.sleep(1.5) + output = shell.wait_for_quiet(timeout=5.0, after=mark) + + assert "important.py" in output, f"Expected important.py via git ls-files, got:\n{output}" + finally: + shell.send_key("escape") + shell.send_key("ctrl_c") + shell.close() + + +def test_git_ignores_are_respected(tmp_path: Path): + """Files in .gitignore should not appear in @ completion.""" + work_dir = make_work_dir(tmp_path) + (work_dir / "visible.py").write_text("# visible") + (work_dir / "secret.log").write_text("secret stuff") + (work_dir / ".gitignore").write_text("*.log\n") + + _init_git_repo(work_dir) + + shell = _setup_shell(tmp_path, work_dir) + try: + mark = shell.mark() + shell.send_text("@sec") + time.sleep(1.5) + output = shell.wait_for_quiet(timeout=3.0, after=mark) + + # secret.log is gitignored — should NOT appear + assert "secret.log" not in output, f"secret.log should be gitignored, got:\n{output}" + finally: + shell.send_key("escape") + shell.send_key("ctrl_c") + shell.close() diff --git a/tests/ui_and_conv/test_file_completer.py b/tests/ui_and_conv/test_file_completer.py index 08e5fc54e..9df041ee4 100644 --- a/tests/ui_and_conv/test_file_completer.py +++ b/tests/ui_and_conv/test_file_completer.py @@ -2,6 +2,7 @@ from __future__ import annotations +import subprocess from pathlib import Path from inline_snapshot import snapshot @@ -91,6 +92,34 @@ def test_at_guard_prevents_email_like_fragments(tmp_path: Path): assert not texts +def test_scoped_walk_finds_late_alphabetical_dirs(tmp_path: Path): + """Directories that sort late alphabetically must still be reachable. 
+ + Regression test for #1375: in large repos, ``os.walk`` exhausted the + 1000-file limit on early directories, making later ones (like ``src/``) + invisible. With scoped search (fragment contains ``/``), the walk starts + at the target subtree. + """ + # Create many early-alphabetical directories with files to exhaust a small limit. + for i in range(20): + d = tmp_path / f"aaa_{i:03d}" + d.mkdir() + for j in range(10): + (d / f"file_{j}.txt").write_text("") + + # The target directory sorts late. + target = tmp_path / "zzz_target" + target.mkdir() + (target / "important.py").write_text("# find me") + + # With a low limit, the old os.walk approach would never reach zzz_target. + completer = LocalFileMentionCompleter(tmp_path, limit=50) + + texts = _completion_texts(completer, "@zzz_target/") + + assert "zzz_target/important.py" in texts + + def test_basename_prefix_is_ranked_first(tmp_path: Path): """Prefer basename prefix matches over cross-segment fuzzy matches. @@ -117,3 +146,74 @@ def test_basename_prefix_is_ranked_first(tmp_path: Path): "src/kimi_cli/tools/file/patch.py", ] ) + + +def _init_git_repo(work_dir: Path) -> None: + """Initialise a git repo, stage all files, and commit.""" + for cmd in ( + ["git", "init"], + ["git", "config", "user.email", "test@test.com"], + ["git", "config", "user.name", "Test"], + ["git", "add", "-A"], + ["git", "commit", "-m", "init"], + ): + subprocess.run(cmd, cwd=work_dir, capture_output=True, check=True) + + +def test_tracked_ignored_dirs_filtered_in_git_mode(tmp_path: Path): + """Tracked ``node_modules/`` and ``vendor/`` must still be filtered. + + Regression test: ``git ls-files`` returns all tracked paths, so + directories in ``_IGNORED_NAMES`` were surfacing in completion when + they happened to be committed. 
+ """ + (tmp_path / "src").mkdir() + (tmp_path / "src" / "app.py").write_text("# app") + nm = tmp_path / "node_modules" / "pkg" + nm.mkdir(parents=True) + (nm / "index.js").write_text("module.exports = {}") + vendor = tmp_path / "vendor" + vendor.mkdir() + (vendor / "dep.py").write_text("# dep") + + _init_git_repo(tmp_path) + + completer = LocalFileMentionCompleter(tmp_path) + + texts = _completion_texts(completer, "@nod") + assert not any("node_modules" in t for t in texts), ( + f"node_modules should be filtered even if tracked, got: {texts}" + ) + + texts = _completion_texts(completer, "@ven") + assert not any("vendor" in t for t in texts), ( + f"vendor should be filtered even if tracked, got: {texts}" + ) + + +def test_unstaged_rename_hides_deleted_path(tmp_path: Path): + """After ``mv old.py new.py`` without staging, old.py must not appear. + + Regression test: ``git ls-files`` reads the index, so a file that was + moved on disk (but not staged) would still show up as a stale + candidate. + """ + (tmp_path / "src").mkdir() + (tmp_path / "src" / "old.py").write_text("# original") + + _init_git_repo(tmp_path) + + # Rename without staging. 
+ (tmp_path / "src" / "old.py").rename(tmp_path / "src" / "new.py") + + completer = LocalFileMentionCompleter(tmp_path) + + texts = _completion_texts(completer, "@old") + assert not any("old.py" in t for t in texts), ( + f"Deleted old.py should not appear in completion, got: {texts}" + ) + + texts = _completion_texts(completer, "@new") + assert any("new.py" in t for t in texts), ( + f"Renamed new.py should appear via --others, got: {texts}" + ) diff --git a/tests/utils/test_file_filter.py b/tests/utils/test_file_filter.py new file mode 100644 index 000000000..a5fd2fbc8 --- /dev/null +++ b/tests/utils/test_file_filter.py @@ -0,0 +1,360 @@ +"""Tests for file_filter: git vs walk cross-validation and edge cases.""" + +from __future__ import annotations + +import os +import subprocess +from pathlib import Path + +import pytest + +from kimi_cli.utils.file_filter import ( + is_ignored, + list_files_git, + list_files_walk, +) + + +def _init_git(root: Path) -> None: + for cmd in ( + ["git", "init"], + ["git", "config", "user.email", "t@t.com"], + ["git", "config", "user.name", "T"], + ["git", "add", "-A"], + ["git", "commit", "-m", "init"], + ): + subprocess.run(cmd, cwd=root, capture_output=True, check=True) + + +# --------------------------------------------------------------------------- +# Cross-validation: git vs walk must agree on a clean working tree +# --------------------------------------------------------------------------- + + +class TestGitWalkParity: + """On a clean git repo the two backends must return the same path set.""" + + def test_flat_repo(self, tmp_path: Path) -> None: + (tmp_path / "README.md").write_text("hi") + (tmp_path / "main.py").write_text("print(1)") + _init_git(tmp_path) + + git = set(list_files_git(tmp_path) or []) + walk = set(list_files_walk(tmp_path)) + assert git == walk + + def test_nested_dirs(self, tmp_path: Path) -> None: + (tmp_path / "src" / "pkg").mkdir(parents=True) + (tmp_path / "src" / "pkg" / "mod.py").write_text("") + 
(tmp_path / "src" / "app.py").write_text("") + (tmp_path / "docs").mkdir() + (tmp_path / "docs" / "guide.md").write_text("") + _init_git(tmp_path) + + git = set(list_files_git(tmp_path) or []) + walk = set(list_files_walk(tmp_path)) + assert git == walk + + def test_with_gitignore(self, tmp_path: Path) -> None: + """Gitignored files excluded from both paths.""" + (tmp_path / "app.py").write_text("") + (tmp_path / "debug.log").write_text("log") + (tmp_path / ".gitignore").write_text("*.log\n") + _init_git(tmp_path) + + git = set(list_files_git(tmp_path) or []) + walk = set(list_files_walk(tmp_path)) + + assert "debug.log" not in git + # walk doesn't read .gitignore, so it may include debug.log. + # The key invariant: git is a subset of walk for non-gitignored files. + assert git <= walk | {"debug.log"} + + def test_scoped_search_parity(self, tmp_path: Path) -> None: + (tmp_path / "src" / "core").mkdir(parents=True) + (tmp_path / "src" / "core" / "engine.py").write_text("") + (tmp_path / "src" / "util.py").write_text("") + (tmp_path / "docs").mkdir() + (tmp_path / "docs" / "api.md").write_text("") + _init_git(tmp_path) + + git = set(list_files_git(tmp_path, "src") or []) + walk = set(list_files_walk(tmp_path, "src")) + assert git == walk + + # No docs contamination + assert not any("docs" in p for p in git) + + +# --------------------------------------------------------------------------- +# Ignored directory filtering (tracked content must still be hidden) +# --------------------------------------------------------------------------- + + +class TestIgnoredDirFiltering: + """Tracked ignored dirs must not leak into git results.""" + + @pytest.mark.parametrize( + "dirname", ["node_modules", "vendor", "__pycache__", ".vscode", "dist"] + ) + def test_tracked_ignored_dir_filtered(self, tmp_path: Path, dirname: str) -> None: + (tmp_path / "keep.py").write_text("") + d = tmp_path / dirname + d.mkdir() + (d / "stuff.js").write_text("") + _init_git(tmp_path) + + git = 
list_files_git(tmp_path) or [] + walk = list_files_walk(tmp_path) + + assert not any(dirname in p for p in git), f"{dirname} leaked via git" + assert not any(dirname in p for p in walk), f"{dirname} leaked via walk" + + def test_nested_ignored_dir(self, tmp_path: Path) -> None: + """Ignored dir deep inside tree must also be filtered.""" + (tmp_path / "src" / "lib" / "node_modules" / "pkg").mkdir(parents=True) + (tmp_path / "src" / "lib" / "node_modules" / "pkg" / "index.js").write_text("") + (tmp_path / "src" / "lib" / "real.py").write_text("") + _init_git(tmp_path) + + git = list_files_git(tmp_path) or [] + assert "src/lib/real.py" in git + assert not any("node_modules" in p for p in git) + + +# --------------------------------------------------------------------------- +# Deleted / renamed file handling +# --------------------------------------------------------------------------- + + +class TestDeletedFileHandling: + """Stale index entries must not appear in results.""" + + def test_deleted_file_excluded(self, tmp_path: Path) -> None: + (tmp_path / "a.py").write_text("") + (tmp_path / "b.py").write_text("") + _init_git(tmp_path) + + os.remove(tmp_path / "a.py") + + git = list_files_git(tmp_path) or [] + assert "a.py" not in git + assert "b.py" in git + + def test_renamed_file_old_excluded_new_included(self, tmp_path: Path) -> None: + (tmp_path / "old.py").write_text("# old") + _init_git(tmp_path) + + (tmp_path / "old.py").rename(tmp_path / "new.py") + + git = list_files_git(tmp_path) or [] + assert "old.py" not in git + assert "new.py" in git + + def test_empty_dir_pruned_after_delete(self, tmp_path: Path) -> None: + """Deleting the only file under a dir must also remove the dir entry.""" + (tmp_path / "solo").mkdir() + (tmp_path / "solo" / "only.py").write_text("") + (tmp_path / "keep.py").write_text("") + _init_git(tmp_path) + + os.remove(tmp_path / "solo" / "only.py") + os.rmdir(tmp_path / "solo") + + git = list_files_git(tmp_path) or [] + assert "solo/" not 
in git + assert "solo/only.py" not in git + assert "keep.py" in git + + def test_partial_delete_preserves_dir(self, tmp_path: Path) -> None: + """Deleting one of two files keeps the dir entry.""" + (tmp_path / "pkg").mkdir() + (tmp_path / "pkg" / "a.py").write_text("") + (tmp_path / "pkg" / "b.py").write_text("") + _init_git(tmp_path) + + os.remove(tmp_path / "pkg" / "a.py") + + git = list_files_git(tmp_path) or [] + assert "pkg/" in git + assert "pkg/a.py" not in git + assert "pkg/b.py" in git + + +# --------------------------------------------------------------------------- +# Untracked file discovery +# --------------------------------------------------------------------------- + + +class TestUntrackedFiles: + """New untracked files (respecting .gitignore) must be discovered.""" + + def test_untracked_file_included(self, tmp_path: Path) -> None: + (tmp_path / "tracked.py").write_text("") + _init_git(tmp_path) + + (tmp_path / "untracked.py").write_text("# new") + + git = list_files_git(tmp_path) or [] + assert "tracked.py" in git + assert "untracked.py" in git + + def test_gitignored_untracked_excluded(self, tmp_path: Path) -> None: + (tmp_path / "app.py").write_text("") + (tmp_path / ".gitignore").write_text("*.log\n") + _init_git(tmp_path) + + (tmp_path / "debug.log").write_text("noise") + + git = list_files_git(tmp_path) or [] + assert "debug.log" not in git + + def test_untracked_without_flag(self, tmp_path: Path) -> None: + (tmp_path / "tracked.py").write_text("") + _init_git(tmp_path) + (tmp_path / "untracked.py").write_text("") + + git = list_files_git(tmp_path, include_untracked=False) or [] + assert "tracked.py" in git + assert "untracked.py" not in git + + +# --------------------------------------------------------------------------- +# Special-character filenames and path traversal prevention +# --------------------------------------------------------------------------- + + +class TestSpecialCharFilenames: + """Filenames with tab, quotes, or backslash must be handled correctly.""" + 
+ def test_tab_in_filename(self, tmp_path: Path) -> None: + p = tmp_path / "tab\there.py" + p.write_text("") + _init_git(tmp_path) + + git = list_files_git(tmp_path) or [] + assert "tab\there.py" in git + + def test_quote_in_filename(self, tmp_path: Path) -> None: + p = tmp_path / 'quote"name.py' + p.write_text("") + _init_git(tmp_path) + + git = list_files_git(tmp_path) or [] + assert 'quote"name.py' in git + + def test_backslash_in_filename(self, tmp_path: Path) -> None: + p = tmp_path / "back\\slash.py" + p.write_text("") + _init_git(tmp_path) + + git = list_files_git(tmp_path) or [] + assert "back\\slash.py" in git + + +class TestPathTraversal: + """Scope containing ``..`` must be rejected.""" + + def test_git_rejects_dotdot(self, tmp_path: Path) -> None: + (tmp_path / "a.py").write_text("") + _init_git(tmp_path) + assert list_files_git(tmp_path, "..") is None + + def test_walk_rejects_dotdot(self, tmp_path: Path) -> None: + (tmp_path / "a.py").write_text("") + assert list_files_walk(tmp_path, "..") == [] + + def test_nested_dotdot_rejected(self, tmp_path: Path) -> None: + (tmp_path / "a.py").write_text("") + _init_git(tmp_path) + assert list_files_git(tmp_path, "src/../../etc") is None + + +# --------------------------------------------------------------------------- +# Dash-prefixed directory names (must not be parsed as git options) +# --------------------------------------------------------------------------- + + +class TestDashPrefixScope: + """Directory names starting with ``-`` must not be misinterpreted as git options.""" + + def test_git_scoped_dash_prefix(self, tmp_path: Path) -> None: + d = tmp_path / "-docs" + d.mkdir() + (d / "guide.md").write_text("# guide") + _init_git(tmp_path) + + result = list_files_git(tmp_path, "-docs") + assert result is not None + assert "-docs/guide.md" in result + + def test_git_deleted_with_dash_prefix(self, tmp_path: Path) -> None: + d = tmp_path / "-data" + d.mkdir() + (d / "old.csv").write_text("a,b") + 
_init_git(tmp_path) + (d / "old.csv").unlink() + + result = list_files_git(tmp_path, "-data") + assert result is not None + assert not any("old.csv" in p for p in result) + + def test_git_untracked_with_dash_prefix(self, tmp_path: Path) -> None: + d = tmp_path / "-src" + d.mkdir() + (d / "tracked.py").write_text("# tracked") + _init_git(tmp_path) + (d / "new.py").write_text("# new") + + result = list_files_git(tmp_path, "-src") + assert result is not None + assert "-src/new.py" in result + + +# --------------------------------------------------------------------------- +# is_ignored unit tests +# --------------------------------------------------------------------------- + + +class TestIsIgnored: + @pytest.mark.parametrize( + "name", + ["node_modules", "__pycache__", ".git", ".DS_Store", "vendor", "dist", ".vscode"], + ) + def test_ignored_names(self, name: str) -> None: + assert is_ignored(name) + + @pytest.mark.parametrize( + "name", + ["foo_cache", "bar-cache", "pkg.egg-info", "lib.dist-info", "mod.pyc", "A.class", "f.swp"], + ) + def test_ignored_patterns(self, name: str) -> None: + assert is_ignored(name) + + @pytest.mark.parametrize( + "name", + ["src", "main.py", "README.md", "package.json", ".gitignore", "Makefile"], + ) + def test_not_ignored(self, name: str) -> None: + assert not is_ignored(name) + + def test_empty_is_ignored(self) -> None: + assert is_ignored("") + + +# --------------------------------------------------------------------------- +# Fallback behaviour +# --------------------------------------------------------------------------- + + +class TestFallback: + """list_files_git returns None for non-git dirs; walk always works.""" + + def test_non_git_returns_none(self, tmp_path: Path) -> None: + (tmp_path / "a.py").write_text("") + assert list_files_git(tmp_path) is None + + def test_walk_works_without_git(self, tmp_path: Path) -> None: + (tmp_path / "a.py").write_text("") + result = list_files_walk(tmp_path) + assert "a.py" in result