From 372481cdccefb779a780cd7319fb19b336886992 Mon Sep 17 00:00:00 2001 From: Dmitriy Kovalenko Date: Thu, 14 May 2026 22:56:08 -0700 Subject: [PATCH 1/3] feat: configurable submodule support + sparse-checkout test Adds `support_submodules` (default true) to FilePickerOptions, threaded through ScanConfig, the background watcher, the Lua module, and the fff-c FFI (new `fff_create_instance3`; `fff_create_instance2` keeps the existing default). When false, submodule directories are pruned from the WalkBuilder via `Repository::submodules()` and excluded from `StatusOptions`. Lua exposes the toggle as `config.git.support_submodules`. Adds an e2e integration test that drives a real `git clone --no-checkout` + cone-mode `sparse-checkout` and asserts the picker indexes only the materialized files and never panics on libgit2 statuses for non-materialized paths. --- README.md | 1 + crates/fff-c/include/fff.h | 34 +++- crates/fff-c/src/ffi_types.rs | 10 +- crates/fff-c/src/lib.rs | 51 +++++- crates/fff-core/src/background_watcher.rs | 24 ++- crates/fff-core/src/file_picker.rs | 70 ++++++++- crates/fff-core/src/git.rs | 13 +- crates/fff-core/src/scan.rs | 26 +++- crates/fff-core/src/shared.rs | 3 +- crates/fff-core/tests/sparse_checkout_test.rs | 146 ++++++++++++++++++ crates/fff-mcp/src/main.rs | 6 + crates/fff-nvim/src/lib.rs | 23 ++- crates/fff-python/src/finder.rs | 2 + lua/fff/conf.lua | 4 + lua/fff/core.lua | 2 + 15 files changed, 389 insertions(+), 26 deletions(-) create mode 100644 crates/fff-core/tests/sparse_checkout_test.rs diff --git a/README.md b/README.md index 994d7a93..70180a80 100644 --- a/README.md +++ b/README.md @@ -333,6 +333,7 @@ require('fff').setup({ }, git = { status_text_color = false, -- true to color filenames by git status + support_submodules = true, -- walk into submodules and report their git status; set false to skip them }, select = { -- Return winid to open the chosen file in, or nil to open in the original window diff --git a/crates/fff-c/include/fff.h b/crates/fff-c/include/fff.h index 6be43935..21173be8 100644 --- a/crates/fff-c/include/fff.h +++ b/crates/fff-c/include/fff.h @@ -12,7 +12,7 @@ /** * Current used version of [`FffCreateOptions`]. */ -#define FFF_CREATE_OPTIONS_VERSION 1 +#define FFF_CREATE_OPTIONS_VERSION 2 /** * Result envelope returned by all `fff_*` functions. @@ -133,6 +133,12 @@ typedef struct FffCreateOptions { * `enable_fs_root_scanning`. */ bool enable_home_dir_scanning; + /** + * When `true` (default), submodule directories are walked and reported in + * git status. When `false`, submodule paths are skipped during traversal + * and excluded from git status. + */ + bool support_submodules; } FffCreateOptions; /** @@ -515,6 +521,32 @@ struct FffResult *fff_create_instance_with(const struct FffCreateOptions *opts); */ struct FffResult *fff_create_instance_with_value(struct FffCreateOptions opts); +/** + * Create a new file finder instance (v3, with submodule support toggle). + * + * Identical to [`fff_create_instance2`] except for the trailing + * `support_submodules` flag. When `true` (recommended default), submodule + * directories are walked and reported in git status. When `false`, submodule + * paths are skipped during traversal and excluded from git status. + * + * ## Safety + * String parameters must be valid null-terminated UTF-8 or NULL. + */ +struct FffResult *fff_create_instance3(const char *base_path, + const char *frecency_db_path, + const char *history_db_path, + bool _use_unsafe_no_lock, + bool enable_mmap_cache, + bool enable_content_indexing, + bool watch, + bool ai_mode, + const char *log_file_path, + const char *log_level, + uint64_t cache_budget_max_files, + uint64_t cache_budget_max_bytes, + uint64_t cache_budget_max_file_size, + bool support_submodules); + /** * Destroy a file finder instance and free all its resources. * diff --git a/crates/fff-c/src/ffi_types.rs b/crates/fff-c/src/ffi_types.rs index d2b0e972..c9070876 100644 --- a/crates/fff-c/src/ffi_types.rs +++ b/crates/fff-c/src/ffi_types.rs @@ -15,7 +15,7 @@ use fff::{ }; /// Current used version of [`FffCreateOptions`]. -pub const FFF_CREATE_OPTIONS_VERSION: u32 = 1; +pub const FFF_CREATE_OPTIONS_VERSION: u32 = 2; /// Options for `fff_create_instance_with`. /// @@ -57,7 +57,11 @@ pub struct FffCreateOptions { /// Allow indexing the user's home directory. Same trade-off as /// `enable_fs_root_scanning`. pub enable_home_dir_scanning: bool, - // ----- new version 2+ fields go here, ALWAYS appended ----- + // ----- v2 fields ----- + /// Track files inside git submodules. When true, submodule contents are + /// indexed and their git status reflected in results. + pub support_submodules: bool, + // ----- new version 3+ fields go here, ALWAYS appended ----- } impl FffCreateOptions { @@ -79,6 +83,7 @@ impl FffCreateOptions { cache_budget_max_file_size: 0, enable_fs_root_scanning: false, enable_home_dir_scanning: false, + support_submodules: true, } } } @@ -804,6 +809,7 @@ mod options_layout_tests { fn fff_create_options_layout_is_stable_64bit() { assert_eq!(size_of::(), 88); assert_eq!(align_of::(), 8); + assert_eq!(offset_of!(FffCreateOptions, support_submodules), 82); assert_eq!(offset_of!(FffCreateOptions, version), 0); assert_eq!(offset_of!(FffCreateOptions, base_path), 8); diff --git a/crates/fff-c/src/lib.rs b/crates/fff-c/src/lib.rs index 0e0d0c91..4a4a201c 100644 --- a/crates/fff-c/src/lib.rs +++ b/crates/fff-c/src/lib.rs @@ -295,6 +295,7 @@ pub unsafe extern "C" fn fff_create_instance_with(opts: *const FffCreateOptions) follow_symlinks: false, enable_fs_root_scanning: opts.enable_fs_root_scanning, enable_home_dir_scanning: opts.enable_home_dir_scanning, + support_submodules: opts.support_submodules, }, ) { return FffResult::err(&format!("Failed to init file picker: {}", e)); @@ -310,6 +311,50 @@ pub unsafe extern "C" fn fff_create_instance_with(opts: *const FffCreateOptions) FffResult::ok_handle(fff_handle) } +/// Create a new file finder instance (v3, with submodule support toggle). +/// +/// Identical to [`fff_create_instance2`] except for the trailing +/// `support_submodules` flag. When `true` (recommended default), submodule +/// directories are walked and reported in git status. When `false`, submodule +/// paths are skipped during traversal and excluded from git status. +/// +/// ## Safety +/// String parameters must be valid null-terminated UTF-8 or NULL. +#[unsafe(no_mangle)] +#[allow(clippy::too_many_arguments)] +pub unsafe extern "C" fn fff_create_instance3( + base_path: *const c_char, + frecency_db_path: *const c_char, + history_db_path: *const c_char, + _use_unsafe_no_lock: bool, + enable_mmap_cache: bool, + enable_content_indexing: bool, + watch: bool, + ai_mode: bool, + log_file_path: *const c_char, + log_level: *const c_char, + cache_budget_max_files: u64, + cache_budget_max_bytes: u64, + cache_budget_max_file_size: u64, + support_submodules: bool, +) -> *mut FffResult { + let mut opts = FffCreateOptions::defaults(); + opts.base_path = base_path; + opts.frecency_db_path = frecency_db_path; + opts.history_db_path = history_db_path; + opts.enable_mmap_cache = enable_mmap_cache; + opts.enable_content_indexing = enable_content_indexing; + opts.watch = watch; + opts.ai_mode = ai_mode; + opts.log_file_path = log_file_path; + opts.log_level = log_level; + opts.cache_budget_max_files = cache_budget_max_files; + opts.cache_budget_max_bytes = cache_budget_max_bytes; + opts.cache_budget_max_file_size = cache_budget_max_file_size; + opts.support_submodules = support_submodules; + unsafe { fff_create_instance_with(&opts as *const FffCreateOptions) } +} + /// Calling-convention adapter for [`fff_create_instance_with`]. /// /// Same logic, but takes the [`FffCreateOptions`] struct **by value**. This @@ -1018,7 +1063,7 @@ pub unsafe extern "C" fn fff_restart_index( Err(e) => return FffResult::err(&format!("Failed to acquire file picker lock: {}", e)), }; - let (warmup_caches, content_indexing, watch, mode, fs_root, home_dir) = + let (warmup_caches, content_indexing, watch, mode, fs_root, home_dir, support_submodules) = if let Some(ref picker) = *guard { ( picker.has_mmap_cache(), @@ -1027,9 +1072,10 @@ pub unsafe extern "C" fn fff_restart_index( picker.mode(), picker.fs_root_scanning_enabled(), picker.home_dir_scanning_enabled(), + picker.has_submodule_support(), ) } else { - (false, true, true, FFFMode::default(), false, false) + (false, true, true, FFFMode::default(), false, false, true) }; drop(guard); @@ -1047,6 +1093,7 @@ pub unsafe extern "C" fn fff_restart_index( follow_symlinks: false, enable_fs_root_scanning: fs_root, enable_home_dir_scanning: home_dir, + support_submodules, }, ) { Ok(()) => FffResult::ok_empty(), diff --git a/crates/fff-core/src/background_watcher.rs b/crates/fff-core/src/background_watcher.rs index 687bb17c..8bb68392 100644 --- a/crates/fff-core/src/background_watcher.rs +++ b/crates/fff-core/src/background_watcher.rs @@ -44,6 +44,7 @@ impl BackgroundWatcher { mode: FFFMode, enable_fs_root_scanning: bool, enable_home_dir_scanning: bool, + support_submodules: bool, trace_span: tracing::Span, ) -> Result { info!( @@ -94,6 +95,7 @@ impl BackgroundWatcher { mode, use_recursive, watch_tx_for_debouncer, + support_submodules, )?; info!("Background file watcher initialized successfully"); @@ -148,6 +150,7 @@ impl BackgroundWatcher { &strong_picker, &owner_frecency, &owner_git_workdir, + support_submodules, ); // Transient strong ref drops here, back @@ -165,6 +168,7 @@ impl BackgroundWatcher { }) } + #[allow(clippy::too_many_arguments)] fn create_debouncer( base_path: PathBuf, git_workdir: Option, @@ -173,6 +177,7 @@ impl BackgroundWatcher { mode: FFFMode, use_recursive: bool, watch_tx: mpsc::Sender, + support_submodules: bool, ) -> Result { let config = Config::default() // do not follow symlinks as then notifiers spawns a bunch of events for symlinked @@ -215,6 +220,7 @@ impl BackgroundWatcher { &strong_picker, &shared_frecency, mode, + support_submodules, ); // every new directory creates had to be reflected in the picker state @@ -402,6 +408,7 @@ fn handle_debounced_events( shared_picker: &SharedFilePicker, shared_frecency: &SharedFrecency, mode: FFFMode, + support_submodules: bool, ) -> Vec { // this will be called very often, we have to minimiy the lock time for file picker let repo = git_workdir.as_ref().and_then(|p| Repository::open(p).ok()); @@ -677,6 +684,7 @@ fn handle_debounced_events( let status = match GitStatusCache::git_status_for_paths( &repo, &files_to_update_git_status, + support_submodules, ) { Ok(s) => s, Err(e) => { @@ -708,6 +716,7 @@ fn track_files_from_new_directories( shared_picker: &SharedFilePicker, shared_frecency: &SharedFrecency, git_workdir: &Option, + support_submodules: bool, ) { let Ok(entries) = std::fs::read_dir(dir) else { return; @@ -744,13 +753,14 @@ fn track_files_from_new_directories( } if let Some(repo) = repo.as_ref() { - let status = match GitStatusCache::git_status_for_paths(repo, &files_to_add) { - Ok(status) => status, - Err(e) => { - tracing::error!(?e, "inject_existing_files: git status query failed"); - return; - } - }; + let status = + match GitStatusCache::git_status_for_paths(repo, &files_to_add, support_submodules) { + Ok(status) => status, + Err(e) => { + tracing::error!(?e, "inject_existing_files: git status query failed"); + return; + } + }; if let Ok(mut guard) = shared_picker.write() && let Some(ref mut picker) = *guard diff --git a/crates/fff-core/src/file_picker.rs b/crates/fff-core/src/file_picker.rs index b08fbf14..29048997 100644 --- a/crates/fff-core/src/file_picker.rs +++ b/crates/fff-core/src/file_picker.rs @@ -413,6 +413,10 @@ pub struct FilePickerOptions { /// Allow indexing the user's home directory. Off by default for the same /// reason as `enable_fs_root_scanning`. pub enable_home_dir_scanning: bool, + /// When `true` (default), submodule directories are walked and their + /// statuses are reported by libgit2. When `false`, submodule paths are + /// skipped during traversal and excluded from git status. + pub support_submodules: bool, } impl Default for FilePickerOptions { @@ -427,6 +431,7 @@ impl Default for FilePickerOptions { follow_symlinks: false, enable_fs_root_scanning: false, enable_home_dir_scanning: false, + support_submodules: true, } } } @@ -446,6 +451,7 @@ pub struct FilePicker { follow_symlinks: bool, enable_fs_root_scanning: bool, enable_home_dir_scanning: bool, + support_submodules: bool, trace_span: tracing::Span, trace_id: String, } @@ -513,6 +519,10 @@ impl FilePicker { self.enable_home_dir_scanning } + pub fn has_submodule_support(&self) -> bool { + self.support_submodules + } + pub fn trace_id(&self) -> &str { &self.trace_id } @@ -726,6 +736,7 @@ impl FilePicker { follow_symlinks: options.follow_symlinks, enable_fs_root_scanning: options.enable_fs_root_scanning, enable_home_dir_scanning: options.enable_home_dir_scanning, + support_submodules: options.support_submodules, trace_span, trace_id, }) @@ -751,6 +762,7 @@ impl FilePicker { let warmup = picker.enable_mmap_cache; let content_indexing = picker.enable_content_indexing; let watch = picker.watch; + let support_submodules = picker.support_submodules; let mode = picker.mode; let follow_symlinks = picker.follow_symlinks; let enable_fs_root_scanning = picker.enable_fs_root_scanning; @@ -794,6 +806,7 @@ impl FilePicker { follow_symlinks, enable_fs_root_scanning, enable_home_dir_scanning, + support_submodules, }, ) .spawn(); @@ -814,7 +827,10 @@ impl FilePicker { self.scanned_files_count.store(0, Ordering::Relaxed); let git_workdir = FileSync::discover_git_workdir(&self.base_path); - let git_handle = git_workdir.clone().map(FileSync::spawn_git_status); + let support_submodules = self.support_submodules; + let git_handle = git_workdir + .clone() + .map(|wd| FileSync::spawn_git_status(wd, support_submodules)); let empty_frecency = SharedFrecency::default(); let sync = FileSync::walk_filesystem( @@ -824,6 +840,7 @@ impl FilePicker { &empty_frecency, self.mode, self.follow_symlinks, + self.support_submodules, )?; self.sync_data = sync; @@ -873,6 +890,7 @@ impl FilePicker { self.mode, self.enable_fs_root_scanning, self.enable_home_dir_scanning, + self.support_submodules, self.trace_span.clone(), )?; self.background_watcher = Some(watcher); @@ -1766,11 +1784,14 @@ impl FileSync { git_workdir } - pub(crate) fn spawn_git_status(git_workdir: PathBuf) -> JoinHandle> { + pub(crate) fn spawn_git_status( + git_workdir: PathBuf, + support_submodules: bool, + ) -> JoinHandle> { std::thread::spawn(move || { GitStatusCache::read_git_status( Some(git_workdir.as_path()), - &mut crate::git::initial_scan_status_options(), + &mut crate::git::initial_scan_status_options(support_submodules), ) }) } @@ -1786,6 +1807,7 @@ impl FileSync { shared_frecency: &SharedFrecency, mode: FFFMode, follow_symlinks: bool, + support_submodules: bool, ) -> Result { use ignore::WalkBuilder; @@ -1811,6 +1833,20 @@ impl FileSync { walk_builder.overrides(overrides); } + // When submodule support is disabled, collect submodule absolute paths + // up-front and prune them from the walk via filter_entry. We resolve + // each submodule path relative to its containing repository workdir + // (which may itself be a submodule) so nested setups work. + if !support_submodules && let Some(workdir) = git_workdir.as_ref() { + let submodule_paths = collect_submodule_paths(workdir); + if !submodule_paths.is_empty() { + debug!(count = submodule_paths.len(), "Skipping submodule paths"); + walk_builder.filter_entry(move |entry| { + !submodule_paths.iter().any(|p| entry.path() == p.as_path()) + }); + } + } + let walker = walk_builder.build_parallel(); let walker_start = std::time::Instant::now(); debug!("SCAN: Starting file walker"); @@ -1991,6 +2027,34 @@ pub(crate) fn warmup_mmaps( } } +/// Resolve every submodule registered under `workdir` to its absolute on-disk +/// path. Includes nested submodules by descending into each submodule's own +/// repository when it is initialized. Returns normalized paths suitable for +/// equality comparison against `WalkBuilder` entries. +fn collect_submodule_paths(workdir: &Path) -> Vec { + let Ok(repo) = Repository::open(workdir) else { + return Vec::new(); + }; + + let mut out = Vec::new(); + let mut stack: Vec<(Repository, PathBuf)> = vec![(repo, workdir.to_path_buf())]; + while let Some((repo, root)) = stack.pop() { + let Ok(submodules) = repo.submodules() else { + continue; + }; + for sm in submodules { + let abs = root.join(sm.path()); + let abs = crate::path_utils::normalize(abs); + // Recurse into initialized submodules so nested ones are found too. + if let Ok(sub_repo) = sm.open() { + stack.push((sub_repo, abs.clone())); + } + out.push(abs); + } + } + out +} + /// This does both thing (yes sorry all the OOP morons) /// in one go: populates files chunked storage and creates new directories fn populates_dirs_files_chunked_storage<'a>( diff --git a/crates/fff-core/src/git.rs b/crates/fff-core/src/git.rs index 85bce5e3..86a6a524 100644 --- a/crates/fff-core/src/git.rs +++ b/crates/fff-core/src/git.rs @@ -6,12 +6,12 @@ use std::{ path::{Path, PathBuf}, }; -pub(crate) fn default_status_options() -> StatusOptions { +pub(crate) fn default_status_options(support_submodules: bool) -> StatusOptions { let mut opts = StatusOptions::new(); opts.include_untracked(true) .recurse_untracked_dirs(true) .include_unmodified(true) - .exclude_submodules(true); + .exclude_submodules(!support_submodules); opts } @@ -21,11 +21,11 @@ pub(crate) fn default_status_options() -> StatusOptions { /// `git_status: None` (== clean), so a missing cache entry already means /// "clean" — no need to ask libgit2 to enumerate every tracked path. /// Saves seconds on huge dirty trees (e.g. chromium with 400k+ entries). -pub(crate) fn initial_scan_status_options() -> StatusOptions { +pub(crate) fn initial_scan_status_options(support_submodules: bool) -> StatusOptions { let mut opts = StatusOptions::new(); opts.include_untracked(true) .recurse_untracked_dirs(true) - .exclude_submodules(true); + .exclude_submodules(!support_submodules); opts } @@ -96,6 +96,7 @@ impl GitStatusCache { pub fn git_status_for_paths + Debug>( repo: &Repository, paths: &[TPath], + support_submodules: bool, ) -> Result { if paths.is_empty() { return Ok(Self(AHashMap::new())); @@ -118,7 +119,7 @@ impl GitStatusCache { return Ok(Self(map)); } - let mut status_options = default_status_options(); + let mut status_options = default_status_options(support_submodules); for path in paths { status_options.pathspec(path.as_ref().strip_prefix(&workdir)?); } @@ -234,7 +235,7 @@ mod tests { let repo = Repository::open(&base).unwrap(); let paths: Vec = names.iter().map(|n| base.join(n)).collect(); - let cache = GitStatusCache::git_status_for_paths(&repo, &paths).unwrap(); + let cache = GitStatusCache::git_status_for_paths(&repo, &paths, true).unwrap(); for (n, abs) in names.iter().zip(paths.iter()) { let status = cache.lookup_status(abs); diff --git a/crates/fff-core/src/scan.rs b/crates/fff-core/src/scan.rs index a762a95f..06962ed4 100644 --- a/crates/fff-core/src/scan.rs +++ b/crates/fff-core/src/scan.rs @@ -33,7 +33,7 @@ pub(crate) struct ScanSignals { } /// Which optional phases a scan should run. -#[derive(Clone, Copy, Default, Debug)] +#[derive(Clone, Copy, Debug)] pub(crate) struct ScanConfig { pub(crate) warmup: bool, pub(crate) content_indexing: bool, @@ -43,6 +43,23 @@ pub(crate) struct ScanConfig { pub(crate) follow_symlinks: bool, pub(crate) enable_fs_root_scanning: bool, pub(crate) enable_home_dir_scanning: bool, + pub(crate) support_submodules: bool, +} + +impl Default for ScanConfig { + fn default() -> Self { + Self { + warmup: false, + content_indexing: false, + watch: false, + auto_cache_budget: false, + install_watcher: false, + follow_symlinks: false, + enable_fs_root_scanning: false, + enable_home_dir_scanning: false, + support_submodules: true, + } + } } /// A fully-configured scan job ready to run on a background thread. @@ -96,6 +113,7 @@ impl ScanJob { follow_symlinks: picker.follows_symlinks(), enable_fs_root_scanning: picker.fs_root_scanning_enabled(), enable_home_dir_scanning: picker.home_dir_scanning_enabled(), + support_submodules: picker.has_submodule_support(), }; drop(guard); // just a sanity check @@ -168,7 +186,9 @@ impl ScanJob { // 1. Start git discovery and walk filesystem off-lock. let git_workdir = FileSync::discover_git_workdir(&base_path); - let status_handle = git_workdir.clone().map(FileSync::spawn_git_status); + let status_handle = git_workdir + .clone() + .map(|wd| FileSync::spawn_git_status(wd, config.support_submodules)); let sync = match FileSync::walk_filesystem( &base_path, git_workdir.clone(), @@ -176,6 +196,7 @@ impl ScanJob { &shared_frecency, mode, config.follow_symlinks, + config.support_submodules, ) { Ok(sync) => sync, Err(e) => { @@ -261,6 +282,7 @@ impl ScanJob { mode, config.enable_fs_root_scanning, config.enable_home_dir_scanning, + config.support_submodules, tracing::Span::current(), ) { Ok(watcher) => { diff --git a/crates/fff-core/src/shared.rs b/crates/fff-core/src/shared.rs index 0b1a9490..6b11c3d9 100644 --- a/crates/fff-core/src/shared.rs +++ b/crates/fff-core/src/shared.rs @@ -222,6 +222,7 @@ impl SharedFilePicker { }; let git_root = picker.git_root().map(|p| p.to_path_buf()); + let support_submodules = picker.has_submodule_support(); drop(guard); // updating git status could take very long time, there is not risky as we // do not allow any mutations and deletions of files from the sync @@ -233,7 +234,7 @@ impl SharedFilePicker { GitStatusCache::read_git_status( git_root.as_deref(), - &mut crate::git::default_status_options(), + &mut crate::git::default_status_options(support_submodules), ) }; diff --git a/crates/fff-core/tests/sparse_checkout_test.rs b/crates/fff-core/tests/sparse_checkout_test.rs new file mode 100644 index 00000000..f47e8a1f --- /dev/null +++ b/crates/fff-core/tests/sparse_checkout_test.rs @@ -0,0 +1,146 @@ +//! Regression test: indexing a sparse-checked-out git repository must not +//! crash and must only surface paths that are actually materialized on +//! disk. libgit2 happily reports statuses for paths that were excluded by +//! `core.sparseCheckout`; `update_git_statuses` already silently skips +//! those (see `file_picker.rs::1346`), this test pins that contract. + +use std::fs; +use std::path::Path; +use std::process::Command; + +use fff_search::file_picker::FilePicker; +use fff_search::{FilePickerOptions, FuzzySearchOptions, PaginationArgs, QueryParser}; +use tempfile::TempDir; + +fn git(dir: &Path, args: &[&str]) { + let out = Command::new("git") + .args(args) + .current_dir(dir) + .env("GIT_AUTHOR_NAME", "t") + .env("GIT_AUTHOR_EMAIL", "t@t") + .env("GIT_COMMITTER_NAME", "t") + .env("GIT_COMMITTER_EMAIL", "t@t") + .env("GIT_CONFIG_GLOBAL", "/dev/null") + .env("GIT_CONFIG_SYSTEM", "/dev/null") + .output() + .expect("git binary must be installed to run this test"); + assert!( + out.status.success(), + "git {args:?} failed:\nstdout={}\nstderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr), + ); +} + +fn fuzzy_search_paths(picker: &FilePicker, query: &str) -> Vec { + let parser = QueryParser::default(); + let parsed = parser.parse(query); + let result = picker.fuzzy_search( + &parsed, + None, + FuzzySearchOptions { + max_threads: 1, + pagination: PaginationArgs { + offset: 0, + limit: 200, + }, + ..Default::default() + }, + ); + result + .items + .iter() + .map(|f| f.relative_path(picker)) + .collect() +} + +/// End-to-end: build a "remote" repo with files in two top-level dirs, +/// clone it with a cone-mode sparse-checkout that materializes only one +/// dir, then index the worktree. The picker must: +/// * see the materialized files, +/// * NOT see the sparse-excluded files, +/// * not panic when libgit2 hands it statuses for non-materialized paths. +#[test] +fn sparse_checkout_indexes_only_materialized_files() { + let upstream_tmp = TempDir::new().unwrap(); + let upstream = upstream_tmp.path(); + + // Create a small repo with two top-level dirs. + git(upstream, &["init", "-b", "main"]); + fs::create_dir_all(upstream.join("included/sub")).unwrap(); + fs::create_dir_all(upstream.join("excluded/deep")).unwrap(); + fs::write(upstream.join("README.md"), "root readme\n").unwrap(); + fs::write(upstream.join("included/in_a.rs"), "// a\n").unwrap(); + fs::write(upstream.join("included/sub/in_b.rs"), "// b\n").unwrap(); + fs::write(upstream.join("excluded/ex_a.rs"), "// ea\n").unwrap(); + fs::write(upstream.join("excluded/deep/ex_b.rs"), "// eb\n").unwrap(); + git(upstream, &["add", "-A"]); + git(upstream, &["commit", "-m", "seed", "--no-gpg-sign"]); + + // Clone with cone-mode sparse-checkout that only materializes /included. + let work_tmp = TempDir::new().unwrap(); + let work = work_tmp.path(); + git( + work, + &[ + "clone", + "--no-local", + "--filter=blob:none", + "--no-checkout", + upstream.to_str().unwrap(), + ".", + ], + ); + git(work, &["sparse-checkout", "init", "--cone"]); + git(work, &["sparse-checkout", "set", "included"]); + git(work, &["checkout", "main"]); + + // Sanity: only the included dir should exist on disk (cone-mode keeps + // top-level files like README.md too). + assert!(work.join("included/in_a.rs").is_file()); + assert!(work.join("included/sub/in_b.rs").is_file()); + assert!(!work.join("excluded").exists()); + + let mut picker = FilePicker::new(FilePickerOptions { + base_path: work.to_string_lossy().to_string(), + enable_mmap_cache: false, + enable_content_indexing: false, + watch: false, + ..Default::default() + }) + .expect("failed to create FilePicker"); + picker + .collect_files() + .expect("collect_files must not panic on a sparse-checked repo"); + + let indexed: Vec = picker + .get_files() + .iter() + .map(|f| f.relative_path(&picker)) + .collect(); + + assert!( + indexed.iter().any(|p| p.ends_with("in_a.rs")), + "expected to find included/in_a.rs in index, got {:?}", + indexed + ); + assert!( + indexed.iter().any(|p| p.ends_with("in_b.rs")), + "expected to find included/sub/in_b.rs in index, got {:?}", + indexed + ); + assert!( + indexed + .iter() + .all(|p| !p.contains("ex_a.rs") && !p.contains("ex_b.rs")), + "sparse-excluded files leaked into the index: {:?}", + indexed + ); + + let hits = fuzzy_search_paths(&picker, "in_b"); + assert!( + hits.iter().any(|p| p.ends_with("in_b.rs")), + "fuzzy search must surface materialized files, got {:?}", + hits + ); +} diff --git a/crates/fff-mcp/src/main.rs b/crates/fff-mcp/src/main.rs index 497c5ee9..d142d2fc 100644 --- a/crates/fff-mcp/src/main.rs +++ b/crates/fff-mcp/src/main.rs @@ -142,6 +142,11 @@ pub(crate) struct Args { #[arg(long = "no-watch")] no_watch: bool, + /// Skip git submodule directories during traversal and exclude them + /// from git status. Default: submodules are walked and reported. + #[arg(long = "no-submodules")] + no_submodules: bool, + /// Maximum number of files whose content is kept persistently in memory. /// Files beyond this limit are still searchable via temporary mmaps that /// are released after each grep. Defaults to 30 000. @@ -263,6 +268,7 @@ async fn main() -> Result<(), Box> { .max_cached_files .map(fff::ContentCacheBudget::new_for_repo), follow_symlinks: false, + support_submodules: !args.no_submodules, ..Default::default() }, ) diff --git a/crates/fff-nvim/src/lib.rs b/crates/fff-nvim/src/lib.rs index a9f3cb2d..534bbe35 100644 --- a/crates/fff-nvim/src/lib.rs +++ b/crates/fff-nvim/src/lib.rs @@ -62,12 +62,24 @@ pub fn destroy_query_db(_: &Lua, _: ()) -> LuaResult { /// Opts table accepted by `init_file_picker` / `restart_index_in_path`. /// Backwards compat: positional `follow_symlinks: bool` argument still works /// — callers that pass a table use the named fields instead. -#[derive(Default)] struct PickerInitOpts { follow_symlinks: bool, enable_fs_root_scanning: bool, enable_home_dir_scanning: bool, enable_filename_constraint: bool, + support_submodules: bool, +} + +impl Default for PickerInitOpts { + fn default() -> Self { + Self { + follow_symlinks: false, + enable_fs_root_scanning: false, + enable_home_dir_scanning: false, + enable_filename_constraint: false, + support_submodules: true, + } + } } impl PickerInitOpts { @@ -92,6 +104,9 @@ impl PickerInitOpts { enable_filename_constraint: t .get::>("enable_filename_constraint")? .unwrap_or(false), + support_submodules: t + .get::>("support_submodules")? + .unwrap_or(true), }), other => Err(LuaError::RuntimeError(format!( "init opts must be a table, boolean, or nil — got {}", @@ -128,6 +143,7 @@ pub fn init_file_picker( follow_symlinks: opts.follow_symlinks, enable_fs_root_scanning: opts.enable_fs_root_scanning, enable_home_dir_scanning: opts.enable_home_dir_scanning, + support_submodules: opts.support_submodules, ..Default::default() }, ) @@ -175,7 +191,7 @@ pub fn restart_index_in_path( // Inherit current picker's scanning flags when caller didn't pass // explicit opts — otherwise a `:cd ~` after init would silently lose // the user's `enable_home_dir_scanning = true` setting. - let (follow_symlinks, fs_root, home_dir) = { + let (follow_symlinks, fs_root, home_dir, support_submodules) = { let guard = match FILE_PICKER.read() { Ok(g) => g, Err(_) => return, @@ -193,11 +209,13 @@ pub fn restart_index_in_path( p.follows_symlinks() || opts.follow_symlinks, p.fs_root_scanning_enabled() || opts.enable_fs_root_scanning, p.home_dir_scanning_enabled() || opts.enable_home_dir_scanning, + p.has_submodule_support() && opts.support_submodules, ), None => ( opts.follow_symlinks, opts.enable_fs_root_scanning, opts.enable_home_dir_scanning, + opts.support_submodules, ), } }; @@ -220,6 +238,7 @@ pub fn restart_index_in_path( follow_symlinks, enable_fs_root_scanning: fs_root, enable_home_dir_scanning: home_dir, + support_submodules, ..Default::default() }, ) { diff --git a/crates/fff-python/src/finder.rs b/crates/fff-python/src/finder.rs index e61232fc..d3c77fe1 100644 --- a/crates/fff-python/src/finder.rs +++ b/crates/fff-python/src/finder.rs @@ -246,6 +246,7 @@ impl FileFinder { follow_symlinks: false, enable_fs_root_scanning, enable_home_dir_scanning, + support_submodules: true, }, ) .map_err(py_err) @@ -775,6 +776,7 @@ impl FileFinder { follow_symlinks: false, enable_fs_root_scanning: fs_root, enable_home_dir_scanning: home_dir, + support_submodules: true, }, ) .map_err(py_err) diff --git a/lua/fff/conf.lua b/lua/fff/conf.lua index dcb53e01..b5e5bca9 100644 --- a/lua/fff/conf.lua +++ b/lua/fff/conf.lua @@ -380,6 +380,10 @@ local function init() -- Git integration git = { status_text_color = false, -- Apply git status colors to filename text (default: false, only sign column) + -- When true (default), git submodules are walked during indexing and their + -- statuses are reported by libgit2. When false, submodule directories are + -- skipped from traversal and excluded from the git status cache. + support_submodules = true, }, debug = { enabled = false, -- Show file info panel in preview diff --git a/lua/fff/core.lua b/lua/fff/core.lua index 1b3f3a16..e10dfed7 100644 --- a/lua/fff/core.lua +++ b/lua/fff/core.lua @@ -116,6 +116,7 @@ M.change_indexing_directory = function(new_path) enable_fs_root_scanning = config.enable_fs_root_scanning, enable_home_dir_scanning = config.enable_home_dir_scanning, enable_filename_constraint = config.grep and config.grep.enable_filename_constraint, + support_submodules = config.git and config.git.support_submodules, }) if not ok then vim.notify('Failed to change directory: ' .. err, vim.log.levels.ERROR) @@ -152,6 +153,7 @@ M.ensure_initialized = function() enable_fs_root_scanning = config.enable_fs_root_scanning, enable_home_dir_scanning = config.enable_home_dir_scanning, enable_filename_constraint = config.grep and config.grep.enable_filename_constraint, + support_submodules = config.git and config.git.support_submodules, }) if not ok then vim.notify('Failed to initialize file picker: ' .. tostring(result), vim.log.levels.ERROR) From eeb4aec5345274d9dce233304345d516406c7dd8 Mon Sep 17 00:00:00 2001 From: Dmitriy Kovalenko Date: Fri, 15 May 2026 05:56:37 +0000 Subject: [PATCH 2/3] chore: Update docs for - feat: configurable submodule support + sparse-checkout test --- doc/fff.nvim.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/fff.nvim.txt b/doc/fff.nvim.txt index 025e2956..96e16189 100644 --- a/doc/fff.nvim.txt +++ b/doc/fff.nvim.txt @@ -245,6 +245,7 @@ Defaults are sensible. Override only what you care about. }, git = { status_text_color = false, -- true to color filenames by git status + support_submodules = true, -- walk into submodules and report their git status; set false to skip them }, select = { -- Return winid to open the chosen file in, or nil to open in the original window From 043ad5c25f72ea95eb0226b62be9a2ea5e9b4004 Mon Sep 17 00:00:00 2001 From: Dmitriy Kovalenko Date: Thu, 14 May 2026 23:04:03 -0700 Subject: [PATCH 3/3] test: strengthen sparse-checkout coverage with log assertions The previous version of the test only asserted that the indexed file list excluded the sparse files. It did not actually probe the branch that handles libgit2 returning statuses for paths that are in the index but not on disk. This: - captures fff tracing into a per-test buffer, - asserts libgit2 actually returns the sparse-excluded paths (so the test would regress to a no-op if libgit2's behavior changed), - exercises both the sync `collect_files` and the async `new_with_shared_state` + `refresh_git_status` pipelines, - requires the documented `Git status for path not in index, skipping` debug message to be emitted at least once, - fails on any unexpected ERROR/WARN line from fff core. --- crates/fff-core/tests/sparse_checkout_test.rs | 161 ++++++++++++++++-- 1 file changed, 151 insertions(+), 10 deletions(-) diff --git a/crates/fff-core/tests/sparse_checkout_test.rs b/crates/fff-core/tests/sparse_checkout_test.rs index f47e8a1f..635b27e9 100644 --- a/crates/fff-core/tests/sparse_checkout_test.rs +++ b/crates/fff-core/tests/sparse_checkout_test.rs @@ -5,12 +5,20 @@ //! those (see `file_picker.rs::1346`), this test pins that contract. use std::fs; +use std::io::Write; use std::path::Path; use std::process::Command; +use std::sync::{Arc, Mutex}; +use std::time::Duration; use fff_search::file_picker::FilePicker; -use fff_search::{FilePickerOptions, FuzzySearchOptions, PaginationArgs, QueryParser}; +use fff_search::{ + FilePickerOptions, FuzzySearchOptions, PaginationArgs, QueryParser, SharedFilePicker, + SharedFrecency, +}; +use git2::Repository; use tempfile::TempDir; +use tracing_subscriber::fmt::MakeWriter; fn git(dir: &Path, args: &[&str]) { let out = Command::new("git") @@ -32,6 +40,35 @@ fn git(dir: &Path, args: &[&str]) { ); } +#[derive(Clone, Default)] +struct CapturedLogs(Arc>>); + +impl CapturedLogs { + fn lines(&self) -> String { + let buf = self.0.lock().unwrap(); + String::from_utf8_lossy(&buf).to_string() + } +} + +impl<'a> MakeWriter<'a> for CapturedLogs { + type Writer = CapturedLogsWriter; + fn make_writer(&'a self) -> Self::Writer { + CapturedLogsWriter(Arc::clone(&self.0)) + } +} + +struct CapturedLogsWriter(Arc>>); + +impl Write for CapturedLogsWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.0.lock().unwrap().extend_from_slice(buf); + Ok(buf.len()) + } + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + fn fuzzy_search_paths(picker: &FilePicker, query: &str) -> Vec { let parser = QueryParser::default(); let parsed = parser.parse(query); @@ -56,16 +93,28 @@ fn fuzzy_search_paths(picker: &FilePicker, query: &str) -> Vec { /// End-to-end: build a "remote" repo with files in two top-level dirs, /// clone it with a cone-mode sparse-checkout that materializes only one -/// dir, then index the worktree. The picker must: -/// * see the materialized files, -/// * NOT see the sparse-excluded files, -/// * not panic when libgit2 hands it statuses for non-materialized paths. +/// dir, then index the worktree both via the synchronous `collect_files` +/// path and the async `new_with_shared_state` path. The picker must: +/// * see only the materialized files, +/// * NOT panic when libgit2 hands it statuses for non-materialized paths, +/// * surface no `ERROR`/`WARN` log lines from fff core, +/// * yield correct fuzzy-search results, +/// * skip non-materialized paths via the documented debug message in +/// `update_git_statuses`. #[test] fn sparse_checkout_indexes_only_materialized_files() { + let logs = CapturedLogs::default(); + let _guard = tracing::subscriber::set_default( + tracing_subscriber::fmt() + .with_max_level(tracing::Level::DEBUG) + .with_ansi(false) + .with_writer(logs.clone()) + .finish(), + ); + let upstream_tmp = TempDir::new().unwrap(); let upstream = upstream_tmp.path(); - // Create a small repo with two top-level dirs. git(upstream, &["init", "-b", "main"]); fs::create_dir_all(upstream.join("included/sub")).unwrap(); fs::create_dir_all(upstream.join("excluded/deep")).unwrap(); @@ -77,7 +126,6 @@ fn sparse_checkout_indexes_only_materialized_files() { git(upstream, &["add", "-A"]); git(upstream, &["commit", "-m", "seed", "--no-gpg-sign"]); - // Clone with cone-mode sparse-checkout that only materializes /included. let work_tmp = TempDir::new().unwrap(); let work = work_tmp.path(); git( @@ -85,7 +133,6 @@ fn sparse_checkout_indexes_only_materialized_files() { &[ "clone", "--no-local", - "--filter=blob:none", "--no-checkout", upstream.to_str().unwrap(), ".", @@ -95,12 +142,36 @@ fn sparse_checkout_indexes_only_materialized_files() { git(work, &["sparse-checkout", "set", "included"]); git(work, &["checkout", "main"]); - // Sanity: only the included dir should exist on disk (cone-mode keeps - // top-level files like README.md too). + // Sanity: only the included dir + top-level files materialized. + assert!(work.join("README.md").is_file()); assert!(work.join("included/in_a.rs").is_file()); assert!(work.join("included/sub/in_b.rs").is_file()); assert!(!work.join("excluded").exists()); + // Sanity: libgit2's status output DOES contain the sparse-excluded + // paths in the index — proving the "path not in index" branch in + // `update_git_statuses` actually has work to do. + { + let repo = Repository::open(work).unwrap(); + let mut opts = git2::StatusOptions::new(); + opts.include_untracked(true) + .include_unmodified(true) + .recurse_untracked_dirs(true) + .exclude_submodules(false); + let statuses = repo.statuses(Some(&mut opts)).unwrap(); + let entries: Vec = statuses + .iter() + .filter_map(|e| e.path().map(|p| p.to_owned())) + .collect(); + assert!( + entries.iter().any(|p| p == "excluded/ex_a.rs"), + "libgit2 must surface sparse-excluded paths so we can prove the \ + picker handles them; got {:?}", + entries + ); + } + + // ---- Path 1: synchronous `collect_files`. ----------------------- let mut picker = FilePicker::new(FilePickerOptions { base_path: work.to_string_lossy().to_string(), enable_mmap_cache: false, @@ -143,4 +214,74 @@ fn sparse_checkout_indexes_only_materialized_files() { "fuzzy search must surface materialized files, got {:?}", hits ); + + drop(picker); + + // ---- Path 2: async `new_with_shared_state` + refresh_git_status. --- + // This goes through the orchestrated scan + git-apply pipeline + // (`scan.rs::apply_git_status_and_frecency` followed by an explicit + // `refresh_git_status` that hits the `update_git_statuses` branch + // which is the one that has to silently drop sparse-excluded paths). + let shared_picker = SharedFilePicker::default(); + let shared_frecency = SharedFrecency::default(); + FilePicker::new_with_shared_state( + shared_picker.clone(), + shared_frecency.clone(), + FilePickerOptions { + base_path: work.to_string_lossy().to_string(), + enable_mmap_cache: false, + enable_content_indexing: false, + watch: false, + ..Default::default() + }, + ) + .expect("new_with_shared_state must succeed on a sparse-checked repo"); + + assert!( + shared_picker.wait_for_scan(Duration::from_secs(15)), + "initial scan timed out" + ); + + let applied = shared_picker + .refresh_git_status(&shared_frecency) + .expect("refresh_git_status must not error on a sparse-checked repo"); + assert!( + applied >= 3, + "expected statuses for materialized files, got {applied}" + ); + + { + let guard = shared_picker.read().unwrap(); + let picker = guard.as_ref().unwrap(); + for f in picker.get_files() { + let rel = f.relative_path(picker); + assert!( + !rel.contains("ex_a.rs") && !rel.contains("ex_b.rs"), + "sparse-excluded file leaked through async path: {rel}" + ); + } + } + + // ---- Log assertions. -------------------------------------------- + let captured = logs.lines(); + + assert!( + captured.contains("Git status for path not in index, skipping"), + "expected the documented debug skip-message for sparse-excluded \ + paths to be emitted at least once, full log was:\n{captured}" + ); + + let unexpected: Vec<&str> = captured + .lines() + .filter(|l| l.contains(" ERROR ") || l.contains(" WARN ")) + // The walker emits a warning when given a non-git base path; we + // are *inside* a git repo so it should not trip, but keep the + // filter narrow regardless. + .filter(|l| !l.contains("No git repository found")) + .collect(); + assert!( + unexpected.is_empty(), + "no ERROR/WARN lines expected on a sparse-checked repo, got:\n{}", + unexpected.join("\n") + ); }