diff --git a/README.md b/README.md index 994d7a93..70180a80 100644 --- a/README.md +++ b/README.md @@ -333,6 +333,7 @@ require('fff').setup({ }, git = { status_text_color = false, -- true to color filenames by git status + support_submodules = true, -- walk into submodules and report their git status; set false to skip them }, select = { -- Return winid to open the chosen file in, or nil to open in the original window diff --git a/crates/fff-c/include/fff.h b/crates/fff-c/include/fff.h index 6be43935..21173be8 100644 --- a/crates/fff-c/include/fff.h +++ b/crates/fff-c/include/fff.h @@ -12,7 +12,7 @@ /** * Current used version of [`FffCreateOptions`]. */ -#define FFF_CREATE_OPTIONS_VERSION 1 +#define FFF_CREATE_OPTIONS_VERSION 2 /** * Result envelope returned by all `fff_*` functions. @@ -133,6 +133,12 @@ typedef struct FffCreateOptions { * `enable_fs_root_scanning`. */ bool enable_home_dir_scanning; + /** + * When `true` (default), submodule directories are walked and reported in + * git status. When `false`, submodule paths are skipped during traversal + * and excluded from git status. + */ + bool support_submodules; } FffCreateOptions; /** @@ -515,6 +521,32 @@ struct FffResult *fff_create_instance_with(const struct FffCreateOptions *opts); */ struct FffResult *fff_create_instance_with_value(struct FffCreateOptions opts); +/** + * Create a new file finder instance (v3, with submodule support toggle). + * + * Identical to [`fff_create_instance2`] except for the trailing + * `support_submodules` flag. When `true` (recommended default), submodule + * directories are walked and reported in git status. When `false`, submodule + * paths are skipped during traversal and excluded from git status. + * + * ## Safety + * String parameters must be valid null-terminated UTF-8 or NULL. + */ +struct FffResult *fff_create_instance3(const char *base_path, + const char *frecency_db_path, + const char *history_db_path, + bool _use_unsafe_no_lock, + bool enable_mmap_cache, + bool enable_content_indexing, + bool watch, + bool ai_mode, + const char *log_file_path, + const char *log_level, + uint64_t cache_budget_max_files, + uint64_t cache_budget_max_bytes, + uint64_t cache_budget_max_file_size, + bool support_submodules); + /** * Destroy a file finder instance and free all its resources. * diff --git a/crates/fff-c/src/ffi_types.rs b/crates/fff-c/src/ffi_types.rs index d2b0e972..c9070876 100644 --- a/crates/fff-c/src/ffi_types.rs +++ b/crates/fff-c/src/ffi_types.rs @@ -15,7 +15,7 @@ use fff::{ }; /// Current used version of [`FffCreateOptions`]. -pub const FFF_CREATE_OPTIONS_VERSION: u32 = 1; +pub const FFF_CREATE_OPTIONS_VERSION: u32 = 2; /// Options for `fff_create_instance_with`. /// @@ -57,7 +57,11 @@ pub struct FffCreateOptions { /// Allow indexing the user's home directory. Same trade-off as /// `enable_fs_root_scanning`. pub enable_home_dir_scanning: bool, - // ----- new version 2+ fields go here, ALWAYS appended ----- + // ----- v2 fields ----- + /// Track files inside git submodules. When true, submodule contents are + /// indexed and their git status reflected in results. + pub support_submodules: bool, + // ----- new version 3+ fields go here, ALWAYS appended ----- } impl FffCreateOptions { @@ -79,6 +83,7 @@ impl FffCreateOptions { cache_budget_max_file_size: 0, enable_fs_root_scanning: false, enable_home_dir_scanning: false, + support_submodules: true, } } } @@ -804,6 +809,7 @@ mod options_layout_tests { fn fff_create_options_layout_is_stable_64bit() { assert_eq!(size_of::(), 88); assert_eq!(align_of::(), 8); + assert_eq!(offset_of!(FffCreateOptions, support_submodules), 82); assert_eq!(offset_of!(FffCreateOptions, version), 0); assert_eq!(offset_of!(FffCreateOptions, base_path), 8); diff --git a/crates/fff-c/src/lib.rs b/crates/fff-c/src/lib.rs index 0e0d0c91..4a4a201c 100644 --- a/crates/fff-c/src/lib.rs +++ b/crates/fff-c/src/lib.rs @@ -295,6 +295,7 @@ pub unsafe extern "C" fn fff_create_instance_with(opts: *const FffCreateOptions) follow_symlinks: false, enable_fs_root_scanning: opts.enable_fs_root_scanning, enable_home_dir_scanning: opts.enable_home_dir_scanning, + support_submodules: opts.support_submodules, }, ) { return FffResult::err(&format!("Failed to init file picker: {}", e)); @@ -310,6 +311,50 @@ pub unsafe extern "C" fn fff_create_instance_with(opts: *const FffCreateOptions) FffResult::ok_handle(fff_handle) } +/// Create a new file finder instance (v3, with submodule support toggle). +/// +/// Identical to [`fff_create_instance2`] except for the trailing +/// `support_submodules` flag. When `true` (recommended default), submodule +/// directories are walked and reported in git status. When `false`, submodule +/// paths are skipped during traversal and excluded from git status. +/// +/// ## Safety +/// String parameters must be valid null-terminated UTF-8 or NULL. +#[unsafe(no_mangle)] +#[allow(clippy::too_many_arguments)] +pub unsafe extern "C" fn fff_create_instance3( + base_path: *const c_char, + frecency_db_path: *const c_char, + history_db_path: *const c_char, + _use_unsafe_no_lock: bool, + enable_mmap_cache: bool, + enable_content_indexing: bool, + watch: bool, + ai_mode: bool, + log_file_path: *const c_char, + log_level: *const c_char, + cache_budget_max_files: u64, + cache_budget_max_bytes: u64, + cache_budget_max_file_size: u64, + support_submodules: bool, +) -> *mut FffResult { + let mut opts = FffCreateOptions::defaults(); + opts.base_path = base_path; + opts.frecency_db_path = frecency_db_path; + opts.history_db_path = history_db_path; + opts.enable_mmap_cache = enable_mmap_cache; + opts.enable_content_indexing = enable_content_indexing; + opts.watch = watch; + opts.ai_mode = ai_mode; + opts.log_file_path = log_file_path; + opts.log_level = log_level; + opts.cache_budget_max_files = cache_budget_max_files; + opts.cache_budget_max_bytes = cache_budget_max_bytes; + opts.cache_budget_max_file_size = cache_budget_max_file_size; + opts.support_submodules = support_submodules; + unsafe { fff_create_instance_with(&opts as *const FffCreateOptions) } +} + /// Calling-convention adapter for [`fff_create_instance_with`]. /// /// Same logic, but takes the [`FffCreateOptions`] struct **by value**. This @@ -1018,7 +1063,7 @@ pub unsafe extern "C" fn fff_restart_index( Err(e) => return FffResult::err(&format!("Failed to acquire file picker lock: {}", e)), }; - let (warmup_caches, content_indexing, watch, mode, fs_root, home_dir) = + let (warmup_caches, content_indexing, watch, mode, fs_root, home_dir, support_submodules) = if let Some(ref picker) = *guard { ( picker.has_mmap_cache(), @@ -1027,9 +1072,10 @@ pub unsafe extern "C" fn fff_restart_index( picker.mode(), picker.fs_root_scanning_enabled(), picker.home_dir_scanning_enabled(), + picker.has_submodule_support(), ) } else { - (false, true, true, FFFMode::default(), false, false) + (false, true, true, FFFMode::default(), false, false, true) }; drop(guard); @@ -1047,6 +1093,7 @@ pub unsafe extern "C" fn fff_restart_index( follow_symlinks: false, enable_fs_root_scanning: fs_root, enable_home_dir_scanning: home_dir, + support_submodules, }, ) { Ok(()) => FffResult::ok_empty(), diff --git a/crates/fff-core/src/background_watcher.rs b/crates/fff-core/src/background_watcher.rs index 687bb17c..8bb68392 100644 --- a/crates/fff-core/src/background_watcher.rs +++ b/crates/fff-core/src/background_watcher.rs @@ -44,6 +44,7 @@ impl BackgroundWatcher { mode: FFFMode, enable_fs_root_scanning: bool, enable_home_dir_scanning: bool, + support_submodules: bool, trace_span: tracing::Span, ) -> Result { info!( @@ -94,6 +95,7 @@ impl BackgroundWatcher { mode, use_recursive, watch_tx_for_debouncer, + support_submodules, )?; info!("Background file watcher initialized successfully"); @@ -148,6 +150,7 @@ impl BackgroundWatcher { &strong_picker, &owner_frecency, &owner_git_workdir, + support_submodules, ); // Transient strong ref drops here, back @@ -165,6 +168,7 @@ impl BackgroundWatcher { }) } + #[allow(clippy::too_many_arguments)] fn create_debouncer( base_path: PathBuf, git_workdir: Option, @@ -173,6 +177,7 @@ impl BackgroundWatcher { mode: FFFMode, use_recursive: bool, watch_tx: mpsc::Sender, + support_submodules: bool, ) -> Result { let config = Config::default() // do not follow symlinks as then notifiers spawns a bunch of events for symlinked @@ -215,6 +220,7 @@ impl BackgroundWatcher { &strong_picker, &shared_frecency, mode, + support_submodules, ); // every new directory creates had to be reflected in the picker state @@ -402,6 +408,7 @@ fn handle_debounced_events( shared_picker: &SharedFilePicker, shared_frecency: &SharedFrecency, mode: FFFMode, + support_submodules: bool, ) -> Vec { // this will be called very often, we have to minimiy the lock time for file picker let repo = git_workdir.as_ref().and_then(|p| Repository::open(p).ok()); @@ -677,6 +684,7 @@ fn handle_debounced_events( let status = match GitStatusCache::git_status_for_paths( &repo, &files_to_update_git_status, + support_submodules, ) { Ok(s) => s, Err(e) => { @@ -708,6 +716,7 @@ fn track_files_from_new_directories( shared_picker: &SharedFilePicker, shared_frecency: &SharedFrecency, git_workdir: &Option, + support_submodules: bool, ) { let Ok(entries) = std::fs::read_dir(dir) else { return; @@ -744,13 +753,14 @@ fn track_files_from_new_directories( } if let Some(repo) = repo.as_ref() { - let status = match GitStatusCache::git_status_for_paths(repo, &files_to_add) { - Ok(status) => status, - Err(e) => { - tracing::error!(?e, "inject_existing_files: git status query failed"); - return; - } - }; + let status = + match GitStatusCache::git_status_for_paths(repo, &files_to_add, support_submodules) { + Ok(status) => status, + Err(e) => { + tracing::error!(?e, "inject_existing_files: git status query failed"); + return; + } + }; if let Ok(mut guard) = shared_picker.write() && let Some(ref mut picker) = *guard diff --git a/crates/fff-core/src/file_picker.rs b/crates/fff-core/src/file_picker.rs index b08fbf14..29048997 100644 --- a/crates/fff-core/src/file_picker.rs +++ b/crates/fff-core/src/file_picker.rs @@ -413,6 +413,10 @@ pub struct FilePickerOptions { /// Allow indexing the user's home directory. Off by default for the same /// reason as `enable_fs_root_scanning`. pub enable_home_dir_scanning: bool, + /// When `true` (default), submodule directories are walked and their + /// statuses are reported by libgit2. When `false`, submodule paths are + /// skipped during traversal and excluded from git status. + pub support_submodules: bool, } impl Default for FilePickerOptions { @@ -427,6 +431,7 @@ impl Default for FilePickerOptions { follow_symlinks: false, enable_fs_root_scanning: false, enable_home_dir_scanning: false, + support_submodules: true, } } } @@ -446,6 +451,7 @@ pub struct FilePicker { follow_symlinks: bool, enable_fs_root_scanning: bool, enable_home_dir_scanning: bool, + support_submodules: bool, trace_span: tracing::Span, trace_id: String, } @@ -513,6 +519,10 @@ impl FilePicker { self.enable_home_dir_scanning } + pub fn has_submodule_support(&self) -> bool { + self.support_submodules + } + pub fn trace_id(&self) -> &str { &self.trace_id } @@ -726,6 +736,7 @@ impl FilePicker { follow_symlinks: options.follow_symlinks, enable_fs_root_scanning: options.enable_fs_root_scanning, enable_home_dir_scanning: options.enable_home_dir_scanning, + support_submodules: options.support_submodules, trace_span, trace_id, }) @@ -751,6 +762,7 @@ impl FilePicker { let warmup = picker.enable_mmap_cache; let content_indexing = picker.enable_content_indexing; let watch = picker.watch; + let support_submodules = picker.support_submodules; let mode = picker.mode; let follow_symlinks = picker.follow_symlinks; let enable_fs_root_scanning = picker.enable_fs_root_scanning; @@ -794,6 +806,7 @@ impl FilePicker { follow_symlinks, enable_fs_root_scanning, enable_home_dir_scanning, + support_submodules, }, ) .spawn(); @@ -814,7 +827,10 @@ impl FilePicker { self.scanned_files_count.store(0, Ordering::Relaxed); let git_workdir = FileSync::discover_git_workdir(&self.base_path); - let git_handle = git_workdir.clone().map(FileSync::spawn_git_status); + let support_submodules = self.support_submodules; + let git_handle = git_workdir + .clone() + .map(|wd| FileSync::spawn_git_status(wd, support_submodules)); let empty_frecency = SharedFrecency::default(); let sync = FileSync::walk_filesystem( @@ -824,6 +840,7 @@ impl FilePicker { &empty_frecency, self.mode, self.follow_symlinks, + self.support_submodules, )?; self.sync_data = sync; @@ -873,6 +890,7 @@ impl FilePicker { self.mode, self.enable_fs_root_scanning, self.enable_home_dir_scanning, + self.support_submodules, self.trace_span.clone(), )?; self.background_watcher = Some(watcher); @@ -1766,11 +1784,14 @@ impl FileSync { git_workdir } - pub(crate) fn spawn_git_status(git_workdir: PathBuf) -> JoinHandle> { + pub(crate) fn spawn_git_status( + git_workdir: PathBuf, + support_submodules: bool, + ) -> JoinHandle> { std::thread::spawn(move || { GitStatusCache::read_git_status( Some(git_workdir.as_path()), - &mut crate::git::initial_scan_status_options(), + &mut crate::git::initial_scan_status_options(support_submodules), ) }) } @@ -1786,6 +1807,7 @@ impl FileSync { shared_frecency: &SharedFrecency, mode: FFFMode, follow_symlinks: bool, + support_submodules: bool, ) -> Result { use ignore::WalkBuilder; @@ -1811,6 +1833,20 @@ impl FileSync { walk_builder.overrides(overrides); } + // When submodule support is disabled, collect submodule absolute paths + // up-front and prune them from the walk via filter_entry. We resolve + // each submodule path relative to its containing repository workdir + // (which may itself be a submodule) so nested setups work. + if !support_submodules && let Some(workdir) = git_workdir.as_ref() { + let submodule_paths = collect_submodule_paths(workdir); + if !submodule_paths.is_empty() { + debug!(count = submodule_paths.len(), "Skipping submodule paths"); + walk_builder.filter_entry(move |entry| { + !submodule_paths.iter().any(|p| entry.path() == p.as_path()) + }); + } + } + let walker = walk_builder.build_parallel(); let walker_start = std::time::Instant::now(); debug!("SCAN: Starting file walker"); @@ -1991,6 +2027,34 @@ pub(crate) fn warmup_mmaps( } } +/// Resolve every submodule registered under `workdir` to its absolute on-disk +/// path. Includes nested submodules by descending into each submodule's own +/// repository when it is initialized. Returns normalized paths suitable for +/// equality comparison against `WalkBuilder` entries. +fn collect_submodule_paths(workdir: &Path) -> Vec { + let Ok(repo) = Repository::open(workdir) else { + return Vec::new(); + }; + + let mut out = Vec::new(); + let mut stack: Vec<(Repository, PathBuf)> = vec![(repo, workdir.to_path_buf())]; + while let Some((repo, root)) = stack.pop() { + let Ok(submodules) = repo.submodules() else { + continue; + }; + for sm in submodules { + let abs = root.join(sm.path()); + let abs = crate::path_utils::normalize(abs); + // Recurse into initialized submodules so nested ones are found too. + if let Ok(sub_repo) = sm.open() { + stack.push((sub_repo, abs.clone())); + } + out.push(abs); + } + } + out +} + /// This does both thing (yes sorry all the OOP morons) /// in one go: populates files chunked storage and creates new directories fn populates_dirs_files_chunked_storage<'a>( diff --git a/crates/fff-core/src/git.rs b/crates/fff-core/src/git.rs index 85bce5e3..86a6a524 100644 --- a/crates/fff-core/src/git.rs +++ b/crates/fff-core/src/git.rs @@ -6,12 +6,12 @@ use std::{ path::{Path, PathBuf}, }; -pub(crate) fn default_status_options() -> StatusOptions { +pub(crate) fn default_status_options(support_submodules: bool) -> StatusOptions { let mut opts = StatusOptions::new(); opts.include_untracked(true) .recurse_untracked_dirs(true) .include_unmodified(true) - .exclude_submodules(true); + .exclude_submodules(!support_submodules); opts } @@ -21,11 +21,11 @@ pub(crate) fn default_status_options() -> StatusOptions { /// `git_status: None` (== clean), so a missing cache entry already means /// "clean" — no need to ask libgit2 to enumerate every tracked path. /// Saves seconds on huge dirty trees (e.g. chromium with 400k+ entries). -pub(crate) fn initial_scan_status_options() -> StatusOptions { +pub(crate) fn initial_scan_status_options(support_submodules: bool) -> StatusOptions { let mut opts = StatusOptions::new(); opts.include_untracked(true) .recurse_untracked_dirs(true) - .exclude_submodules(true); + .exclude_submodules(!support_submodules); opts } @@ -96,6 +96,7 @@ impl GitStatusCache { pub fn git_status_for_paths + Debug>( repo: &Repository, paths: &[TPath], + support_submodules: bool, ) -> Result { if paths.is_empty() { return Ok(Self(AHashMap::new())); @@ -118,7 +119,7 @@ impl GitStatusCache { return Ok(Self(map)); } - let mut status_options = default_status_options(); + let mut status_options = default_status_options(support_submodules); for path in paths { status_options.pathspec(path.as_ref().strip_prefix(&workdir)?); } @@ -234,7 +235,7 @@ mod tests { let repo = Repository::open(&base).unwrap(); let paths: Vec = names.iter().map(|n| base.join(n)).collect(); - let cache = GitStatusCache::git_status_for_paths(&repo, &paths).unwrap(); + let cache = GitStatusCache::git_status_for_paths(&repo, &paths, true).unwrap(); for (n, abs) in names.iter().zip(paths.iter()) { let status = cache.lookup_status(abs); diff --git a/crates/fff-core/src/scan.rs b/crates/fff-core/src/scan.rs index a762a95f..06962ed4 100644 --- a/crates/fff-core/src/scan.rs +++ b/crates/fff-core/src/scan.rs @@ -33,7 +33,7 @@ pub(crate) struct ScanSignals { } /// Which optional phases a scan should run. -#[derive(Clone, Copy, Default, Debug)] +#[derive(Clone, Copy, Debug)] pub(crate) struct ScanConfig { pub(crate) warmup: bool, pub(crate) content_indexing: bool, @@ -43,6 +43,23 @@ pub(crate) struct ScanConfig { pub(crate) follow_symlinks: bool, pub(crate) enable_fs_root_scanning: bool, pub(crate) enable_home_dir_scanning: bool, + pub(crate) support_submodules: bool, +} + +impl Default for ScanConfig { + fn default() -> Self { + Self { + warmup: false, + content_indexing: false, + watch: false, + auto_cache_budget: false, + install_watcher: false, + follow_symlinks: false, + enable_fs_root_scanning: false, + enable_home_dir_scanning: false, + support_submodules: true, + } + } } /// A fully-configured scan job ready to run on a background thread. @@ -96,6 +113,7 @@ impl ScanJob { follow_symlinks: picker.follows_symlinks(), enable_fs_root_scanning: picker.fs_root_scanning_enabled(), enable_home_dir_scanning: picker.home_dir_scanning_enabled(), + support_submodules: picker.has_submodule_support(), }; drop(guard); // just a sanity check @@ -168,7 +186,9 @@ impl ScanJob { // 1. Start git discovery and walk filesystem off-lock. let git_workdir = FileSync::discover_git_workdir(&base_path); - let status_handle = git_workdir.clone().map(FileSync::spawn_git_status); + let status_handle = git_workdir + .clone() + .map(|wd| FileSync::spawn_git_status(wd, config.support_submodules)); let sync = match FileSync::walk_filesystem( &base_path, git_workdir.clone(), @@ -176,6 +196,7 @@ impl ScanJob { &shared_frecency, mode, config.follow_symlinks, + config.support_submodules, ) { Ok(sync) => sync, Err(e) => { @@ -261,6 +282,7 @@ impl ScanJob { mode, config.enable_fs_root_scanning, config.enable_home_dir_scanning, + config.support_submodules, tracing::Span::current(), ) { Ok(watcher) => { diff --git a/crates/fff-core/src/shared.rs b/crates/fff-core/src/shared.rs index 0b1a9490..6b11c3d9 100644 --- a/crates/fff-core/src/shared.rs +++ b/crates/fff-core/src/shared.rs @@ -222,6 +222,7 @@ impl SharedFilePicker { }; let git_root = picker.git_root().map(|p| p.to_path_buf()); + let support_submodules = picker.has_submodule_support(); drop(guard); // updating git status could take very long time, there is not risky as we // do not allow any mutations and deletions of files from the sync @@ -233,7 +234,7 @@ impl SharedFilePicker { GitStatusCache::read_git_status( git_root.as_deref(), - &mut crate::git::default_status_options(), + &mut crate::git::default_status_options(support_submodules), ) }; diff --git a/crates/fff-core/tests/sparse_checkout_test.rs b/crates/fff-core/tests/sparse_checkout_test.rs new file mode 100644 index 00000000..635b27e9 --- /dev/null +++ b/crates/fff-core/tests/sparse_checkout_test.rs @@ -0,0 +1,287 @@ +//! Regression test: indexing a sparse-checked-out git repository must not +//! crash and must only surface paths that are actually materialized on +//! disk. libgit2 happily reports statuses for paths that were excluded by +//! `core.sparseCheckout`; `update_git_statuses` already silently skips +//! those (see `file_picker.rs::1346`), this test pins that contract. + +use std::fs; +use std::io::Write; +use std::path::Path; +use std::process::Command; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +use fff_search::file_picker::FilePicker; +use fff_search::{ + FilePickerOptions, FuzzySearchOptions, PaginationArgs, QueryParser, SharedFilePicker, + SharedFrecency, +}; +use git2::Repository; +use tempfile::TempDir; +use tracing_subscriber::fmt::MakeWriter; + +fn git(dir: &Path, args: &[&str]) { + let out = Command::new("git") + .args(args) + .current_dir(dir) + .env("GIT_AUTHOR_NAME", "t") + .env("GIT_AUTHOR_EMAIL", "t@t") + .env("GIT_COMMITTER_NAME", "t") + .env("GIT_COMMITTER_EMAIL", "t@t") + .env("GIT_CONFIG_GLOBAL", "/dev/null") + .env("GIT_CONFIG_SYSTEM", "/dev/null") + .output() + .expect("git binary must be installed to run this test"); + assert!( + out.status.success(), + "git {args:?} failed:\nstdout={}\nstderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr), + ); +} + +#[derive(Clone, Default)] +struct CapturedLogs(Arc>>); + +impl CapturedLogs { + fn lines(&self) -> String { + let buf = self.0.lock().unwrap(); + String::from_utf8_lossy(&buf).to_string() + } +} + +impl<'a> MakeWriter<'a> for CapturedLogs { + type Writer = CapturedLogsWriter; + fn make_writer(&'a self) -> Self::Writer { + CapturedLogsWriter(Arc::clone(&self.0)) + } +} + +struct CapturedLogsWriter(Arc>>); + +impl Write for CapturedLogsWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.0.lock().unwrap().extend_from_slice(buf); + Ok(buf.len()) + } + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +fn fuzzy_search_paths(picker: &FilePicker, query: &str) -> Vec { + let parser = QueryParser::default(); + let parsed = parser.parse(query); + let result = picker.fuzzy_search( + &parsed, + None, + FuzzySearchOptions { + max_threads: 1, + pagination: PaginationArgs { + offset: 0, + limit: 200, + }, + ..Default::default() + }, + ); + result + .items + .iter() + .map(|f| f.relative_path(picker)) + .collect() +} + +/// End-to-end: build a "remote" repo with files in two top-level dirs, +/// clone it with a cone-mode sparse-checkout that materializes only one +/// dir, then index the worktree both via the synchronous `collect_files` +/// path and the async `new_with_shared_state` path. The picker must: +/// * see only the materialized files, +/// * NOT panic when libgit2 hands it statuses for non-materialized paths, +/// * surface no `ERROR`/`WARN` log lines from fff core, +/// * yield correct fuzzy-search results, +/// * skip non-materialized paths via the documented debug message in +/// `update_git_statuses`. +#[test] +fn sparse_checkout_indexes_only_materialized_files() { + let logs = CapturedLogs::default(); + let _guard = tracing::subscriber::set_default( + tracing_subscriber::fmt() + .with_max_level(tracing::Level::DEBUG) + .with_ansi(false) + .with_writer(logs.clone()) + .finish(), + ); + + let upstream_tmp = TempDir::new().unwrap(); + let upstream = upstream_tmp.path(); + + git(upstream, &["init", "-b", "main"]); + fs::create_dir_all(upstream.join("included/sub")).unwrap(); + fs::create_dir_all(upstream.join("excluded/deep")).unwrap(); + fs::write(upstream.join("README.md"), "root readme\n").unwrap(); + fs::write(upstream.join("included/in_a.rs"), "// a\n").unwrap(); + fs::write(upstream.join("included/sub/in_b.rs"), "// b\n").unwrap(); + fs::write(upstream.join("excluded/ex_a.rs"), "// ea\n").unwrap(); + fs::write(upstream.join("excluded/deep/ex_b.rs"), "// eb\n").unwrap(); + git(upstream, &["add", "-A"]); + git(upstream, &["commit", "-m", "seed", "--no-gpg-sign"]); + + let work_tmp = TempDir::new().unwrap(); + let work = work_tmp.path(); + git( + work, + &[ + "clone", + "--no-local", + "--no-checkout", + upstream.to_str().unwrap(), + ".", + ], + ); + git(work, &["sparse-checkout", "init", "--cone"]); + git(work, &["sparse-checkout", "set", "included"]); + git(work, &["checkout", "main"]); + + // Sanity: only the included dir + top-level files materialized. + assert!(work.join("README.md").is_file()); + assert!(work.join("included/in_a.rs").is_file()); + assert!(work.join("included/sub/in_b.rs").is_file()); + assert!(!work.join("excluded").exists()); + + // Sanity: libgit2's status output DOES contain the sparse-excluded + // paths in the index — proving the "path not in index" branch in + // `update_git_statuses` actually has work to do. + { + let repo = Repository::open(work).unwrap(); + let mut opts = git2::StatusOptions::new(); + opts.include_untracked(true) + .include_unmodified(true) + .recurse_untracked_dirs(true) + .exclude_submodules(false); + let statuses = repo.statuses(Some(&mut opts)).unwrap(); + let entries: Vec = statuses + .iter() + .filter_map(|e| e.path().map(|p| p.to_owned())) + .collect(); + assert!( + entries.iter().any(|p| p == "excluded/ex_a.rs"), + "libgit2 must surface sparse-excluded paths so we can prove the \ + picker handles them; got {:?}", + entries + ); + } + + // ---- Path 1: synchronous `collect_files`. ----------------------- + let mut picker = FilePicker::new(FilePickerOptions { + base_path: work.to_string_lossy().to_string(), + enable_mmap_cache: false, + enable_content_indexing: false, + watch: false, + ..Default::default() + }) + .expect("failed to create FilePicker"); + picker + .collect_files() + .expect("collect_files must not panic on a sparse-checked repo"); + + let indexed: Vec = picker + .get_files() + .iter() + .map(|f| f.relative_path(&picker)) + .collect(); + + assert!( + indexed.iter().any(|p| p.ends_with("in_a.rs")), + "expected to find included/in_a.rs in index, got {:?}", + indexed + ); + assert!( + indexed.iter().any(|p| p.ends_with("in_b.rs")), + "expected to find included/sub/in_b.rs in index, got {:?}", + indexed + ); + assert!( + indexed + .iter() + .all(|p| !p.contains("ex_a.rs") && !p.contains("ex_b.rs")), + "sparse-excluded files leaked into the index: {:?}", + indexed + ); + + let hits = fuzzy_search_paths(&picker, "in_b"); + assert!( + hits.iter().any(|p| p.ends_with("in_b.rs")), + "fuzzy search must surface materialized files, got {:?}", + hits + ); + + drop(picker); + + // ---- Path 2: async `new_with_shared_state` + refresh_git_status. --- + // This goes through the orchestrated scan + git-apply pipeline + // (`scan.rs::apply_git_status_and_frecency` followed by an explicit + // `refresh_git_status` that hits the `update_git_statuses` branch + // which is the one that has to silently drop sparse-excluded paths). + let shared_picker = SharedFilePicker::default(); + let shared_frecency = SharedFrecency::default(); + FilePicker::new_with_shared_state( + shared_picker.clone(), + shared_frecency.clone(), + FilePickerOptions { + base_path: work.to_string_lossy().to_string(), + enable_mmap_cache: false, + enable_content_indexing: false, + watch: false, + ..Default::default() + }, + ) + .expect("new_with_shared_state must succeed on a sparse-checked repo"); + + assert!( + shared_picker.wait_for_scan(Duration::from_secs(15)), + "initial scan timed out" + ); + + let applied = shared_picker + .refresh_git_status(&shared_frecency) + .expect("refresh_git_status must not error on a sparse-checked repo"); + assert!( + applied >= 3, + "expected statuses for materialized files, got {applied}" + ); + + { + let guard = shared_picker.read().unwrap(); + let picker = guard.as_ref().unwrap(); + for f in picker.get_files() { + let rel = f.relative_path(picker); + assert!( + !rel.contains("ex_a.rs") && !rel.contains("ex_b.rs"), + "sparse-excluded file leaked through async path: {rel}" + ); + } + } + + // ---- Log assertions. -------------------------------------------- + let captured = logs.lines(); + + assert!( + captured.contains("Git status for path not in index, skipping"), + "expected the documented debug skip-message for sparse-excluded \ + paths to be emitted at least once, full log was:\n{captured}" + ); + + let unexpected: Vec<&str> = captured + .lines() + .filter(|l| l.contains(" ERROR ") || l.contains(" WARN ")) + // The walker emits a warning when given a non-git base path; we + // are *inside* a git repo so it should not trip, but keep the + // filter narrow regardless. + .filter(|l| !l.contains("No git repository found")) + .collect(); + assert!( + unexpected.is_empty(), + "no ERROR/WARN lines expected on a sparse-checked repo, got:\n{}", + unexpected.join("\n") + ); +} diff --git a/crates/fff-mcp/src/main.rs b/crates/fff-mcp/src/main.rs index 497c5ee9..d142d2fc 100644 --- a/crates/fff-mcp/src/main.rs +++ b/crates/fff-mcp/src/main.rs @@ -142,6 +142,11 @@ pub(crate) struct Args { #[arg(long = "no-watch")] no_watch: bool, + /// Skip git submodule directories during traversal and exclude them + /// from git status. Default: submodules are walked and reported. + #[arg(long = "no-submodules")] + no_submodules: bool, + /// Maximum number of files whose content is kept persistently in memory. /// Files beyond this limit are still searchable via temporary mmaps that /// are released after each grep. Defaults to 30 000. @@ -263,6 +268,7 @@ async fn main() -> Result<(), Box> { .max_cached_files .map(fff::ContentCacheBudget::new_for_repo), follow_symlinks: false, + support_submodules: !args.no_submodules, ..Default::default() }, ) diff --git a/crates/fff-nvim/src/lib.rs b/crates/fff-nvim/src/lib.rs index a9f3cb2d..534bbe35 100644 --- a/crates/fff-nvim/src/lib.rs +++ b/crates/fff-nvim/src/lib.rs @@ -62,12 +62,24 @@ pub fn destroy_query_db(_: &Lua, _: ()) -> LuaResult { /// Opts table accepted by `init_file_picker` / `restart_index_in_path`. /// Backwards compat: positional `follow_symlinks: bool` argument still works /// — callers that pass a table use the named fields instead. -#[derive(Default)] struct PickerInitOpts { follow_symlinks: bool, enable_fs_root_scanning: bool, enable_home_dir_scanning: bool, enable_filename_constraint: bool, + support_submodules: bool, +} + +impl Default for PickerInitOpts { + fn default() -> Self { + Self { + follow_symlinks: false, + enable_fs_root_scanning: false, + enable_home_dir_scanning: false, + enable_filename_constraint: false, + support_submodules: true, + } + } } impl PickerInitOpts { @@ -92,6 +104,9 @@ impl PickerInitOpts { enable_filename_constraint: t .get::>("enable_filename_constraint")? .unwrap_or(false), + support_submodules: t + .get::>("support_submodules")? + .unwrap_or(true), }), other => Err(LuaError::RuntimeError(format!( "init opts must be a table, boolean, or nil — got {}", @@ -128,6 +143,7 @@ pub fn init_file_picker( follow_symlinks: opts.follow_symlinks, enable_fs_root_scanning: opts.enable_fs_root_scanning, enable_home_dir_scanning: opts.enable_home_dir_scanning, + support_submodules: opts.support_submodules, ..Default::default() }, ) @@ -175,7 +191,7 @@ pub fn restart_index_in_path( // Inherit current picker's scanning flags when caller didn't pass // explicit opts — otherwise a `:cd ~` after init would silently lose // the user's `enable_home_dir_scanning = true` setting. - let (follow_symlinks, fs_root, home_dir) = { + let (follow_symlinks, fs_root, home_dir, support_submodules) = { let guard = match FILE_PICKER.read() { Ok(g) => g, Err(_) => return, @@ -193,11 +209,13 @@ pub fn restart_index_in_path( p.follows_symlinks() || opts.follow_symlinks, p.fs_root_scanning_enabled() || opts.enable_fs_root_scanning, p.home_dir_scanning_enabled() || opts.enable_home_dir_scanning, + p.has_submodule_support() && opts.support_submodules, ), None => ( opts.follow_symlinks, opts.enable_fs_root_scanning, opts.enable_home_dir_scanning, + opts.support_submodules, ), } }; @@ -220,6 +238,7 @@ pub fn restart_index_in_path( follow_symlinks, enable_fs_root_scanning: fs_root, enable_home_dir_scanning: home_dir, + support_submodules, ..Default::default() }, ) { diff --git a/crates/fff-python/src/finder.rs b/crates/fff-python/src/finder.rs index e61232fc..d3c77fe1 100644 --- a/crates/fff-python/src/finder.rs +++ b/crates/fff-python/src/finder.rs @@ -246,6 +246,7 @@ impl FileFinder { follow_symlinks: false, enable_fs_root_scanning, enable_home_dir_scanning, + support_submodules: true, }, ) .map_err(py_err) @@ -775,6 +776,7 @@ impl FileFinder { follow_symlinks: false, enable_fs_root_scanning: fs_root, enable_home_dir_scanning: home_dir, + support_submodules: true, }, ) .map_err(py_err) diff --git a/doc/fff.nvim.txt b/doc/fff.nvim.txt index 025e2956..96e16189 100644 --- a/doc/fff.nvim.txt +++ b/doc/fff.nvim.txt @@ -245,6 +245,7 @@ Defaults are sensible. Override only what you care about. }, git = { status_text_color = false, -- true to color filenames by git status + support_submodules = true, -- walk into submodules and report their git status; set false to skip them }, select = { -- Return winid to open the chosen file in, or nil to open in the original window diff --git a/lua/fff/conf.lua b/lua/fff/conf.lua index dcb53e01..b5e5bca9 100644 --- a/lua/fff/conf.lua +++ b/lua/fff/conf.lua @@ -380,6 +380,10 @@ local function init() -- Git integration git = { status_text_color = false, -- Apply git status colors to filename text (default: false, only sign column) + -- When true (default), git submodules are walked during indexing and their + -- statuses are reported by libgit2. When false, submodule directories are + -- skipped from traversal and excluded from the git status cache. + support_submodules = true, }, debug = { enabled = false, -- Show file info panel in preview diff --git a/lua/fff/core.lua b/lua/fff/core.lua index 1b3f3a16..e10dfed7 100644 --- a/lua/fff/core.lua +++ b/lua/fff/core.lua @@ -116,6 +116,7 @@ M.change_indexing_directory = function(new_path) enable_fs_root_scanning = config.enable_fs_root_scanning, enable_home_dir_scanning = config.enable_home_dir_scanning, enable_filename_constraint = config.grep and config.grep.enable_filename_constraint, + support_submodules = config.git and config.git.support_submodules, }) if not ok then vim.notify('Failed to change directory: ' .. err, vim.log.levels.ERROR) @@ -152,6 +153,7 @@ M.ensure_initialized = function() enable_fs_root_scanning = config.enable_fs_root_scanning, enable_home_dir_scanning = config.enable_home_dir_scanning, enable_filename_constraint = config.grep and config.grep.enable_filename_constraint, + support_submodules = config.git and config.git.support_submodules, }) if not ok then vim.notify('Failed to initialize file picker: ' .. tostring(result), vim.log.levels.ERROR)