From d21093ba1194b9a9c7fed60489b1d6ec3b7949a5 Mon Sep 17 00:00:00 2001 From: Special Bread <250742548+special-bread@users.noreply.github.com> Date: Sun, 10 May 2026 16:36:33 +0100 Subject: [PATCH 1/4] Improve performance of status on windows --- Cargo.lock | 2 + gix-index/src/entry/mode.rs | 42 +- gix-status/Cargo.toml | 9 + gix-status/src/index_as_worktree/function.rs | 174 ++++-- gix-status/src/index_as_worktree/types.rs | 7 + .../src/index_as_worktree_with_renames/mod.rs | 2 + .../index_as_worktree_with_renames/types.rs | 5 + gix-status/src/lib.rs | 13 + gix-status/src/metadata_cache.rs | 564 ++++++++++++++++++ gix-status/tests/status/index_as_worktree.rs | 4 + .../status/index_as_worktree_with_renames.rs | 2 + gix/src/status/index_worktree.rs | 11 + gix/src/status/iter/mod.rs | 12 + gix/src/status/mod.rs | 59 ++ gix/src/status/platform.rs | 33 + 15 files changed, 890 insertions(+), 49 deletions(-) create mode 100644 gix-status/src/metadata_cache.rs diff --git a/Cargo.lock b/Cargo.lock index 2efd9a7cbda..3346920e95c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2379,9 +2379,11 @@ dependencies = [ "gix-status", "gix-testtools", "gix-worktree", + "hashbrown 0.16.1", "portable-atomic", "pretty_assertions", "thiserror 2.0.18", + "windows-sys 0.61.2", ] [[package]] diff --git a/gix-index/src/entry/mode.rs b/gix-index/src/entry/mode.rs index 3b29548485f..54874748f07 100644 --- a/gix-index/src/entry/mode.rs +++ b/gix-index/src/entry/mode.rs @@ -46,22 +46,44 @@ impl Mode { stat: &crate::fs::Metadata, has_symlinks: bool, executable_bit: bool, + ) -> Option { + self.change_to_match_fs_with_values( + stat.is_file(), + stat.is_dir(), + stat.is_symlink(), + stat.is_executable(), + has_symlinks, + executable_bit, + ) + } + + /// Like [`change_to_match_fs`](Self::change_to_match_fs) but accepts pre-extracted + /// file-type and permission bits, for callers that already have them (e.g. cached + /// metadata from a batched directory enumeration). 
+ pub fn change_to_match_fs_with_values( + self, + is_file: bool, + is_dir: bool, + is_symlink: bool, + is_executable: bool, + has_symlinks: bool, + executable_bit: bool, ) -> Option { match self { - Mode::FILE if !stat.is_file() => (), - Mode::SYMLINK if stat.is_symlink() => return None, - Mode::SYMLINK if has_symlinks && !stat.is_symlink() => (), - Mode::SYMLINK if !has_symlinks && !stat.is_file() => (), - Mode::COMMIT | Mode::DIR if !stat.is_dir() => (), - Mode::FILE if executable_bit && stat.is_executable() => return Some(Change::ExecutableBit), - Mode::FILE_EXECUTABLE if executable_bit && !stat.is_executable() => return Some(Change::ExecutableBit), + Mode::FILE if !is_file => (), + Mode::SYMLINK if is_symlink => return None, + Mode::SYMLINK if has_symlinks && !is_symlink => (), + Mode::SYMLINK if !has_symlinks && !is_file => (), + Mode::COMMIT | Mode::DIR if !is_dir => (), + Mode::FILE if executable_bit && is_executable => return Some(Change::ExecutableBit), + Mode::FILE_EXECUTABLE if executable_bit && !is_executable => return Some(Change::ExecutableBit), _ => return None, } - let new_mode = if stat.is_dir() { + let new_mode = if is_dir { Mode::COMMIT - } else if executable_bit && stat.is_executable() { + } else if executable_bit && is_executable { Mode::FILE_EXECUTABLE - } else if has_symlinks && stat.is_symlink() { + } else if has_symlinks && is_symlink { Mode::SYMLINK } else { Mode::FILE diff --git a/gix-status/Cargo.toml b/gix-status/Cargo.toml index 000dd6020eb..c2eedeb6a98 100644 --- a/gix-status/Cargo.toml +++ b/gix-status/Cargo.toml @@ -41,12 +41,21 @@ gix-diff = { version = "^0.63.0", path = "../gix-diff", default-features = false thiserror = "2.0.18" filetime = "0.2.27" bstr = { version = "1.12.0", default-features = false } +hashbrown = "0.16.0" document-features = { version = "0.2.0", optional = true } [target.'cfg(not(target_has_atomic = "64"))'.dependencies] portable-atomic = "1" +[target.'cfg(windows)'.dependencies] +windows-sys = { version = 
/// Metadata for a single worktree entry on Windows, sourced either from a
/// live no-follow filesystem query or from an entry of the pre-populated
/// metadata cache.
///
/// Both variants answer the same questions, so the status computation can be
/// written once against this type.
#[cfg(windows)]
enum FileMetadata<'a> {
    /// Metadata queried from the filesystem for this entry.
    Live(gix_index::fs::Metadata),
    /// Metadata borrowed from the metadata cache.
    Cached(&'a CachedMetadata),
}

#[cfg(windows)]
impl FileMetadata<'_> {
    /// Whether the entry is a directory.
    fn is_dir(&self) -> bool {
        match self {
            Self::Cached(entry) => entry.is_dir,
            Self::Live(fs_meta) => fs_meta.is_dir(),
        }
    }

    /// Whether the entry is a symbolic link.
    fn is_symlink(&self) -> bool {
        match self {
            Self::Cached(entry) => entry.is_symlink,
            Self::Live(fs_meta) => fs_meta.is_symlink(),
        }
    }

    /// Size of the entry in bytes.
    fn len(&self) -> u64 {
        match self {
            Self::Cached(entry) => entry.size,
            Self::Live(fs_meta) => fs_meta.len(),
        }
    }

    /// Build the index `Stat` used for comparison against the stored entry.
    ///
    /// Only the live variant can fail; the cached variant is always `Ok`.
    // NOTE(review): error type assumed to be what `Stat::from_fs` returns — confirm.
    fn to_stat(&self) -> Result<gix_index::entry::Stat, std::time::SystemTimeError> {
        match self {
            Self::Cached(entry) => Ok(entry.to_stat()),
            Self::Live(fs_meta) => gix_index::entry::Stat::from_fs(fs_meta),
        }
    }

    /// Determine how `entry_mode` would have to change to match this metadata,
    /// honoring the `has_symlinks` and `executable_bit` filesystem capabilities.
    fn mode_change(
        &self,
        entry_mode: gix_index::entry::Mode,
        has_symlinks: bool,
        executable_bit: bool,
    ) -> Option<gix_index::entry::mode::Change> {
        let cached = match self {
            Self::Live(fs_meta) => {
                return entry_mode.change_to_match_fs(fs_meta, has_symlinks, executable_bit);
            }
            Self::Cached(entry) => *entry,
        };
        // A cached entry counts as a regular file when it is neither a directory nor a symlink.
        let is_regular_file = !(cached.is_dir || cached.is_symlink);
        entry_mode.change_to_match_fs_with_values(
            is_regular_file,
            cached.is_dir,
            cached.is_symlink,
            cached.is_executable,
            has_symlinks,
            executable_bit,
        )
    }
}
On Windows we consult the metadata cache first and + // only fall back to a syscall on miss; on other platforms per-file + // `lstat` is already fast, so we just do the syscall directly. + #[cfg(windows)] + let metadata = if let Some(cached) = self.metadata_cache.and_then(|c| metadata_cache::lookup(c, rela_path)) { + FileMetadata::Cached(cached) + } else { + self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed); + match gix_index::fs::Metadata::from_path_no_follow(worktree_path) { + Ok(m) => FileMetadata::Live(m), + Err(err) if gix_fs::io_err::is_not_found(err.kind(), err.raw_os_error()) => { + return Ok(Some(Change::Removed.into())) } + Err(err) => return Err(Error::Io(err.into())), } - Ok(metadata) => metadata, - Err(err) if gix_fs::io_err::is_not_found(err.kind(), err.raw_os_error()) => { - return Ok(Some(Change::Removed.into())); - } - Err(err) => { - return Err(Error::Io(err.into())); + }; + #[cfg(not(windows))] + let metadata = { + self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed); + match gix_index::fs::Metadata::from_path_no_follow(worktree_path) { + Ok(m) => m, + Err(err) if gix_fs::io_err::is_not_found(err.kind(), err.raw_os_error()) => { + return Ok(Some(Change::Removed.into())) + } + Err(err) => return Err(Error::Io(err.into())), } }; + + // Handle directory: index entries are normally only for files/symlinks. + // If a file turned into a directory it was removed. + // The only exception here are submodules which are part of the index despite being directories. 
+ if metadata.is_dir() { + if entry.mode.is_submodule() { + let status = submodule + .status(entry, rela_path) + .map_err(|err| Error::SubmoduleStatus { + rela_path: rela_path.into(), + source: Box::new(err), + })?; + return Ok(status.map(|status| Change::SubmoduleModification(status).into())); + } else { + return Ok(Some(Change::Removed.into())); + } + } + if entry.flags.contains(gix_index::entry::Flags::INTENT_TO_ADD) { return Ok(Some(EntryStatus::IntentToAdd)); } + + #[cfg(windows)] + let new_stat = metadata.to_stat()?; + #[cfg(not(windows))] let new_stat = gix_index::entry::Stat::from_fs(&metadata)?; - let executable_bit_changed = - match entry - .mode - .change_to_match_fs(&metadata, self.options.fs.symlink, self.options.fs.executable_bit) - { - Some(gix_index::entry::mode::Change::Type { new_mode }) => { - return Ok(Some( - Change::Type { - worktree_mode: new_mode, - } - .into(), - )); - } - Some(gix_index::entry::mode::Change::ExecutableBit) => true, - None => false, - }; + + #[cfg(windows)] + let mode_change = + metadata.mode_change(entry.mode, self.options.fs.symlink, self.options.fs.executable_bit); + #[cfg(not(windows))] + let mode_change = entry + .mode + .change_to_match_fs(&metadata, self.options.fs.symlink, self.options.fs.executable_bit); + let executable_bit_changed = match mode_change { + Some(gix_index::entry::mode::Change::Type { new_mode }) => { + return Ok(Some( + Change::Type { + worktree_mode: new_mode, + } + .into(), + )); + } + Some(gix_index::entry::mode::Change::ExecutableBit) => true, + None => false, + }; // We implement racy-git. See racy-git.txt in the git documentation for detailed documentation. 
// diff --git a/gix-status/src/index_as_worktree/types.rs b/gix-status/src/index_as_worktree/types.rs index 26d26981412..4485f69a30e 100644 --- a/gix-status/src/index_as_worktree/types.rs +++ b/gix-status/src/index_as_worktree/types.rs @@ -3,6 +3,9 @@ use std::sync::atomic::AtomicBool; use bstr::{BStr, BString}; use gix_index::entry; +#[cfg(windows)] +use crate::metadata_cache::MetadataCache; + /// The error returned by [index_as_worktree()`](crate::index_as_worktree()). #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] @@ -55,6 +58,10 @@ pub struct Context<'a> { pub filter: gix_filter::Pipeline, /// A flag to query to learn if cancellation is requested. pub should_interrupt: &'a AtomicBool, + /// Windows-only pre-populated metadata cache. See [`crate::metadata_cache`]. + /// Transparent: `None`/empty/partial are all correct. + #[cfg(windows)] + pub metadata_cache: Option<&'a MetadataCache>, } /// Provide additional information collected during the runtime of [`index_as_worktree()`](crate::index_as_worktree()). diff --git a/gix-status/src/index_as_worktree_with_renames/mod.rs b/gix-status/src/index_as_worktree_with_renames/mod.rs index 9eff6084a25..c58b356a26b 100644 --- a/gix-status/src/index_as_worktree_with_renames/mod.rs +++ b/gix-status/src/index_as_worktree_with_renames/mod.rs @@ -153,6 +153,8 @@ pub(super) mod function { stack, filter, should_interrupt: ctx.should_interrupt, + #[cfg(windows)] + metadata_cache: ctx.metadata_cache, }, options.tracked_file_modifications, ) diff --git a/gix-status/src/index_as_worktree_with_renames/types.rs b/gix-status/src/index_as_worktree_with_renames/types.rs index d0e528c1e4b..8a42948aea1 100644 --- a/gix-status/src/index_as_worktree_with_renames/types.rs +++ b/gix-status/src/index_as_worktree_with_renames/types.rs @@ -342,6 +342,11 @@ pub struct Context<'a> { pub should_interrupt: &'a AtomicBool, /// The context for the directory walk. 
pub dirwalk: DirwalkContext<'a>, + /// An optional pre-populated metadata cache for faster status checks on Windows. + /// + /// See [`crate::index_as_worktree::Context::metadata_cache`] for details. + #[cfg(windows)] + pub metadata_cache: Option<&'a crate::metadata_cache::MetadataCache>, } /// All information that is required to perform a [dirwalk](gix_dir::walk()). diff --git a/gix-status/src/lib.rs b/gix-status/src/lib.rs index b49a086182f..9ab4245c61e 100644 --- a/gix-status/src/lib.rs +++ b/gix-status/src/lib.rs @@ -37,6 +37,19 @@ use portable_atomic::AtomicU64; pub mod index_as_worktree; pub use index_as_worktree::function::index_as_worktree; +/// The metadata cache is a **Windows-only** optimization. Its job is to skip +/// per-file `lstat` calls by pre-populating stat results via one batched +/// directory enumeration. That trade only pays off where per-file stat is +/// expensive (Windows), not on Linux/macOS where `lstat` is sub-microsecond. +/// A Linux-friendly cache would almost certainly be keyed by *directory* (à la +/// git's `UNTRACKED_CACHE`) rather than by file path, so forcing this type to +/// exist there would encourage the wrong abstraction. Keep the two separate; +/// lift this gate if a cross-platform use case actually appears. +#[cfg(windows)] +pub mod metadata_cache; +#[cfg(windows)] +pub use metadata_cache::{CachedMetadata, MetadataCache}; + #[cfg(feature = "worktree-rewrites")] pub mod index_as_worktree_with_renames; #[cfg(feature = "worktree-rewrites")] diff --git a/gix-status/src/metadata_cache.rs b/gix-status/src/metadata_cache.rs new file mode 100644 index 00000000000..ac29c5b6548 --- /dev/null +++ b/gix-status/src/metadata_cache.rs @@ -0,0 +1,564 @@ +//! Windows-only metadata cache — see the gate on `pub mod metadata_cache` in +//! [`crate`] for why this is Windows-only. +//! +//! [`prepare`] batches a parallel GetFileInformationByHandleEx walk of the +//! worktree (~30 ms / 90 k files) into a [`MetadataCache`] keyed by +//! 
worktree-relative path. index_as_worktree looks up each index entry +//! there instead of calling lstat (~1 s for the same tree). +//! +//! The cache is **transparent**: empty/partial/extra entries change speed +//! only, never correctness — misses fall through to a live syscall. + +use std::path::Path; + +use bstr::BString; + +/// Cached file metadata. +/// +/// Carries enough information to determine file type, detect mode changes, +/// build a [`gix_index::entry::Stat`] for comparison, and short-circuit content +/// reads via file size. +/// +/// All fields are platform-agnostic. When populating from a source that doesn't +/// provide some fields (e.g. Windows directory enumeration doesn't expose +/// `ino`/`uid`/`gid`), leave those as `0`/`false`. The status pipeline's stat +/// comparison on Windows treats zeros as "equal-by-default" for those fields. +#[derive(Debug, Clone, Default)] +pub struct CachedMetadata { + /// Whether this is a directory. + pub is_dir: bool, + /// Whether this is a symlink (or reparse point on Windows). + pub is_symlink: bool, + /// Whether the file has the executable bit set. + /// + /// Always `false` when populated from Windows batch enumeration — git on + /// Windows defaults to `core.filemode=false`, so the bit isn't tracked there. + pub is_executable: bool, + /// File size in bytes. + pub size: u64, + /// Modification time — seconds since Unix epoch. + pub mtime_secs: u32, + /// Modification time — nanoseconds component. + pub mtime_nsecs: u32, + /// Status/creation time — seconds since Unix epoch. + /// + /// On Windows this must be populated from the real `CreationTime`, not `mtime`: + /// the stat comparison in the status pipeline compares `ctime.secs` by default + /// (`trust_ctime=true`), and faking `ctime=mtime` causes spurious mismatches + /// for any file whose creation-time and modification-time differ. + pub ctime_secs: u32, + /// Status/creation time — nanoseconds component. + pub ctime_nsecs: u32, + /// Device ID. 
Set to 0 if not available (always 0 on Windows). + pub dev: u64, + /// Inode number. Set to 0 if not available (always 0 on Windows). + pub ino: u64, + /// User ID. Set to 0 if not available (always 0 on Windows). + pub uid: u32, + /// Group ID. Set to 0 if not available (always 0 on Windows). + pub gid: u32, +} + +impl CachedMetadata { + /// Convert to gitoxide's [`Stat`](gix_index::entry::Stat) struct for index comparison. + /// + /// Truncates `dev`, `ino`, and `size` from 64 to 32 bits — matching what + /// [`gix_index::entry::stat::Stat::from_fs`] does on Unix, so both code + /// paths compare the same quantities. + pub fn to_stat(&self) -> gix_index::entry::Stat { + gix_index::entry::Stat { + mtime: gix_index::entry::stat::Time { + secs: self.mtime_secs, + nsecs: self.mtime_nsecs, + }, + ctime: gix_index::entry::stat::Time { + secs: self.ctime_secs, + nsecs: self.ctime_nsecs, + }, + dev: self.dev as u32, + ino: self.ino as u32, + uid: self.uid, + gid: self.gid, + size: self.size as u32, + } + } +} + +/// Metadata cache: maps worktree-relative paths (forward-slashed, case-normalized +/// per [`normalize_path`]) to cached metadata. +pub type MetadataCache = hashbrown::HashMap; + +/// Normalize a path for use as a cache key. Lower-cases on Windows (case- +/// insensitive filesystem), which is the only target this module compiles on. +#[inline] +pub fn normalize_path(path: &[u8]) -> BString { + use bstr::ByteSlice; + path.to_str_lossy().to_lowercase().into() +} + +/// Look up cached metadata for a worktree-relative path, avoiding per-lookup heap +/// allocations on the common (ASCII) fast path. +/// +/// This is the hot path called once per index entry in `index_as_worktree`. A +/// naive `cache.get(&normalize_path(rela_path))` allocates a fresh `BString` +/// for the lowercased key 90 k+ times per status run — under multi-threaded +/// allocator contention that shows up in wall-clock. 
This helper lowercases +/// into a stack buffer when the path is ASCII and short, and falls back to +/// [`normalize_path`] for the rare non-ASCII / oversized case. +#[inline] +pub fn lookup<'a>(cache: &'a MetadataCache, rela_path: &[u8]) -> Option<&'a CachedMetadata> { + const STACK_BUF: usize = 256; + if rela_path.len() <= STACK_BUF && rela_path.is_ascii() { + let mut buf = [0u8; STACK_BUF]; + for (dst, &src) in buf.iter_mut().zip(rela_path.iter()) { + // ASCII-only fast lowercase: bit 0x20 toggles case for A-Z. + *dst = if src.is_ascii_uppercase() { src | 0x20 } else { src }; + } + return cache.get(&buf[..rela_path.len()] as &[u8]); + } + // Slow path: non-ASCII (needs Unicode case folding) or path longer than + // the stack buffer. Cache stores Unicode-lowercased keys, so we match + // that here. + let key = normalize_path(rela_path); + cache.get(key.as_slice()) +} + +/// Prepare a metadata cache by walking the worktree in parallel using +/// `GetFileInformationByHandleEx` with `FileIdBothDirectoryInfo`, skipping +/// subtrees flagged by the per-thread predicate produced by `make_excludes`. +/// +/// The returned cache can be attached to the status pipeline via +/// [`Context::metadata_cache`](crate::index_as_worktree::Context::metadata_cache) +/// — cache hits skip per-file syscalls. +/// +/// `thread_limit` caps parallelism. `None` uses all available cores; `Some(1)` +/// is single-threaded. +/// +/// `make_excludes` is called once on each worker thread and returns a predicate +/// that owns thread-local state (e.g. a `gix_worktree::Stack`). Each time the +/// walker is about to descend into a subdirectory, it calls the predicate with +/// the worktree-relative path; returning `true` skips that subtree. Callers +/// that don't need gitignore pruning can pass `|| |_: &bstr::BStr| false`, but +/// for typical projects with fat ignored dirs (`node_modules`, `target`) the +/// wasted enumeration makes the cache net-slower than plain per-file stats. 
+pub fn prepare(worktree: &Path, thread_limit: Option, make_excludes: F) -> std::io::Result +where + F: Fn() -> E + Sync, + E: FnMut(&bstr::BStr) -> bool, +{ + windows::walk_worktree_parallel(worktree, thread_limit, make_excludes) +} + +/// Windows-specific implementation using `GetFileInformationByHandleEx` / +/// `FileIdBothDirectoryInfo`. Work-stealing across threads via `thread::scope`. +#[allow(unsafe_code)] +mod windows { + use super::*; + use std::collections::VecDeque; + use std::ffi::{OsString, c_void}; + use std::os::windows::ffi::{OsStrExt, OsStringExt}; + use std::sync::{Condvar, Mutex}; + use std::thread; + + use windows_sys::Win32::Foundation::{CloseHandle, INVALID_HANDLE_VALUE}; + use windows_sys::Win32::Storage::FileSystem::{ + CreateFileW, FILE_ATTRIBUTE_DIRECTORY, FILE_ATTRIBUTE_REPARSE_POINT, FILE_FLAG_BACKUP_SEMANTICS, + FILE_ID_BOTH_DIR_INFO, FILE_LIST_DIRECTORY, FILE_SHARE_DELETE, FILE_SHARE_READ, FILE_SHARE_WRITE, + FileIdBothDirectoryInfo, GetFileInformationByHandleEx, OPEN_EXISTING, SYNCHRONIZE, + }; + + /// 64 KiB, u64-aligned — `FILE_ID_BOTH_DIR_INFO` contains LARGE_INTEGER fields that + /// require 8-byte alignment, and `Vec` guarantees it. Hoisted to the worker so + /// 6k+ directory walks reuse one allocation instead of allocating per call. + const BUFFER_U64S: usize = 8 * 1024; + + /// Work item for the parallel walker: (null-terminated UTF-16 absolute path, relative prefix). + /// + /// The path is stored pre-encoded so `CreateFileW` on the child can reuse the parent's + /// allocation without re-traversing `PathBuf`/`OsStr` each time. + type WorkItem = (Vec, String); + + /// Convert FILE_ID_BOTH_DIR_INFO to CachedMetadata. + fn cached_from_info(info: &FILE_ID_BOTH_DIR_INFO) -> CachedMetadata { + let size = info.EndOfFile as u64; + + // FILETIME values are LARGE_INTEGER holding 100ns intervals since 1601-01-01 UTC. 
/// Translate a Windows FILETIME tick count (100 ns units counted from
/// 1601-01-01 UTC) into `(seconds, nanoseconds)` relative to the Unix epoch.
///
/// Values before the Unix epoch clamp to `(0, 0)`. The seconds are narrowed to
/// 32 bits, keeping only the low bits.
fn filetime_to_unix(ft: u64) -> (u32, u32) {
    // Number of 100 ns ticks between 1601-01-01 and 1970-01-01.
    const EPOCH_DIFF: u64 = 116_444_736_000_000_000;
    const TICKS_PER_SECOND: u64 = 10_000_000;
    const NANOS_PER_TICK: u64 = 100;

    let ticks_since_unix_epoch = ft.saturating_sub(EPOCH_DIFF);
    let whole_seconds = ticks_since_unix_epoch / TICKS_PER_SECOND;
    let leftover_ticks = ticks_since_unix_epoch % TICKS_PER_SECOND;

    (whole_seconds as u32, (leftover_ticks * NANOS_PER_TICK) as u32)
}
/// Report whether a UTF-16 file name is exactly `.git`.
///
/// The comparison is case-sensitive and exact: `.Git`, `.gitignore` and `git`
/// all yield `false`.
fn name_is_dotgit(name: &[u16]) -> bool {
    name.iter().copied().eq(".git".encode_utf16())
}
+ fn walk_directory(dir_path: &[u16], rel_prefix: &str, buffer: &mut [u64]) -> std::io::Result { + let mut files = Vec::new(); + let mut subdirs = Vec::new(); + + let handle = unsafe { + CreateFileW( + dir_path.as_ptr(), + FILE_LIST_DIRECTORY | SYNCHRONIZE, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + std::ptr::null(), + OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS, + std::ptr::null_mut(), + ) + }; + + if handle == INVALID_HANDLE_VALUE { + // Directory doesn't exist or can't be read - not an error for a look-through cache. + return Ok((files, subdirs)); + } + + let buffer_bytes = (buffer.len() * 8) as u32; + + loop { + let success = unsafe { + GetFileInformationByHandleEx( + handle, + FileIdBothDirectoryInfo, + buffer.as_mut_ptr().cast::(), + buffer_bytes, + ) + }; + if success == 0 { + // End of enumeration (ERROR_NO_MORE_FILES) or access denied / similar. + // Either way, stop: the cache is best-effort and correctness falls back + // to per-file syscalls in `index_as_worktree`. + break; + } + + let mut offset = 0usize; + loop { + let info_ptr = unsafe { buffer.as_ptr().cast::().add(offset).cast::() }; + let info = unsafe { &*info_ptr }; + + let name_len = (info.FileNameLength / 2) as usize; + let name_slice = unsafe { std::slice::from_raw_parts(info.FileName.as_ptr(), name_len) }; + + let is_dot = name_len == 1 && name_slice[0] == b'.' as u16; + let is_dotdot = name_len == 2 && name_slice[0] == b'.' as u16 && name_slice[1] == b'.' 
as u16; + + if !is_dot && !is_dotdot && !name_is_dotgit(name_slice) { + let is_dir = (info.FileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; + let is_reparse = (info.FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0; + + let name_str = OsString::from_wide(name_slice).to_string_lossy().into_owned(); + let rel_path = if rel_prefix.is_empty() { + name_str + } else { + format!("{rel_prefix}/{name_str}") + }; + + let meta = cached_from_info(info); + let normalized = normalize_path(rel_path.as_bytes()); + files.push((normalized, meta)); + + if is_dir && !is_reparse { + let child = join_utf16(dir_path, name_slice); + subdirs.push((child, rel_path)); + } + } + + if info.NextEntryOffset == 0 { + break; + } + offset += info.NextEntryOffset as usize; + } + } + + unsafe { CloseHandle(handle) }; + Ok((files, subdirs)) + } + + /// A directory the walk hasn't descended into yet, plus a count of workers + /// currently processing work so the last one out can tell the others to exit. + struct WorkQueue { + dirs: VecDeque, + active_workers: usize, + } + + /// Walk the worktree using work-stealing parallelism. 
+ pub fn walk_worktree_parallel( + worktree: &Path, + thread_limit: Option, + make_excludes: F, + ) -> std::io::Result + where + F: Fn() -> E + Sync, + E: FnMut(&bstr::BStr) -> bool, + { + let num_threads = thread_limit + .unwrap_or_else(|| { + std::thread::available_parallelism() + .map(std::num::NonZero::get) + .unwrap_or(4) + }) + .max(1); + + if num_threads == 1 { + return walk_worktree_single_threaded(worktree, make_excludes()); + } + + let queue_mutex = Mutex::new(WorkQueue { + dirs: VecDeque::from([(utf16_null_terminated(worktree), String::new())]), + active_workers: 0, + }); + let cvar = Condvar::new(); + let shared_cache = Mutex::new(MetadataCache::default()); + + thread::scope(|s| { + for _ in 0..num_threads { + let make_excludes = &make_excludes; + s.spawn(|| worker(&queue_mutex, &cvar, &shared_cache, make_excludes())); + } + }); + + Ok(shared_cache.into_inner().unwrap()) + } + + /// One worker of the parallel walker. Grabs batches of directories from the + /// shared queue, walks them into a thread-local cache, and pushes any discovered + /// subdirectories back onto the queue. Exits when the queue is drained and no + /// worker is still producing. + /// + /// `is_excluded` is a thread-local predicate that returns true for directories + /// whose contents should be skipped (gitignored). The excluded directory's own + /// metadata entry is still cached; only recursion is avoided. + fn worker bool>( + queue_mutex: &Mutex, + cvar: &Condvar, + shared_cache: &Mutex, + mut is_excluded: E, + ) { + let mut local_cache = MetadataCache::default(); + let mut local_stack: Vec = Vec::new(); + let mut buffer = vec![0u64; BUFFER_U64S]; + + loop { + // Claim work, or exit if the walk is done. + { + let mut queue = queue_mutex.lock().unwrap(); + loop { + // Steal up to half of the queue (capped) to reduce re-locking while + // still leaving work for other threads to pick up. 
+ let take = queue.dirs.len().div_ceil(2).min(32); + if take > 0 { + local_stack.extend(queue.dirs.drain(..take)); + queue.active_workers += 1; + break; + } + if queue.active_workers == 0 { + // Queue is empty and no one is producing more work: we're done. + cvar.notify_all(); + shared_cache.lock().unwrap().extend(local_cache); + return; + } + queue = cvar.wait(queue).unwrap(); + } + } + + // Process the claimed directories outside the lock. + let mut new_dirs: Vec = Vec::new(); + while let Some((dir, rel_prefix)) = local_stack.pop() { + if let Ok((files, subdirs)) = walk_directory(&dir, &rel_prefix, &mut buffer) { + local_cache.extend(files); + for (child_path, child_rel) in subdirs { + if !is_excluded(child_rel.as_bytes().into()) { + new_dirs.push((child_path, child_rel)); + } + } + } + } + + // Return discovered subdirectories; wake anyone waiting. + let mut queue = queue_mutex.lock().unwrap(); + queue.dirs.extend(new_dirs); + queue.active_workers -= 1; + cvar.notify_all(); + } + } + + /// Simple single-threaded walk for thread_limit=1. 
+ fn walk_worktree_single_threaded bool>( + worktree: &Path, + mut is_excluded: E, + ) -> std::io::Result { + let mut cache = MetadataCache::default(); + let mut dir_stack: Vec = vec![(utf16_null_terminated(worktree), String::new())]; + let mut buffer = vec![0u64; BUFFER_U64S]; + + while let Some((dir, rel_prefix)) = dir_stack.pop() { + if let Ok((files, subdirs)) = walk_directory(&dir, &rel_prefix, &mut buffer) { + cache.extend(files); + for (child_path, child_rel) in subdirs { + if !is_excluded(child_rel.as_bytes().into()) { + dir_stack.push((child_path, child_rel)); + } + } + } + } + + Ok(cache) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_normalize_path() { + assert_eq!(normalize_path(b"Foo/Bar.txt"), BString::from("foo/bar.txt")); + assert_eq!(normalize_path(b"UPPER/CASE"), BString::from("upper/case")); + } + + #[test] + fn test_cached_metadata_to_stat() { + let cached = CachedMetadata { + is_dir: false, + is_symlink: false, + is_executable: true, + size: 1234, + mtime_secs: 1700000000, + mtime_nsecs: 500_000_000, + ctime_secs: 1699999999, + ctime_nsecs: 100_000_000, + dev: 123, + ino: 456, + uid: 1000, + gid: 1000, + }; + let stat = cached.to_stat(); + assert_eq!(stat.size, 1234); + assert_eq!(stat.mtime.secs, 1700000000); + assert_eq!(stat.mtime.nsecs, 500_000_000); + assert_eq!(stat.ctime.secs, 1699999999); + assert_eq!(stat.ctime.nsecs, 100_000_000); + assert_eq!(stat.dev, 123); + assert_eq!(stat.ino, 456); + assert_eq!(stat.uid, 1000); + assert_eq!(stat.gid, 1000); + } + + #[test] + fn test_lookup_round_trips_through_normalize_path() { + // Insert using the same path normalization the walker uses. + let mut cache = MetadataCache::default(); + let meta = CachedMetadata { + size: 42, + ..Default::default() + }; + cache.insert(normalize_path(b"src/foo.rs"), meta.clone()); + + // Exact-case and mixed-case both hit (Windows is case-insensitive). 
+ assert!(lookup(&cache, b"src/foo.rs").is_some()); + assert!(lookup(&cache, b"SRC/Foo.rs").is_some()); + + // Non-ASCII path routes through the fallback allocating path; exact-key + // round-trip still works. + cache.insert(normalize_path("ünïcode.txt".as_bytes()), meta); + assert!(lookup(&cache, "ünïcode.txt".as_bytes()).is_some()); + } + + #[test] + fn test_prepare_returns_cache() { + // Use a unique temp directory to avoid walking other files. + use std::time::{SystemTime, UNIX_EPOCH}; + let timestamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos(); + let temp_dir = std::env::temp_dir().join(format!("gix_status_test_{timestamp}")); + std::fs::create_dir_all(&temp_dir).unwrap(); + + let test_file = temp_dir.join("test.txt"); + std::fs::write(&test_file, b"hello").unwrap(); + + let subdir = temp_dir.join("subdir"); + std::fs::create_dir(&subdir).unwrap(); + let nested_file = subdir.join("nested.txt"); + std::fs::write(&nested_file, b"world").unwrap(); + + let cache = prepare(&temp_dir, Some(1), || |_: &bstr::BStr| false).unwrap(); + + assert!(!cache.is_empty()); + assert!(cache.contains_key(&normalize_path(b"test.txt"))); + assert!(cache.contains_key(&normalize_path(b"subdir/nested.txt"))); + + let _ = std::fs::remove_dir_all(&temp_dir); + } +} diff --git a/gix-status/tests/status/index_as_worktree.rs b/gix-status/tests/status/index_as_worktree.rs index 57e36674498..90ba59ab54e 100644 --- a/gix-status/tests/status/index_as_worktree.rs +++ b/gix-status/tests/status/index_as_worktree.rs @@ -187,6 +187,8 @@ fn fixture_filtered_detailed( }, ), should_interrupt: &AtomicBool::default(), + #[cfg(windows)] + metadata_cache: None, }; let options = Options { fs: fs_capabilities.map_or_else(|| gix_fs::Capabilities::probe(&git_dir), |new| new(&git_dir)), @@ -1054,6 +1056,8 @@ fn racy_git() { stack, filter: Default::default(), should_interrupt: &AtomicBool::default(), + #[cfg(windows)] + metadata_cache: None, }; let out = index_as_worktree( &index, diff 
--git a/gix-status/tests/status/index_as_worktree_with_renames.rs b/gix-status/tests/status/index_as_worktree_with_renames.rs index 878c58c6590..94c8eeb6797 100644 --- a/gix-status/tests/status/index_as_worktree_with_renames.rs +++ b/gix-status/tests/status/index_as_worktree_with_renames.rs @@ -339,6 +339,8 @@ fn fixture_filtered_detailed( current_dir: &cwd, ignore_case_index_lookup: None, }, + #[cfg(windows)] + metadata_cache: None, }; let options = Options { object_hash, diff --git a/gix/src/status/index_worktree.rs b/gix/src/status/index_worktree.rs index 9b7dc060f29..266aa9f29ef 100644 --- a/gix/src/status/index_worktree.rs +++ b/gix/src/status/index_worktree.rs @@ -28,6 +28,12 @@ pub enum Error { StatOptions(#[from] config::stat_options::Error), #[error(transparent)] ResourceCache(#[from] crate::diff::resource_cache::Error), + #[cfg(windows)] + #[error("Failed to prepare metadata cache")] + PrepareMetadataCache(#[from] std::io::Error), + #[cfg(windows)] + #[error(transparent)] + OpenIndex(#[from] crate::worktree::open_index::Error), } /// Options for use with [Repository::index_worktree_status()]. @@ -82,6 +88,8 @@ impl Repository { /// - A flag to stop the whole operation. /// * `options` /// - Additional configuration for all parts of the operation. + /// * `metadata_cache` *(Windows only)* + /// - Optional pre-populated metadata cache; see gix_status::metadata_cache. 
/// /// ### Note /// @@ -101,6 +109,7 @@ impl Repository { progress: &mut dyn gix_features::progress::Progress, should_interrupt: &AtomicBool, options: Options, + #[cfg(windows)] metadata_cache: Option<&gix_status::MetadataCache>, ) -> Result where T: Send + Clone, @@ -149,6 +158,8 @@ impl Repository { current_dir: cwd, ignore_case_index_lookup: accelerate_lookup.as_ref(), }, + #[cfg(windows)] + metadata_cache, }, gix_status::index_as_worktree_with_renames::Options { sorting: options.sorting, diff --git a/gix/src/status/iter/mod.rs b/gix/src/status/iter/mod.rs index 1b860099206..cee8506b051 100644 --- a/gix/src/status/iter/mod.rs +++ b/gix/src/status/iter/mod.rs @@ -65,6 +65,14 @@ where .unwrap_or_default(); let should_interrupt = self.should_interrupt.clone().unwrap_or_default(); let submodule = BuiltinSubmoduleStatus::new(self.repo.clone().into_sync(), self.submodules)?; + #[cfg(windows)] + let metadata_cache = match self.metadata_cache { + crate::status::MetadataCacheConfig::Provided(cache) => Some(cache), + crate::status::MetadataCacheConfig::Disabled => None, + // Best-effort: if the prep walk fails (missing workdir, syscall error), + // silently fall through to stat-based status rather than abort. 
+ crate::status::MetadataCacheConfig::Auto => crate::status::build_metadata_cache(self.repo, None).ok(), + }; #[cfg(feature = "parallel")] { let (tx, rx) = std::sync::mpsc::channel(); @@ -134,6 +142,8 @@ where &mut progress, &should_interrupt, options, + #[cfg(windows)] + metadata_cache.as_ref(), )?; Ok(Outcome { index_worktree: out, @@ -197,6 +207,8 @@ where &mut progress, &should_interrupt, options, + #[cfg(windows)] + metadata_cache.as_ref(), )?; let mut iter = Iter { items: Vec::new().into_iter(), diff --git a/gix/src/status/mod.rs b/gix/src/status/mod.rs index 121bb63aa02..d617fe88055 100644 --- a/gix/src/status/mod.rs +++ b/gix/src/status/mod.rs @@ -15,6 +15,23 @@ where index_worktree_options: index_worktree::Options, tree_index_renames: tree_index::TrackRenames, should_interrupt: Option, + #[cfg(windows)] + metadata_cache: MetadataCacheConfig, +} + +/// Windows-only: controls the metadata cache. `Auto` (default) trades a +/// one-shot gitignore-aware worktree walk (~30 ms / 90 k files) for avoiding +/// per-file `lstat` during status (~1 s for the same tree). +#[cfg(windows)] +#[derive(Default)] +pub enum MetadataCacheConfig { + /// Prepare the cache lazily inside the iterator using all cores. + #[default] + Auto, + /// Skip the cache. + Disabled, + /// Use this pre-built cache. + Provided(gix_status::MetadataCache), } /// How to obtain a submodule's status. @@ -114,6 +131,8 @@ impl Repository { rewrites: None, thread_limit: None, }, + #[cfg(windows)] + metadata_cache: MetadataCacheConfig::default(), }; let untracked = self @@ -232,6 +251,46 @@ pub mod into_iter { } } +/// Build a gitignore-aware Windows metadata cache. Shared between the explicit +/// `prepare_index_worktree_metadata_cache` and the Auto branch in `into_iter`. 
+#[cfg(windows)] +pub(crate) fn build_metadata_cache( + repo: &Repository, + thread_limit: Option, +) -> Result { + let workdir = repo + .workdir() + .ok_or(crate::status::index_worktree::Error::MissingWorkDir)?; + let sync_repo = repo.clone().into_sync(); + let index = repo.index_or_empty()?; + let index_state: &gix_index::State = &index; + + let make_excludes = || -> Box bool> { + let thread_repo = sync_repo.to_thread_local(); + let Ok(stack) = thread_repo.excludes( + index_state, + None, + gix_worktree::stack::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped, + ) else { + return Box::new(|_| false); + }; + let mut stack = stack.detach(); + let objects = thread_repo.objects.clone(); + Box::new(move |path: &crate::bstr::BStr| -> bool { + stack + .at_entry(path, Some(gix_index::entry::Mode::DIR), &objects) + .map(|p| p.is_excluded()) + .unwrap_or(false) + }) + }; + + Ok(gix_status::metadata_cache::prepare( + workdir, + thread_limit, + make_excludes, + )?) +} + mod platform; /// diff --git a/gix/src/status/platform.rs b/gix/src/status/platform.rs index 4faa69e48df..72bd115ccf6 100644 --- a/gix/src/status/platform.rs +++ b/gix/src/status/platform.rs @@ -125,3 +125,36 @@ where self } } + +/// Windows-only metadata-cache builder methods. See +/// [`crate::status::MetadataCacheConfig`] for the default-on behaviour. +#[cfg(windows)] +impl Platform<'_, Progress> +where + Progress: gix_features::progress::Progress, +{ + /// Use `cache` instead of building one. For out-of-band prep (e.g. file- + /// watcher refresh) reused across status calls. + pub fn index_worktree_metadata_cache(mut self, cache: gix_status::MetadataCache) -> Self { + self.metadata_cache = crate::status::MetadataCacheConfig::Provided(cache); + self + } + + /// Skip the metadata cache. Prefer the Auto default unless measured. 
+ pub fn disable_index_worktree_metadata_cache(mut self) -> Self { + self.metadata_cache = crate::status::MetadataCacheConfig::Disabled; + self + } + + /// Eagerly prepare the cache with a specific `thread_limit` (`Some(1)` = + /// single-threaded, `None` = all cores). Use this to pick parallelism or + /// to fail-fast before building the iterator. + pub fn prepare_index_worktree_metadata_cache( + mut self, + thread_limit: Option, + ) -> Result { + let cache = crate::status::build_metadata_cache(self.repo, thread_limit)?; + self.metadata_cache = crate::status::MetadataCacheConfig::Provided(cache); + Ok(self) + } +} From b755a3b9ad9d92962eb7ff21a73fd1a6bb0a8ee6 Mon Sep 17 00:00:00 2001 From: Special Bread <250742548+special-bread@users.noreply.github.com> Date: Thu, 7 May 2026 20:43:19 +0100 Subject: [PATCH 2/4] Fix edge case on windows status with case sensitive collisions Follow-up to the git status performance improvement. This fixes an edge case where a case-sensitive entry in the cache gets lowercased and matches a second case-sensitive entry in the tree, potentially resulting in incorrect git status entries. Skipping lowercasing entirely turns those cases into a cache miss instead, making the behaviour more transparent. 
--- gix-status/src/index_as_worktree/function.rs | 20 ++--- gix-status/src/metadata_cache.rs | 83 ++++++-------------- 2 files changed, 34 insertions(+), 69 deletions(-) diff --git a/gix-status/src/index_as_worktree/function.rs b/gix-status/src/index_as_worktree/function.rs index 60d9c4e8125..3a14fb46410 100644 --- a/gix-status/src/index_as_worktree/function.rs +++ b/gix-status/src/index_as_worktree/function.rs @@ -12,6 +12,8 @@ use gix_filter::pipeline::convert::ToGitOutcome; use gix_object::FindExt; use crate::index_as_worktree::types::ConflictIndexEntry; +#[cfg(windows)] +use crate::metadata_cache::{CachedMetadata, MetadataCache}; use crate::{ AtomicU64, SymlinkCheck, index_as_worktree::{ @@ -21,8 +23,6 @@ use crate::{ }, is_dir_to_mode, }; -#[cfg(windows)] -use crate::metadata_cache::{self, CachedMetadata, MetadataCache}; /// Windows-only union of live `lstat` metadata and pre-cached metadata, so /// `compute_status` sees one shape. Other platforms use `gix_index::fs::Metadata` @@ -448,14 +448,14 @@ impl<'index> State<'_, 'index> { // only fall back to a syscall on miss; on other platforms per-file // `lstat` is already fast, so we just do the syscall directly. 
#[cfg(windows)] - let metadata = if let Some(cached) = self.metadata_cache.and_then(|c| metadata_cache::lookup(c, rela_path)) { + let metadata = if let Some(cached) = self.metadata_cache.and_then(|c| c.get(rela_path)) { FileMetadata::Cached(cached) } else { self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed); match gix_index::fs::Metadata::from_path_no_follow(worktree_path) { Ok(m) => FileMetadata::Live(m), Err(err) if gix_fs::io_err::is_not_found(err.kind(), err.raw_os_error()) => { - return Ok(Some(Change::Removed.into())) + return Ok(Some(Change::Removed.into())); } Err(err) => return Err(Error::Io(err.into())), } @@ -466,7 +466,7 @@ impl<'index> State<'_, 'index> { match gix_index::fs::Metadata::from_path_no_follow(worktree_path) { Ok(m) => m, Err(err) if gix_fs::io_err::is_not_found(err.kind(), err.raw_os_error()) => { - return Ok(Some(Change::Removed.into())) + return Ok(Some(Change::Removed.into())); } Err(err) => return Err(Error::Io(err.into())), } @@ -499,12 +499,12 @@ impl<'index> State<'_, 'index> { let new_stat = gix_index::entry::Stat::from_fs(&metadata)?; #[cfg(windows)] - let mode_change = - metadata.mode_change(entry.mode, self.options.fs.symlink, self.options.fs.executable_bit); + let mode_change = metadata.mode_change(entry.mode, self.options.fs.symlink, self.options.fs.executable_bit); #[cfg(not(windows))] - let mode_change = entry - .mode - .change_to_match_fs(&metadata, self.options.fs.symlink, self.options.fs.executable_bit); + let mode_change = + entry + .mode + .change_to_match_fs(&metadata, self.options.fs.symlink, self.options.fs.executable_bit); let executable_bit_changed = match mode_change { Some(gix_index::entry::mode::Change::Type { new_mode }) => { return Ok(Some( diff --git a/gix-status/src/metadata_cache.rs b/gix-status/src/metadata_cache.rs index ac29c5b6548..db60e22045b 100644 --- a/gix-status/src/metadata_cache.rs +++ b/gix-status/src/metadata_cache.rs @@ -84,44 +84,18 @@ impl CachedMetadata { } } -/// Metadata cache: 
maps worktree-relative paths (forward-slashed, case-normalized -/// per [`normalize_path`]) to cached metadata. -pub type MetadataCache = hashbrown::HashMap; - -/// Normalize a path for use as a cache key. Lower-cases on Windows (case- -/// insensitive filesystem), which is the only target this module compiles on. -#[inline] -pub fn normalize_path(path: &[u8]) -> BString { - use bstr::ByteSlice; - path.to_str_lossy().to_lowercase().into() -} - -/// Look up cached metadata for a worktree-relative path, avoiding per-lookup heap -/// allocations on the common (ASCII) fast path. +/// Metadata cache: maps worktree-relative paths (forward-slashed, in the exact +/// case as enumerated from disk) to cached metadata. /// -/// This is the hot path called once per index entry in `index_as_worktree`. A -/// naive `cache.get(&normalize_path(rela_path))` allocates a fresh `BString` -/// for the lowercased key 90 k+ times per status run — under multi-threaded -/// allocator contention that shows up in wall-clock. This helper lowercases -/// into a stack buffer when the path is ASCII and short, and falls back to -/// [`normalize_path`] for the rare non-ASCII / oversized case. -#[inline] -pub fn lookup<'a>(cache: &'a MetadataCache, rela_path: &[u8]) -> Option<&'a CachedMetadata> { - const STACK_BUF: usize = 256; - if rela_path.len() <= STACK_BUF && rela_path.is_ascii() { - let mut buf = [0u8; STACK_BUF]; - for (dst, &src) in buf.iter_mut().zip(rela_path.iter()) { - // ASCII-only fast lowercase: bit 0x20 toggles case for A-Z. - *dst = if src.is_ascii_uppercase() { src | 0x20 } else { src }; - } - return cache.get(&buf[..rela_path.len()] as &[u8]); - } - // Slow path: non-ASCII (needs Unicode case folding) or path longer than - // the stack buffer. Cache stores Unicode-lowercased keys, so we match - // that here. - let key = normalize_path(rela_path); - cache.get(key.as_slice()) -} +/// Lookups are case-sensitive: callers must query with the same case the walker +/// emitted. 
On a case-insensitive worktree where the index path's case differs +/// from disk, the lookup misses and `index_as_worktree` falls back to a live +/// `lstat` — a few extra syscalls in a rare scenario. Folding cases together +/// would silently merge distinct files on case-sensitive volumes (Windows +/// per-directory case-sensitivity, NTFS POSIX mode), which would let the cache +/// return one file's stat for a query about another and silently misreport +/// tracked-file status. That's strictly worse than a few cache misses. +pub type MetadataCache = hashbrown::HashMap; /// Prepare a metadata cache by walking the worktree in parallel using /// `GetFileInformationByHandleEx` with `FileIdBothDirectoryInfo`, skipping @@ -327,13 +301,11 @@ mod windows { }; let meta = cached_from_info(info); - let normalized = normalize_path(rel_path.as_bytes()); - files.push((normalized, meta)); - if is_dir && !is_reparse { let child = join_utf16(dir_path, name_slice); - subdirs.push((child, rel_path)); + subdirs.push((child, rel_path.clone())); } + files.push((rel_path.into_bytes().into(), meta)); } if info.NextEntryOffset == 0 { @@ -483,12 +455,6 @@ mod windows { mod tests { use super::*; - #[test] - fn test_normalize_path() { - assert_eq!(normalize_path(b"Foo/Bar.txt"), BString::from("foo/bar.txt")); - assert_eq!(normalize_path(b"UPPER/CASE"), BString::from("upper/case")); - } - #[test] fn test_cached_metadata_to_stat() { let cached = CachedMetadata { @@ -518,23 +484,22 @@ mod tests { } #[test] - fn test_lookup_round_trips_through_normalize_path() { - // Insert using the same path normalization the walker uses. + fn test_lookup_is_case_sensitive() { + // The cache is keyed by the exact path bytes the walker emits. + // Mixed-case lookups miss rather than silently aliasing onto the wrong + // file — a case-insensitive worktree falls back to a live `lstat` on miss. 
let mut cache = MetadataCache::default(); let meta = CachedMetadata { size: 42, ..Default::default() }; - cache.insert(normalize_path(b"src/foo.rs"), meta.clone()); + cache.insert(BString::from(b"src/foo.rs".as_slice()), meta.clone()); - // Exact-case and mixed-case both hit (Windows is case-insensitive). - assert!(lookup(&cache, b"src/foo.rs").is_some()); - assert!(lookup(&cache, b"SRC/Foo.rs").is_some()); + assert!(cache.get(&b"src/foo.rs"[..]).is_some()); + assert!(cache.get(&b"SRC/Foo.rs"[..]).is_none()); - // Non-ASCII path routes through the fallback allocating path; exact-key - // round-trip still works. - cache.insert(normalize_path("ünïcode.txt".as_bytes()), meta); - assert!(lookup(&cache, "ünïcode.txt".as_bytes()).is_some()); + cache.insert(BString::from("ünïcode.txt".as_bytes()), meta); + assert!(cache.get("ünïcode.txt".as_bytes()).is_some()); } #[test] @@ -556,8 +521,8 @@ mod tests { let cache = prepare(&temp_dir, Some(1), || |_: &bstr::BStr| false).unwrap(); assert!(!cache.is_empty()); - assert!(cache.contains_key(&normalize_path(b"test.txt"))); - assert!(cache.contains_key(&normalize_path(b"subdir/nested.txt"))); + assert!(cache.contains_key(&b"test.txt"[..])); + assert!(cache.contains_key(&b"subdir/nested.txt"[..])); let _ = std::fs::remove_dir_all(&temp_dir); } From ff6a4274058b9ed693760c273e567fdb2169e833 Mon Sep 17 00:00:00 2001 From: Special Bread <250742548+special-bread@users.noreply.github.com> Date: Sun, 10 May 2026 17:27:07 +0100 Subject: [PATCH 3/4] Cleanup windows status cache into preprocess step --- gix-status/Cargo.toml | 3 +- gix-status/src/index_as_worktree/function.rs | 75 +---- gix-status/src/index_as_worktree/types.rs | 9 +- .../src/index_as_worktree_with_renames/mod.rs | 2 +- .../index_as_worktree_with_renames/types.rs | 6 +- gix-status/src/lib.rs | 18 +- .../{metadata_cache.rs => worktree_stats.rs} | 260 ++++++++++-------- gix-status/tests/status/index_as_worktree.rs | 4 +- .../status/index_as_worktree_with_renames.rs | 2 +- 
gix/src/status/index_worktree.rs | 17 +- gix/src/status/iter/mod.rs | 16 +- gix/src/status/mod.rs | 49 ++-- gix/src/status/platform.rs | 41 +-- 13 files changed, 221 insertions(+), 281 deletions(-) rename gix-status/src/{metadata_cache.rs => worktree_stats.rs} (70%) diff --git a/gix-status/Cargo.toml b/gix-status/Cargo.toml index c2eedeb6a98..3d3ed2af004 100644 --- a/gix-status/Cargo.toml +++ b/gix-status/Cargo.toml @@ -52,7 +52,8 @@ portable-atomic = "1" windows-sys = { version = "0.61.1", features = [ "Win32_Foundation", "Win32_Storage_FileSystem", - # For SECURITY_ATTRIBUTES in CreateFileW (used by metadata_cache::prepare on Windows). + # Needed for `SECURITY_ATTRIBUTES` in the signature of `CreateFileW`, + # which is gated behind this windows-sys feature even when the call passes null. "Win32_Security", ] } diff --git a/gix-status/src/index_as_worktree/function.rs b/gix-status/src/index_as_worktree/function.rs index 3a14fb46410..c92bc041f23 100644 --- a/gix-status/src/index_as_worktree/function.rs +++ b/gix-status/src/index_as_worktree/function.rs @@ -13,7 +13,7 @@ use gix_object::FindExt; use crate::index_as_worktree::types::ConflictIndexEntry; #[cfg(windows)] -use crate::metadata_cache::{CachedMetadata, MetadataCache}; +use crate::worktree_stats::{FileMetadata, WorktreeStats}; use crate::{ AtomicU64, SymlinkCheck, index_as_worktree::{ @@ -24,65 +24,6 @@ use crate::{ is_dir_to_mode, }; -/// Windows-only union of live `lstat` metadata and pre-cached metadata, so -/// `compute_status` sees one shape. Other platforms use `gix_index::fs::Metadata` -/// directly. 
-#[cfg(windows)] -enum FileMetadata<'a> { - Live(gix_index::fs::Metadata), - Cached(&'a CachedMetadata), -} - -#[cfg(windows)] -impl FileMetadata<'_> { - fn is_dir(&self) -> bool { - match self { - Self::Live(m) => m.is_dir(), - Self::Cached(c) => c.is_dir, - } - } - - fn is_symlink(&self) -> bool { - match self { - Self::Live(m) => m.is_symlink(), - Self::Cached(c) => c.is_symlink, - } - } - - fn len(&self) -> u64 { - match self { - Self::Live(m) => m.len(), - Self::Cached(c) => c.size, - } - } - - fn to_stat(&self) -> Result { - match self { - Self::Live(m) => gix_index::entry::Stat::from_fs(m), - Self::Cached(c) => Ok(c.to_stat()), - } - } - - fn mode_change( - &self, - entry_mode: gix_index::entry::Mode, - has_symlinks: bool, - executable_bit: bool, - ) -> Option { - match self { - Self::Live(m) => entry_mode.change_to_match_fs(m, has_symlinks, executable_bit), - Self::Cached(c) => entry_mode.change_to_match_fs_with_values( - !c.is_dir && !c.is_symlink, // is_file: regular file (not dir, not symlink) - c.is_dir, - c.is_symlink, - c.is_executable, - has_symlinks, - executable_bit, - ), - } - } -} - /// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them /// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes, and /// `submodule` which can take a look at submodules in detail to produce status information (BASE version if its conflicting). 
@@ -125,7 +66,7 @@ pub fn index_as_worktree<'index, T, U, Find, E>( filter, should_interrupt, #[cfg(windows)] - metadata_cache, + worktree_stats, }: Context<'_>, options: Options, ) -> Result @@ -186,7 +127,7 @@ where filter, options, #[cfg(windows)] - metadata_cache, + worktree_stats, skipped_by_pathspec, skipped_by_entry_flags, @@ -293,10 +234,10 @@ struct State<'a, 'b> { filter: gix_filter::Pipeline, path_backing: &'b gix_index::PathStorageRef, options: &'a Options, - /// Optional pre-populated metadata cache for faster status checks on Windows. - /// Cache lookups happen before falling back to per-file syscalls. + /// Optional precomputed worktree stats for faster status checks on Windows. + /// Lookups happen before falling back to per-file syscalls. #[cfg(windows)] - metadata_cache: Option<&'a MetadataCache>, + worktree_stats: Option<&'a WorktreeStats>, skipped_by_pathspec: &'a AtomicUsize, skipped_by_entry_flags: &'a AtomicUsize, @@ -444,11 +385,11 @@ impl<'index> State<'_, 'index> { Err(err) => return Err(Error::Io(err.into())), }; - // Acquire metadata. On Windows we consult the metadata cache first and + // Acquire metadata. On Windows we consult the precomputed stats first and // only fall back to a syscall on miss; on other platforms per-file // `lstat` is already fast, so we just do the syscall directly. 
#[cfg(windows)] - let metadata = if let Some(cached) = self.metadata_cache.and_then(|c| c.get(rela_path)) { + let metadata = if let Some(cached) = self.worktree_stats.and_then(|c| c.get(rela_path)) { FileMetadata::Cached(cached) } else { self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed); diff --git a/gix-status/src/index_as_worktree/types.rs b/gix-status/src/index_as_worktree/types.rs index 4485f69a30e..98758f75274 100644 --- a/gix-status/src/index_as_worktree/types.rs +++ b/gix-status/src/index_as_worktree/types.rs @@ -4,7 +4,7 @@ use bstr::{BStr, BString}; use gix_index::entry; #[cfg(windows)] -use crate::metadata_cache::MetadataCache; +use crate::worktree_stats::WorktreeStats; /// The error returned by [index_as_worktree()`](crate::index_as_worktree()). #[derive(Debug, thiserror::Error)] @@ -58,10 +58,11 @@ pub struct Context<'a> { pub filter: gix_filter::Pipeline, /// A flag to query to learn if cancellation is requested. pub should_interrupt: &'a AtomicBool, - /// Windows-only pre-populated metadata cache. See [`crate::metadata_cache`]. - /// Transparent: `None`/empty/partial are all correct. + /// Windows-only precomputed worktree stats from + /// [`crate::worktree_stats::prepare`]. Look-through: `None`/empty/partial + /// are all correct, misses fall through to a live `lstat`. #[cfg(windows)] - pub metadata_cache: Option<&'a MetadataCache>, + pub worktree_stats: Option<&'a WorktreeStats>, } /// Provide additional information collected during the runtime of [`index_as_worktree()`](crate::index_as_worktree()). 
diff --git a/gix-status/src/index_as_worktree_with_renames/mod.rs b/gix-status/src/index_as_worktree_with_renames/mod.rs index c58b356a26b..47e7444f690 100644 --- a/gix-status/src/index_as_worktree_with_renames/mod.rs +++ b/gix-status/src/index_as_worktree_with_renames/mod.rs @@ -154,7 +154,7 @@ pub(super) mod function { filter, should_interrupt: ctx.should_interrupt, #[cfg(windows)] - metadata_cache: ctx.metadata_cache, + worktree_stats: ctx.worktree_stats, }, options.tracked_file_modifications, ) diff --git a/gix-status/src/index_as_worktree_with_renames/types.rs b/gix-status/src/index_as_worktree_with_renames/types.rs index 8a42948aea1..ff8edd50a3b 100644 --- a/gix-status/src/index_as_worktree_with_renames/types.rs +++ b/gix-status/src/index_as_worktree_with_renames/types.rs @@ -342,11 +342,11 @@ pub struct Context<'a> { pub should_interrupt: &'a AtomicBool, /// The context for the directory walk. pub dirwalk: DirwalkContext<'a>, - /// An optional pre-populated metadata cache for faster status checks on Windows. + /// Optional precomputed worktree stats for faster status checks on Windows. /// - /// See [`crate::index_as_worktree::Context::metadata_cache`] for details. + /// See [`crate::index_as_worktree::Context::worktree_stats`] for details. #[cfg(windows)] - pub metadata_cache: Option<&'a crate::metadata_cache::MetadataCache>, + pub worktree_stats: Option<&'a crate::worktree_stats::WorktreeStats>, } /// All information that is required to perform a [dirwalk](gix_dir::walk()). diff --git a/gix-status/src/lib.rs b/gix-status/src/lib.rs index 9ab4245c61e..7cde7d52a45 100644 --- a/gix-status/src/lib.rs +++ b/gix-status/src/lib.rs @@ -37,18 +37,14 @@ use portable_atomic::AtomicU64; pub mod index_as_worktree; pub use index_as_worktree::function::index_as_worktree; -/// The metadata cache is a **Windows-only** optimization. Its job is to skip -/// per-file `lstat` calls by pre-populating stat results via one batched -/// directory enumeration. 
That trade only pays off where per-file stat is -/// expensive (Windows), not on Linux/macOS where `lstat` is sub-microsecond. -/// A Linux-friendly cache would almost certainly be keyed by *directory* (à la -/// git's `UNTRACKED_CACHE`) rather than by file path, so forcing this type to -/// exist there would encourage the wrong abstraction. Keep the two separate; -/// lift this gate if a cross-platform use case actually appears. +/// **Windows-only** worktree metadata preprocessing. Before per-entry +/// modification checks, one batched parallel directory walk gathers stat +/// results so that `index_as_worktree` can look them up instead of issuing a +/// per-file `lstat`. This trade only pays off where per-file stat is expensive +/// (Windows); on Linux/macOS `lstat` is sub-microsecond and the walk would be +/// pure overhead. #[cfg(windows)] -pub mod metadata_cache; -#[cfg(windows)] -pub use metadata_cache::{CachedMetadata, MetadataCache}; +pub mod worktree_stats; #[cfg(feature = "worktree-rewrites")] pub mod index_as_worktree_with_renames; diff --git a/gix-status/src/metadata_cache.rs b/gix-status/src/worktree_stats.rs similarity index 70% rename from gix-status/src/metadata_cache.rs rename to gix-status/src/worktree_stats.rs index db60e22045b..0dacd12b265 100644 --- a/gix-status/src/metadata_cache.rs +++ b/gix-status/src/worktree_stats.rs @@ -1,39 +1,37 @@ -//! Windows-only metadata cache — see the gate on `pub mod metadata_cache` in -//! [`crate`] for why this is Windows-only. +//! Windows-only worktree metadata preprocessing — see the gate on +//! `pub mod worktree_stats` in [`crate`] for why this is Windows-only. //! -//! [`prepare`] batches a parallel GetFileInformationByHandleEx walk of the -//! worktree (~30 ms / 90 k files) into a [`MetadataCache`] keyed by -//! worktree-relative path. index_as_worktree looks up each index entry -//! there instead of calling lstat (~1 s for the same tree). -//! -//! 
The cache is **transparent**: empty/partial/extra entries change speed -//! only, never correctness — misses fall through to a live syscall. +//! [`prepare`] runs a single parallel `GetFileInformationByHandleEx` walk +//! of the worktree (~30 ms / 90 k files) and returns a [`WorktreeStats`] +//! map keyed by worktree-relative path. `index_as_worktree` then looks up +//! each index entry there instead of issuing a per-file `lstat` (~1 s for +//! the same tree). The map is **not a long-lived cache**: it is built once +//! per status call and discarded with the iterator. Lookups are +//! transparent — empty, partial, or extra entries change speed only, never +//! correctness, since misses fall through to a live syscall. use std::path::Path; use bstr::BString; -/// Cached file metadata. +/// Pre-computed file metadata produced by [`prepare`] for one worktree entry. /// /// Carries enough information to determine file type, detect mode changes, /// build a [`gix_index::entry::Stat`] for comparison, and short-circuit content /// reads via file size. /// -/// All fields are platform-agnostic. When populating from a source that doesn't -/// provide some fields (e.g. Windows directory enumeration doesn't expose -/// `ino`/`uid`/`gid`), leave those as `0`/`false`. The status pipeline's stat -/// comparison on Windows treats zeros as "equal-by-default" for those fields. +/// Windows-only fields: this module is `#[cfg(windows)]`, and Windows batch +/// directory enumeration doesn't expose `dev`/`ino`/`uid`/`gid` or the +/// executable bit. The status pipeline's stat comparison on Windows compares +/// those `Stat` fields against matching zeros from +/// [`gix_index::entry::Stat::from_fs`]'s Windows branch, and git on Windows +/// defaults to `core.filemode=false`, so all five are simply omitted here. #[derive(Debug, Clone, Default)] -pub struct CachedMetadata { +pub struct WorktreeStat { /// Whether this is a directory. 
pub is_dir: bool, /// Whether this is a symlink (or reparse point on Windows). pub is_symlink: bool, - /// Whether the file has the executable bit set. - /// - /// Always `false` when populated from Windows batch enumeration — git on - /// Windows defaults to `core.filemode=false`, so the bit isn't tracked there. - pub is_executable: bool, /// File size in bytes. pub size: u64, /// Modification time — seconds since Unix epoch. @@ -49,22 +47,15 @@ pub struct CachedMetadata { pub ctime_secs: u32, /// Status/creation time — nanoseconds component. pub ctime_nsecs: u32, - /// Device ID. Set to 0 if not available (always 0 on Windows). - pub dev: u64, - /// Inode number. Set to 0 if not available (always 0 on Windows). - pub ino: u64, - /// User ID. Set to 0 if not available (always 0 on Windows). - pub uid: u32, - /// Group ID. Set to 0 if not available (always 0 on Windows). - pub gid: u32, } -impl CachedMetadata { +impl WorktreeStat { /// Convert to gitoxide's [`Stat`](gix_index::entry::Stat) struct for index comparison. /// - /// Truncates `dev`, `ino`, and `size` from 64 to 32 bits — matching what + /// Truncates `size` from 64 to 32 bits — matching what /// [`gix_index::entry::stat::Stat::from_fs`] does on Unix, so both code - /// paths compare the same quantities. + /// paths compare the same quantities. `dev`/`ino`/`uid`/`gid` are zeroed + /// here to match what `from_fs` produces on Windows. pub fn to_stat(&self) -> gix_index::entry::Stat { gix_index::entry::Stat { mtime: gix_index::entry::stat::Time { @@ -75,35 +66,94 @@ impl CachedMetadata { secs: self.ctime_secs, nsecs: self.ctime_nsecs, }, - dev: self.dev as u32, - ino: self.ino as u32, - uid: self.uid, - gid: self.gid, + dev: 0, + ino: 0, + uid: 0, + gid: 0, size: self.size as u32, } } } -/// Metadata cache: maps worktree-relative paths (forward-slashed, in the exact -/// case as enumerated from disk) to cached metadata. 
+/// Map of worktree-relative paths (forward-slashed, in the exact case as +/// enumerated from disk) to their pre-computed [`WorktreeStat`]. /// /// Lookups are case-sensitive: callers must query with the same case the walker /// emitted. On a case-insensitive worktree where the index path's case differs /// from disk, the lookup misses and `index_as_worktree` falls back to a live /// `lstat` — a few extra syscalls in a rare scenario. Folding cases together /// would silently merge distinct files on case-sensitive volumes (Windows -/// per-directory case-sensitivity, NTFS POSIX mode), which would let the cache +/// per-directory case-sensitivity, NTFS POSIX mode), which would let the map /// return one file's stat for a query about another and silently misreport /// tracked-file status. That's strictly worse than a few cache misses. -pub type MetadataCache = hashbrown::HashMap; +pub type WorktreeStats = hashbrown::HashMap; + +/// Either a live `lstat` result or a precomputed [`WorktreeStat`] from +/// [`prepare`]. Lets [`crate::index_as_worktree`] treat both shapes uniformly +/// without branching at every per-entry use site. 
+pub(crate) enum FileMetadata<'a> { + Live(gix_index::fs::Metadata), + Cached(&'a WorktreeStat), +} -/// Prepare a metadata cache by walking the worktree in parallel using +impl FileMetadata<'_> { + pub(crate) fn is_dir(&self) -> bool { + match self { + Self::Live(m) => m.is_dir(), + Self::Cached(c) => c.is_dir, + } + } + + pub(crate) fn is_symlink(&self) -> bool { + match self { + Self::Live(m) => m.is_symlink(), + Self::Cached(c) => c.is_symlink, + } + } + + pub(crate) fn len(&self) -> u64 { + match self { + Self::Live(m) => m.len(), + Self::Cached(c) => c.size, + } + } + + pub(crate) fn to_stat(&self) -> Result { + match self { + Self::Live(m) => gix_index::entry::Stat::from_fs(m), + Self::Cached(c) => Ok(c.to_stat()), + } + } + + pub(crate) fn mode_change( + &self, + entry_mode: gix_index::entry::Mode, + has_symlinks: bool, + executable_bit: bool, + ) -> Option { + match self { + Self::Live(m) => entry_mode.change_to_match_fs(m, has_symlinks, executable_bit), + // Windows batch enumeration doesn't expose the executable bit; pass `false`. + // Git on Windows defaults to `core.filemode=false` so this is unused anyway. + Self::Cached(c) => entry_mode.change_to_match_fs_with_values( + !c.is_dir && !c.is_symlink, // is_file: regular file (not dir, not symlink) + c.is_dir, + c.is_symlink, + false, + has_symlinks, + executable_bit, + ), + } + } +} + +/// Prepare a [`WorktreeStats`] map by walking the worktree in parallel using /// `GetFileInformationByHandleEx` with `FileIdBothDirectoryInfo`, skipping /// subtrees flagged by the per-thread predicate produced by `make_excludes`. /// -/// The returned cache can be attached to the status pipeline via -/// [`Context::metadata_cache`](crate::index_as_worktree::Context::metadata_cache) -/// — cache hits skip per-file syscalls. +/// The returned map can be attached to the status pipeline via +/// [`Context::worktree_stats`](crate::index_as_worktree::Context::worktree_stats) +/// — hits skip per-file syscalls. 
/// /// `thread_limit` caps parallelism. `None` uses all available cores; `Some(1)` /// is single-threaded. @@ -114,8 +164,9 @@ pub type MetadataCache = hashbrown::HashMap; /// the worktree-relative path; returning `true` skips that subtree. Callers /// that don't need gitignore pruning can pass `|| |_: &bstr::BStr| false`, but /// for typical projects with fat ignored dirs (`node_modules`, `target`) the -/// wasted enumeration makes the cache net-slower than plain per-file stats. -pub fn prepare(worktree: &Path, thread_limit: Option, make_excludes: F) -> std::io::Result +/// wasted enumeration makes the preprocessing pass net-slower than plain +/// per-file stats. +pub fn prepare(worktree: &Path, thread_limit: Option, make_excludes: F) -> std::io::Result where F: Fn() -> E + Sync, E: FnMut(&bstr::BStr) -> bool, @@ -152,8 +203,8 @@ mod windows { /// allocation without re-traversing `PathBuf`/`OsStr` each time. type WorkItem = (Vec, String); - /// Convert FILE_ID_BOTH_DIR_INFO to CachedMetadata. - fn cached_from_info(info: &FILE_ID_BOTH_DIR_INFO) -> CachedMetadata { + /// Convert FILE_ID_BOTH_DIR_INFO to a [`WorktreeStat`]. + fn stat_from_info(info: &FILE_ID_BOTH_DIR_INFO) -> WorktreeStat { let size = info.EndOfFile as u64; // FILETIME values are LARGE_INTEGER holding 100ns intervals since 1601-01-01 UTC. @@ -167,23 +218,14 @@ mod windows { let is_dir = (info.FileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; let is_symlink = (info.FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0; - // The executable bit, dev, ino, uid, and gid aren't exposed by Windows - // directory enumeration. Git on Windows defaults to core.filemode=false - // (so is_executable is ignored anyway); the remaining fields are only - // compared against matching zeros from `Stat::from_fs`'s Windows branch. 
- CachedMetadata { + WorktreeStat { is_dir, is_symlink, - is_executable: false, size, mtime_secs, mtime_nsecs, ctime_secs, ctime_nsecs, - dev: 0, - ino: 0, - uid: 0, - gid: 0, } } @@ -219,9 +261,9 @@ mod windows { /// Check if a UTF-16 name equals exactly ASCII ".git" (case-sensitive, matching the /// prior behaviour). This is intentional: on Windows a mis-cased `.Git` is the same - /// file to the filesystem but conventionally never appears, and the cache is - /// look-through — a missed skip just means one extra cached entry that will be - /// ignored by the status pipeline. + /// file to the filesystem but conventionally never appears, and the preprocessing pass + /// is look-through — a missed skip just means one extra entry that will be ignored + /// by the status pipeline. fn name_is_dotgit(name: &[u16]) -> bool { name.len() == 4 && name[0] == b'.' as u16 @@ -231,12 +273,12 @@ mod windows { } /// Result type for directory walking to simplify the return type. - type WalkResult = (Vec<(BString, CachedMetadata)>, Vec); + type WalkResult = (Vec<(BString, WorktreeStat)>, Vec); /// Walk a single directory using `GetFileInformationByHandleEx` with /// `FileIdBothDirectoryInfo`. /// - /// Returns (cacheable entries, subdirectories to recurse into). `buffer` is a + /// Returns (entries to record, subdirectories to recurse into). `buffer` is a /// reusable 64 KiB u64-aligned scratch buffer; reusing it across calls avoids /// a heap allocation per directory (6k+ per worktree on the Linux kernel). fn walk_directory(dir_path: &[u16], rel_prefix: &str, buffer: &mut [u64]) -> std::io::Result { @@ -256,7 +298,7 @@ mod windows { }; if handle == INVALID_HANDLE_VALUE { - // Directory doesn't exist or can't be read - not an error for a look-through cache. + // Directory doesn't exist or can't be read — not an error for a look-through preprocess. 
return Ok((files, subdirs)); } @@ -273,7 +315,7 @@ mod windows { }; if success == 0 { // End of enumeration (ERROR_NO_MORE_FILES) or access denied / similar. - // Either way, stop: the cache is best-effort and correctness falls back + // Either way, stop: the preprocess is best-effort and correctness falls back // to per-file syscalls in `index_as_worktree`. break; } @@ -300,12 +342,12 @@ mod windows { format!("{rel_prefix}/{name_str}") }; - let meta = cached_from_info(info); + let stat = stat_from_info(info); if is_dir && !is_reparse { let child = join_utf16(dir_path, name_slice); subdirs.push((child, rel_path.clone())); } - files.push((rel_path.into_bytes().into(), meta)); + files.push((rel_path.into_bytes().into(), stat)); } if info.NextEntryOffset == 0 { @@ -331,7 +373,7 @@ mod windows { worktree: &Path, thread_limit: Option, make_excludes: F, - ) -> std::io::Result + ) -> std::io::Result where F: Fn() -> E + Sync, E: FnMut(&bstr::BStr) -> bool, @@ -353,33 +395,33 @@ mod windows { active_workers: 0, }); let cvar = Condvar::new(); - let shared_cache = Mutex::new(MetadataCache::default()); + let shared = Mutex::new(WorktreeStats::default()); thread::scope(|s| { for _ in 0..num_threads { let make_excludes = &make_excludes; - s.spawn(|| worker(&queue_mutex, &cvar, &shared_cache, make_excludes())); + s.spawn(|| worker(&queue_mutex, &cvar, &shared, make_excludes())); } }); - Ok(shared_cache.into_inner().unwrap()) + Ok(shared.into_inner().unwrap()) } /// One worker of the parallel walker. Grabs batches of directories from the - /// shared queue, walks them into a thread-local cache, and pushes any discovered + /// shared queue, walks them into a thread-local map, and pushes any discovered /// subdirectories back onto the queue. Exits when the queue is drained and no /// worker is still producing. /// /// `is_excluded` is a thread-local predicate that returns true for directories /// whose contents should be skipped (gitignored). 
The excluded directory's own - /// metadata entry is still cached; only recursion is avoided. + /// metadata entry is still recorded; only recursion is avoided. fn worker bool>( queue_mutex: &Mutex, cvar: &Condvar, - shared_cache: &Mutex, + shared: &Mutex, mut is_excluded: E, ) { - let mut local_cache = MetadataCache::default(); + let mut local = WorktreeStats::default(); let mut local_stack: Vec = Vec::new(); let mut buffer = vec![0u64; BUFFER_U64S]; @@ -399,7 +441,7 @@ mod windows { if queue.active_workers == 0 { // Queue is empty and no one is producing more work: we're done. cvar.notify_all(); - shared_cache.lock().unwrap().extend(local_cache); + shared.lock().unwrap().extend(local); return; } queue = cvar.wait(queue).unwrap(); @@ -410,7 +452,7 @@ mod windows { let mut new_dirs: Vec = Vec::new(); while let Some((dir, rel_prefix)) = local_stack.pop() { if let Ok((files, subdirs)) = walk_directory(&dir, &rel_prefix, &mut buffer) { - local_cache.extend(files); + local.extend(files); for (child_path, child_rel) in subdirs { if !is_excluded(child_rel.as_bytes().into()) { new_dirs.push((child_path, child_rel)); @@ -431,14 +473,14 @@ mod windows { fn walk_worktree_single_threaded bool>( worktree: &Path, mut is_excluded: E, - ) -> std::io::Result { - let mut cache = MetadataCache::default(); + ) -> std::io::Result { + let mut stats = WorktreeStats::default(); let mut dir_stack: Vec = vec![(utf16_null_terminated(worktree), String::new())]; let mut buffer = vec![0u64; BUFFER_U64S]; while let Some((dir, rel_prefix)) = dir_stack.pop() { if let Ok((files, subdirs)) = walk_directory(&dir, &rel_prefix, &mut buffer) { - cache.extend(files); + stats.extend(files); for (child_path, child_rel) in subdirs { if !is_excluded(child_rel.as_bytes().into()) { dir_stack.push((child_path, child_rel)); @@ -447,7 +489,7 @@ mod windows { } } - Ok(cache) + Ok(stats) } } @@ -456,54 +498,50 @@ mod tests { use super::*; #[test] - fn test_cached_metadata_to_stat() { - let cached = CachedMetadata 
{ + fn worktree_stat_to_stat() { + let stat = WorktreeStat { is_dir: false, is_symlink: false, - is_executable: true, size: 1234, mtime_secs: 1700000000, mtime_nsecs: 500_000_000, ctime_secs: 1699999999, ctime_nsecs: 100_000_000, - dev: 123, - ino: 456, - uid: 1000, - gid: 1000, }; - let stat = cached.to_stat(); - assert_eq!(stat.size, 1234); - assert_eq!(stat.mtime.secs, 1700000000); - assert_eq!(stat.mtime.nsecs, 500_000_000); - assert_eq!(stat.ctime.secs, 1699999999); - assert_eq!(stat.ctime.nsecs, 100_000_000); - assert_eq!(stat.dev, 123); - assert_eq!(stat.ino, 456); - assert_eq!(stat.uid, 1000); - assert_eq!(stat.gid, 1000); + let s = stat.to_stat(); + assert_eq!(s.size, 1234); + assert_eq!(s.mtime.secs, 1700000000); + assert_eq!(s.mtime.nsecs, 500_000_000); + assert_eq!(s.ctime.secs, 1699999999); + assert_eq!(s.ctime.nsecs, 100_000_000); + // dev/ino/uid/gid are always zero on Windows — `Stat::from_fs` zeros them too. + assert_eq!(s.dev, 0); + assert_eq!(s.ino, 0); + assert_eq!(s.uid, 0); + assert_eq!(s.gid, 0); } #[test] - fn test_lookup_is_case_sensitive() { - // The cache is keyed by the exact path bytes the walker emits. + fn lookup_is_case_sensitive() { + // The map is keyed by the exact path bytes the walker emits. // Mixed-case lookups miss rather than silently aliasing onto the wrong // file — a case-insensitive worktree falls back to a live `lstat` on miss. 
- let mut cache = MetadataCache::default(); - let meta = CachedMetadata { + let mut stats = WorktreeStats::default(); + let stat = WorktreeStat { size: 42, ..Default::default() }; - cache.insert(BString::from(b"src/foo.rs".as_slice()), meta.clone()); + stats.insert(BString::from(b"src/foo.rs".as_slice()), stat.clone()); - assert!(cache.get(&b"src/foo.rs"[..]).is_some()); - assert!(cache.get(&b"SRC/Foo.rs"[..]).is_none()); + assert!(stats.get(&b"src/foo.rs"[..]).is_some()); + assert!(stats.get(&b"SRC/Foo.rs"[..]).is_none()); - cache.insert(BString::from("ünïcode.txt".as_bytes()), meta); - assert!(cache.get("ünïcode.txt".as_bytes()).is_some()); + stats.insert(BString::from("ünïcode.txt".as_bytes()), stat); + assert!(stats.get("ünïcode.txt".as_bytes()).is_some()); } #[test] - fn test_prepare_returns_cache() { + fn prepare_returns_stats() { // Use a unique temp directory to avoid walking other files. use std::time::{SystemTime, UNIX_EPOCH}; let timestamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos(); @@ -518,11 +556,11 @@ mod tests { let nested_file = subdir.join("nested.txt"); std::fs::write(&nested_file, b"world").unwrap(); - let cache = prepare(&temp_dir, Some(1), || |_: &bstr::BStr| false).unwrap(); + let stats = prepare(&temp_dir, Some(1), || |_: &bstr::BStr| false).unwrap(); - assert!(!cache.is_empty()); - assert!(cache.contains_key(&b"test.txt"[..])); - assert!(cache.contains_key(&b"subdir/nested.txt"[..])); + assert!(!stats.is_empty()); + assert!(stats.contains_key(&b"test.txt"[..])); + assert!(stats.contains_key(&b"subdir/nested.txt"[..])); let _ = std::fs::remove_dir_all(&temp_dir); } diff --git a/gix-status/tests/status/index_as_worktree.rs b/gix-status/tests/status/index_as_worktree.rs index 90ba59ab54e..ae873000e72 100644 --- a/gix-status/tests/status/index_as_worktree.rs +++ b/gix-status/tests/status/index_as_worktree.rs @@ -188,7 +188,7 @@ fn fixture_filtered_detailed( ), should_interrupt: &AtomicBool::default(), #[cfg(windows)] - 
metadata_cache: None, + worktree_stats: None, }; let options = Options { fs: fs_capabilities.map_or_else(|| gix_fs::Capabilities::probe(&git_dir), |new| new(&git_dir)), @@ -1057,7 +1057,7 @@ fn racy_git() { filter: Default::default(), should_interrupt: &AtomicBool::default(), #[cfg(windows)] - metadata_cache: None, + worktree_stats: None, }; let out = index_as_worktree( &index, diff --git a/gix-status/tests/status/index_as_worktree_with_renames.rs b/gix-status/tests/status/index_as_worktree_with_renames.rs index 94c8eeb6797..5b2d8e423bb 100644 --- a/gix-status/tests/status/index_as_worktree_with_renames.rs +++ b/gix-status/tests/status/index_as_worktree_with_renames.rs @@ -340,7 +340,7 @@ fn fixture_filtered_detailed( ignore_case_index_lookup: None, }, #[cfg(windows)] - metadata_cache: None, + worktree_stats: None, }; let options = Options { object_hash, diff --git a/gix/src/status/index_worktree.rs b/gix/src/status/index_worktree.rs index 266aa9f29ef..d5b6cb56236 100644 --- a/gix/src/status/index_worktree.rs +++ b/gix/src/status/index_worktree.rs @@ -28,12 +28,6 @@ pub enum Error { StatOptions(#[from] config::stat_options::Error), #[error(transparent)] ResourceCache(#[from] crate::diff::resource_cache::Error), - #[cfg(windows)] - #[error("Failed to prepare metadata cache")] - PrepareMetadataCache(#[from] std::io::Error), - #[cfg(windows)] - #[error(transparent)] - OpenIndex(#[from] crate::worktree::open_index::Error), } /// Options for use with [Repository::index_worktree_status()]. @@ -88,8 +82,11 @@ impl Repository { /// - A flag to stop the whole operation. /// * `options` /// - Additional configuration for all parts of the operation. - /// * `metadata_cache` *(Windows only)* - /// - Optional pre-populated metadata cache; see gix_status::metadata_cache. 
+ /// * `worktree_stats` *(Windows only)* + /// - Optional precomputed worktree metadata from + /// gix_status::worktree_stats::prepare When `Some`, modification + /// checks consult it instead of calling `lstat` per file. Misses + /// fall through to a live syscall, so empty/partial maps are safe. /// /// ### Note /// @@ -109,7 +106,7 @@ impl Repository { progress: &mut dyn gix_features::progress::Progress, should_interrupt: &AtomicBool, options: Options, - #[cfg(windows)] metadata_cache: Option<&gix_status::MetadataCache>, + #[cfg(windows)] worktree_stats: Option<&gix_status::worktree_stats::WorktreeStats>, ) -> Result where T: Send + Clone, @@ -159,7 +156,7 @@ impl Repository { ignore_case_index_lookup: accelerate_lookup.as_ref(), }, #[cfg(windows)] - metadata_cache, + worktree_stats, }, gix_status::index_as_worktree_with_renames::Options { sorting: options.sorting, diff --git a/gix/src/status/iter/mod.rs b/gix/src/status/iter/mod.rs index cee8506b051..3e33b90a558 100644 --- a/gix/src/status/iter/mod.rs +++ b/gix/src/status/iter/mod.rs @@ -65,13 +65,13 @@ where .unwrap_or_default(); let should_interrupt = self.should_interrupt.clone().unwrap_or_default(); let submodule = BuiltinSubmoduleStatus::new(self.repo.clone().into_sync(), self.submodules)?; + // Best-effort: if the prep walk fails (missing workdir, syscall error), + // silently fall through to stat-based status rather than abort. #[cfg(windows)] - let metadata_cache = match self.metadata_cache { - crate::status::MetadataCacheConfig::Provided(cache) => Some(cache), - crate::status::MetadataCacheConfig::Disabled => None, - // Best-effort: if the prep walk fails (missing workdir, syscall error), - // silently fall through to stat-based status rather than abort. 
- crate::status::MetadataCacheConfig::Auto => crate::status::build_metadata_cache(self.repo, None).ok(), + let worktree_stats = if self.precompute_worktree_stats { + crate::status::precomputed_worktree_stats(self.repo, &index, None) + } else { + None }; #[cfg(feature = "parallel")] { @@ -143,7 +143,7 @@ where &should_interrupt, options, #[cfg(windows)] - metadata_cache.as_ref(), + worktree_stats.as_ref(), )?; Ok(Outcome { index_worktree: out, @@ -208,7 +208,7 @@ where &should_interrupt, options, #[cfg(windows)] - metadata_cache.as_ref(), + worktree_stats.as_ref(), )?; let mut iter = Iter { items: Vec::new().into_iter(), diff --git a/gix/src/status/mod.rs b/gix/src/status/mod.rs index d617fe88055..837e0d3298d 100644 --- a/gix/src/status/mod.rs +++ b/gix/src/status/mod.rs @@ -15,23 +15,12 @@ where index_worktree_options: index_worktree::Options, tree_index_renames: tree_index::TrackRenames, should_interrupt: Option, + /// Windows-only: when `true` (default), run a single batched parallel + /// directory walk before status to precompute worktree stats so that the + /// per-entry pipeline can skip per-file `lstat`. Ignored on non-Windows. + /// See [`crate::status::Platform::index_worktree_stats_preprocessing`]. #[cfg(windows)] - metadata_cache: MetadataCacheConfig, -} - -/// Windows-only: controls the metadata cache. `Auto` (default) trades a -/// one-shot gitignore-aware worktree walk (~30 ms / 90 k files) for avoiding -/// per-file `lstat` during status (~1 s for the same tree). -#[cfg(windows)] -#[derive(Default)] -pub enum MetadataCacheConfig { - /// Prepare the cache lazily inside the iterator using all cores. - #[default] - Auto, - /// Skip the cache. - Disabled, - /// Use this pre-built cache. - Provided(gix_status::MetadataCache), + precompute_worktree_stats: bool, } /// How to obtain a submodule's status. 
@@ -132,7 +121,7 @@ impl Repository { thread_limit: None, }, #[cfg(windows)] - metadata_cache: MetadataCacheConfig::default(), + precompute_worktree_stats: true, }; let untracked = self @@ -251,24 +240,24 @@ pub mod into_iter { } } -/// Build a gitignore-aware Windows metadata cache. Shared between the explicit -/// `prepare_index_worktree_metadata_cache` and the Auto branch in `into_iter`. +/// Run the gitignore-aware Windows worktree stats preprocessing pass. +/// One internal helper, called from the iterator on Windows unless +/// preprocessing is disabled. Returns `None` when the repo lacks a workdir +/// or the walk fails — callers fall through to live `lstat` either way, so +/// surfacing the failure as a hard error would be unhelpful. #[cfg(windows)] -pub(crate) fn build_metadata_cache( +pub(crate) fn precomputed_worktree_stats( repo: &Repository, + index: &gix_index::State, thread_limit: Option, -) -> Result { - let workdir = repo - .workdir() - .ok_or(crate::status::index_worktree::Error::MissingWorkDir)?; +) -> Option { + let workdir = repo.workdir()?; let sync_repo = repo.clone().into_sync(); - let index = repo.index_or_empty()?; - let index_state: &gix_index::State = &index; let make_excludes = || -> Box bool> { let thread_repo = sync_repo.to_thread_local(); let Ok(stack) = thread_repo.excludes( - index_state, + index, None, gix_worktree::stack::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped, ) else { @@ -284,11 +273,7 @@ pub(crate) fn build_metadata_cache( }) }; - Ok(gix_status::metadata_cache::prepare( - workdir, - thread_limit, - make_excludes, - )?) + gix_status::worktree_stats::prepare(workdir, thread_limit, make_excludes).ok() } mod platform; diff --git a/gix/src/status/platform.rs b/gix/src/status/platform.rs index 72bd115ccf6..a07e28e61ad 100644 --- a/gix/src/status/platform.rs +++ b/gix/src/status/platform.rs @@ -124,37 +124,18 @@ where self.tree_index_renames = renames; self } -} -/// Windows-only metadata-cache builder methods. 
See -/// [`crate::status::MetadataCacheConfig`] for the default-on behaviour. -#[cfg(windows)] -impl Platform<'_, Progress> -where - Progress: gix_features::progress::Progress, -{ - /// Use `cache` instead of building one. For out-of-band prep (e.g. file- - /// watcher refresh) reused across status calls. - pub fn index_worktree_metadata_cache(mut self, cache: gix_status::MetadataCache) -> Self { - self.metadata_cache = crate::status::MetadataCacheConfig::Provided(cache); - self - } - - /// Skip the metadata cache. Prefer the Auto default unless measured. - pub fn disable_index_worktree_metadata_cache(mut self) -> Self { - self.metadata_cache = crate::status::MetadataCacheConfig::Disabled; + /// Enable or disable the Windows worktree stats preprocessing pass. + /// + /// On Windows, status runs a single batched parallel directory walk before + /// the per-entry pipeline so that modification checks can use precomputed + /// stat information instead of issuing one `lstat` per file. The walk is + /// fast (~30 ms / 90 k files) and replaces a much slower per-file pass. + /// Defaults to `true`; disabling falls back to per-file `lstat`. No-op on + /// non-Windows targets. + #[cfg(windows)] + pub fn index_worktree_stats_preprocessing(mut self, enable: bool) -> Self { + self.precompute_worktree_stats = enable; self } - - /// Eagerly prepare the cache with a specific `thread_limit` (`Some(1)` = - /// single-threaded, `None` = all cores). Use this to pick parallelism or - /// to fail-fast before building the iterator. 
- pub fn prepare_index_worktree_metadata_cache( - mut self, - thread_limit: Option, - ) -> Result { - let cache = crate::status::build_metadata_cache(self.repo, thread_limit)?; - self.metadata_cache = crate::status::MetadataCacheConfig::Provided(cache); - Ok(self) - } } From 0efcd29a8d455b833800b01e5efcbdfbde546efd Mon Sep 17 00:00:00 2001 From: Special Bread <250742548+special-bread@users.noreply.github.com> Date: Sun, 10 May 2026 16:18:28 +0100 Subject: [PATCH 4/4] Fix windows status edge case on cache name collision --- gix-status/src/worktree_stats.rs | 36 +++++++++++++++++++------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/gix-status/src/worktree_stats.rs b/gix-status/src/worktree_stats.rs index 0dacd12b265..14fe720bd66 100644 --- a/gix-status/src/worktree_stats.rs +++ b/gix-status/src/worktree_stats.rs @@ -180,8 +180,8 @@ where mod windows { use super::*; use std::collections::VecDeque; - use std::ffi::{OsString, c_void}; - use std::os::windows::ffi::{OsStrExt, OsStringExt}; + use std::ffi::c_void; + use std::os::windows::ffi::OsStrExt; use std::sync::{Condvar, Mutex}; use std::thread; @@ -335,19 +335,25 @@ mod windows { let is_dir = (info.FileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; let is_reparse = (info.FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0; - let name_str = OsString::from_wide(name_slice).to_string_lossy().into_owned(); - let rel_path = if rel_prefix.is_empty() { - name_str - } else { - format!("{rel_prefix}/{name_str}") - }; - - let stat = stat_from_info(info); - if is_dir && !is_reparse { - let child = join_utf16(dir_path, name_slice); - subdirs.push((child, rel_path.clone())); + // Decode the UTF-16 name fallibly: skip on ill-formed sequences rather than + // substituting U+FFFD. 
Lossy substitution can collapse two distinct invalid + // names onto the same key (one overwriting the other in the map) and never + // matches what `gix-index` stored anyway, so a miss + live `lstat` fallback + // is strictly cleaner. + if let Ok(name_str) = String::from_utf16(name_slice) { + let rel_path = if rel_prefix.is_empty() { + name_str + } else { + format!("{rel_prefix}/{name_str}") + }; + + let stat = stat_from_info(info); + if is_dir && !is_reparse { + let child = join_utf16(dir_path, name_slice); + subdirs.push((child, rel_path.clone())); + } + files.push((rel_path.into_bytes().into(), stat)); } - files.push((rel_path.into_bytes().into(), stat)); } if info.NextEntryOffset == 0 { @@ -404,7 +410,7 @@ mod windows { } }); - Ok(shared.into_inner().unwrap()) + Ok(shared.into_inner().unwrap_or_else(|err| err.into_inner())) } /// One worker of the parallel walker. Grabs batches of directories from the