From db595ebc23432762b62dc95185379c94016593e6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 30 Apr 2026 09:48:52 +0800 Subject: [PATCH 1/3] to-tree implementation TBD --- gix-index/Cargo.toml | 5 + gix-index/benches/to_tree.rs | 109 ++++++++++++++ gix-index/src/init.rs | 268 ++++++++++++++++++++++++++++++++++ gix-index/tests/index/init.rs | 209 +++++++++++++++++++++++++- 4 files changed, 588 insertions(+), 3 deletions(-) create mode 100644 gix-index/benches/to_tree.rs diff --git a/gix-index/Cargo.toml b/gix-index/Cargo.toml index b8de725ba9d..dce54279e01 100644 --- a/gix-index/Cargo.toml +++ b/gix-index/Cargo.toml @@ -21,6 +21,11 @@ name = "from-tree" harness = false path = "./benches/from_tree.rs" +[[bench]] +name = "to-tree" +harness = false +path = "./benches/to_tree.rs" + [features] ## Enable support for the SHA-1 hash by enabling the respective feature in the `gix-hash` crate. sha1 = ["gix-hash/sha1"] diff --git a/gix-index/benches/to_tree.rs b/gix-index/benches/to_tree.rs new file mode 100644 index 00000000000..71ee5b576b9 --- /dev/null +++ b/gix-index/benches/to_tree.rs @@ -0,0 +1,109 @@ +use std::{hint::black_box, io::Read}; + +use bstr::ByteSlice; +use criterion::{Criterion, Throughput, criterion_group, criterion_main}; +use gix_index::{ + State, + entry::{Flags, Mode}, +}; + +fn to_tree(c: &mut Criterion) { + let objects = MemoryDb { + object_hash: gix_hash::Kind::Sha1, + }; + let mut group = c.benchmark_group("to_tree"); + + let mut flat = State::new(gix_hash::Kind::Sha1); + for idx in 0..10_000 { + flat.dangerously_push_entry( + Default::default(), + repeated_id(b'a'), + Flags::empty(), + Mode::FILE, + format!("file-{idx:05}").as_bytes().as_bstr(), + ); + } + group.throughput(Throughput::Elements(flat.entries().len() as u64)); + group.bench_function("flat 10k files", |b| { + b.iter(|| { + let id = flat.to_tree(&objects, Default::default()).expect("tree can be written"); + black_box(id); + }); + }); + + let mut wide_deep = State::new(gix_hash::Kind::Sha1); + for dir_idx in 0..100 { + for file_idx in 0..100 { + wide_deep.dangerously_push_entry( + Default::default(), + repeated_id(b'a'), + Flags::empty(), + Mode::FILE, + format!("dir-{dir_idx:03}/file-{file_idx:03}").as_bytes().as_bstr(), + ); + } + } + group.throughput(Throughput::Elements(wide_deep.entries().len() as u64)); + group.bench_function("wide 100 x 100 files", |b| { + b.iter(|| { + let id = wide_deep + .to_tree(&objects, Default::default()) + .expect("tree can be written"); + black_box(id); + }); + }); + + let mut sparse = State::new(gix_hash::Kind::Sha1); + for idx in 0..10_000 { + sparse.dangerously_push_entry( + Default::default(), + repeated_id(b't'), + Flags::empty(), + Mode::DIR, + format!("sparse-{idx:05}/").as_bytes().as_bstr(), + ); + } + group.throughput(Throughput::Elements(sparse.entries().len() as u64)); + group.bench_function("sparse 10k directories", |b| { + b.iter(|| { + let id = sparse + .to_tree(&objects, Default::default()) + .expect("tree can be written"); + black_box(id); + }); + }); +} + +criterion_group!(benches, to_tree); +criterion_main!(benches); + +struct MemoryDb { + object_hash: gix_hash::Kind, +} + +impl gix_object::Exists for MemoryDb { + fn exists(&self, _id: &gix_hash::oid) -> bool { + true + } +} + +impl gix_object::Write for MemoryDb { + fn write_buf(&self, kind: gix_object::Kind, from: &[u8]) -> Result { + Ok(gix_object::compute_hash(self.object_hash, kind, from)?) + } + + fn write_stream( + &self, + kind: gix_object::Kind, + _size: u64, + from: &mut dyn Read, + ) -> Result { + let mut buf = Vec::new(); + from.read_to_end(&mut buf)?; + self.write_buf(kind, &buf) + } +} + +fn repeated_id(byte: u8) -> gix_hash::ObjectId { + gix_hash::ObjectId::from_hex(&vec![byte; gix_hash::Kind::Sha1.len_in_hex()]).expect("valid hex") +} diff --git a/gix-index/src/init.rs b/gix-index/src/init.rs index c7bbff8922c..515355eaaf5 100644 --- a/gix-index/src/init.rs +++ b/gix-index/src/init.rs @@ -232,3 +232,271 @@ pub mod from_tree { } } } + +/// Initialize tree objects from an index state. +pub mod to_tree { + use std::io::Write; + + use bstr::{BStr, BString, ByteSlice}; + use gix_object::{tree, tree::EntryMode}; + + use crate::{ + Entry, State, + entry::{self, Stage}, + extension, + }; + + /// The error returned by [State::to_tree()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Entry '{path}' is unmerged at stage {stage}")] + Unmerged { path: BString, stage: u32 }, + #[error("Entry '{path}' is invalid as both a file and directory would exist in the tree")] + FileDirectoryConflict { path: BString }, + #[error("The path \"{path}\" is invalid")] + InvalidComponent { + path: BString, + source: gix_validate::path::component::Error, + }, + #[error("Entry '{path}' has an invalid index mode {mode:?}")] + InvalidMode { path: BString, mode: entry::Mode }, + #[error("The object {id} at '{path}' does not exist")] + MissingObject { path: BString, id: gix_hash::ObjectId }, + #[error(transparent)] + Write(#[from] gix_object::write::Error), + #[error(transparent)] + Entries(#[from] crate::verify::entries::Error), + #[error("More than 4 billion entries would be represented by the tree at '{path}'")] + EntriesOverflow { path: BString }, + } + + /// Options for use with [State::to_tree()]. + #[derive(Default, Debug, Clone, Copy)] + pub struct Options { + /// Path component validation options. + pub validate: gix_validate::path::component::Options, + /// If true, don't fail if referenced objects are missing from `objects`. + /// + /// Commit entries, representing submodules, are never checked for existence. + pub missing_ok: bool, + } + + /// Tree creation. + impl State { + /// Write this index state as Git tree objects into `objects` and return the root tree id. + /// + /// If this state has a TREE extension, it is refreshed from the written trees on success. + /// If no TREE extension is present, none is created. + pub fn to_tree(&mut self, objects: Db, options: Options) -> Result + where + Db: gix_object::Write + gix_object::Exists, + { + let _span = gix_features::trace::coarse!("gix_index::State::to_tree()"); + self.verify_entries()?; + let update_tree_cache = self.tree.is_some(); + + let mut builder = Builder { + state: self, + objects: &objects, + options, + update_tree_cache, + }; + let (next_index, written) = builder.write_tree_at(0, BStr::new(b""), BStr::new(b""), true)?; + debug_assert_eq!(next_index, builder.state.entries.len()); + let written = written.expect("the root tree is always written"); + if update_tree_cache { + self.tree = written.cache_tree; + } + Ok(written.id) + } + } + + struct Builder<'a, Db> { + state: &'a State, + objects: &'a Db, + options: Options, + update_tree_cache: bool, + } + + struct WrittenTree { + id: gix_hash::ObjectId, + num_entries: u32, + cache_tree: Option, + } + + impl Builder<'_, Db> + where + Db: gix_object::Write + gix_object::Exists, + { + fn write_tree_at( + &mut self, + mut index: usize, + prefix: &BStr, + name: &BStr, + write_empty: bool, + ) -> Result<(usize, Option), Error> { + let mut tree_data = Vec::new(); + let mut num_entries = 0u32; + let mut children = Vec::new(); + let mut leaf_names = Vec::<&BStr>::new(); + + while let Some(entry) = self.state.entries.get(index) { + let path = entry.path(self.state); + if !path.starts_with(prefix) { + break; + } + + self.assure_unmerged(entry, path)?; + + if entry + .flags + .intersects(entry::Flags::REMOVE | entry::Flags::INTENT_TO_ADD) + { + index += 1; + continue; + } + + let rela_path = BStr::new(&path[prefix.len()..]); + if rela_path.is_empty() { + return Err(Error::InvalidComponent { + path: path.into(), + source: gix_validate::path::component(BStr::new(b""), None, self.options.validate) + .expect_err("empty component is invalid"), + }); + } + + let slash_pos = rela_path.find_byte(b'/'); + let is_sparse_leaf = entry.mode.is_sparse() && slash_pos == Some(rela_path.len() - 1); + if let Some(slash_pos) = slash_pos.filter(|_| !is_sparse_leaf) { + let component = BStr::new(&rela_path[..slash_pos]); + let child_path = BStr::new(&path[..prefix.len() + slash_pos]); + self.validate_component(child_path, component, None)?; + if leaf_names.binary_search_by(|name| (*name).cmp(component)).is_ok() { + return Err(Error::FileDirectoryConflict { + path: child_path.into(), + }); + } + + let child_prefix = BStr::new(&path[..prefix.len() + slash_pos + 1]); + let (next_index, child) = self.write_tree_at(index, child_prefix, component, false)?; + index = next_index; + + if let Some(child) = child { + encode_entry(&mut tree_data, EntryKind::Tree.into(), component, &child.id)?; + num_entries = num_entries + .checked_add(child.num_entries) + .ok_or_else(|| Error::EntriesOverflow { path: prefix.into() })?; + if let Some(child_cache_tree) = child.cache_tree { + children.push(child_cache_tree); + } + } + } else { + let filename = if is_sparse_leaf { + BStr::new(&rela_path[..rela_path.len() - 1]) + } else { + rela_path + }; + let insertion_pos = match leaf_names.binary_search_by(|name| (*name).cmp(filename)) { + Ok(_) => return Err(Error::FileDirectoryConflict { path: path.into() }), + Err(pos) => pos, + }; + self.write_entry(&mut tree_data, path, filename, entry)?; + leaf_names.insert(insertion_pos, filename); + num_entries = num_entries + .checked_add(1) + .ok_or_else(|| Error::EntriesOverflow { path: prefix.into() })?; + index += 1; + } + } + + if num_entries == 0 && !write_empty { + return Ok((index, None)); + } + + let id = self.objects.write_buf(gix_object::Kind::Tree, &tree_data)?; + let cache_tree = self.update_tree_cache.then(|| extension::Tree { + name: name.iter().copied().collect(), + id, + num_entries: Some(num_entries), + children, + }); + Ok(( + index, + Some(WrittenTree { + id, + num_entries, + cache_tree, + }), + )) + } + + fn assure_unmerged(&self, entry: &Entry, path: &BStr) -> Result<(), Error> { + let stage = entry.stage(); + if stage != Stage::Unconflicted { + return Err(Error::Unmerged { + path: path.into(), + stage: stage as u32, + }); + } + Ok(()) + } + + fn write_entry( + &self, + tree_data: &mut Vec, + path: &BStr, + filename: &BStr, + entry: &Entry, + ) -> Result<(), Error> { + let mode = entry.mode.to_tree_entry_mode().ok_or_else(|| Error::InvalidMode { + path: path.into(), + mode: entry.mode, + })?; + self.validate_component( + path, + filename, + mode.is_link().then_some(gix_validate::path::component::Mode::Symlink), + )?; + if !self.options.missing_ok && !mode.is_commit() && !self.objects.exists(&entry.id) { + return Err(Error::MissingObject { + path: path.into(), + id: entry.id, + }); + } + + encode_entry(tree_data, mode, filename, &entry.id)?; + Ok(()) + } + + fn validate_component( + &self, + path: &BStr, + component: &BStr, + mode: Option, + ) -> Result<(), Error> { + gix_validate::path::component(component, mode, self.options.validate) + .map(|_| ()) + .map_err(|source| Error::InvalidComponent { + path: path.into(), + source, + }) + } + } + + fn encode_entry( + out: &mut Vec, + mode: EntryMode, + filename: &BStr, + id: &gix_hash::oid, + ) -> Result<(), gix_object::write::Error> { + out.write_all(mode.as_bytes(&mut Default::default()))?; + out.write_all(b" ")?; + out.write_all(filename)?; + out.write_all(b"\0")?; + out.write_all(id.as_bytes())?; + Ok(()) + } + + use tree::EntryKind; +} diff --git a/gix-index/tests/index/init.rs b/gix-index/tests/index/init.rs index 85f6b144def..84c0f174581 100644 --- a/gix-index/tests/index/init.rs +++ b/gix-index/tests/index/init.rs @@ -1,6 +1,7 @@ -use std::{error::Error, path::Path}; +use std::{cell::RefCell, collections::HashSet, error::Error, io::Read, path::Path}; use crate::scripted_fixture_read_only; +use bstr::ByteSlice; use gix_index::State; #[test] @@ -76,6 +77,124 @@ fn from_tree_returns_file_directory_conflicts_until_fixed() -> crate::Result { Ok(()) } +#[test] +fn to_tree_roundtrips_to_fixture_tree() -> crate::Result { + let fixtures = [ + "make_index/v2.sh", + "make_index/v2_deeper_tree.sh", + "make_index/v2_all_file_kinds.sh", + "make_index/v3_added_files.sh", + "make_index/v3_sparse_index.sh", + "make_index/v4_more_files_IEOT.sh", + ]; + + for fixture in fixtures { + let worktree_dir = scripted_fixture_read_only(fixture)?; + let expected_tree_id = tree_id(&worktree_dir); + let git_dir = worktree_dir.join(".git"); + let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default())?; + let objects = MemoryDb::exists_all(gix_hash::Kind::Sha1); + + let actual_tree_id = index.to_tree(&objects, Default::default())?; + assert_eq!(actual_tree_id, expected_tree_id, "tree mismatch in {fixture:?}"); + } + Ok(()) +} + +#[test] +fn to_tree_empty_index_is_empty_tree() -> crate::Result { + let mut state = State::new(gix_hash::Kind::Sha1); + let objects = MemoryDb::exists_all(gix_hash::Kind::Sha1); + + let actual = state.to_tree(&objects, Default::default())?; + + assert_eq!(actual, gix_hash::ObjectId::empty_tree(gix_hash::Kind::Sha1)); + assert!(state.tree().is_none(), "TREE extension isn't created if absent"); + Ok(()) +} + +#[test] +fn to_tree_rejects_unmerged_entries() { + let mut index = super::Fixture::Loose("conflicting-file").open(); + let objects = MemoryDb::exists_all(gix_hash::Kind::Sha1); + + let err = index.to_tree(&objects, Default::default()).unwrap_err(); + + assert!(matches!(err, gix_index::init::to_tree::Error::Unmerged { .. })); +} + +#[test] +fn to_tree_rejects_file_directory_conflicts() { + let mut state = state_with_entries(["a", "a.b", "a/b"]); + let objects = MemoryDb::exists_all(gix_hash::Kind::Sha1); + + let err = state.to_tree(&objects, Default::default()).unwrap_err(); + + assert!(matches!( + err, + gix_index::init::to_tree::Error::FileDirectoryConflict { .. } + )); +} + +#[test] +fn to_tree_rejects_invalid_components() { + let mut state = state_with_entries(["a//b"]); + let objects = MemoryDb::exists_all(gix_hash::Kind::Sha1); + + let err = state.to_tree(&objects, Default::default()).unwrap_err(); + + assert!(matches!(err, gix_index::init::to_tree::Error::InvalidComponent { .. })); +} + +#[test] +fn to_tree_rejects_missing_objects_unless_allowed() -> crate::Result { + let mut state = state_with_entries(["file"]); + let objects = MemoryDb::written_only(gix_hash::Kind::Sha1); + + let err = state.to_tree(&objects, Default::default()).unwrap_err(); + assert!(matches!(err, gix_index::init::to_tree::Error::MissingObject { .. })); + + let mut options = gix_index::init::to_tree::Options::default(); + options.missing_ok = true; + let actual = state.to_tree(&objects, options)?; + assert_ne!(actual, gix_hash::Kind::Sha1.null()); + Ok(()) +} + +#[test] +fn to_tree_refreshes_existing_tree_extension() -> crate::Result { + let mut index = super::Fixture::Generated("v2").open(); + let original_cached_tree = index.tree().expect("fixture has TREE extension").id; + index.entries_mut()[0].id = repeated_id(b'b'); + let objects = MemoryDb::exists_all(gix_hash::Kind::Sha1); + + let actual = index.to_tree(&objects, Default::default())?; + + assert_ne!(actual, original_cached_tree); + let tree = index.tree().expect("TREE extension is preserved and refreshed"); + assert_eq!(tree.id, actual); + assert_eq!( + tree.num_entries, + Some(index.entries().len().try_into().expect("small fixture")) + ); + tree.verify(false, gix_object::find::Never)?; + Ok(()) +} + +#[test] +fn to_tree_does_not_create_missing_tree_extension() -> crate::Result { + let worktree_dir = scripted_fixture_read_only("make_index/v2.sh")?; + let odb = gix_odb::at(worktree_dir.join(".git").join("objects"))?; + let mut state = State::from_tree(&tree_id(&worktree_dir), &odb, Default::default())?; + assert!(state.tree().is_none()); + let objects = MemoryDb::exists_all(gix_hash::Kind::Sha1); + + state.to_tree(&objects, Default::default())?; + + assert!(state.tree().is_none()); + Ok(()) +} + #[test] fn new() { let state = State::new(gix_hash::Kind::Sha1); @@ -103,7 +222,91 @@ fn compare_states(actual: &State, expected: &State, fixture: &str) { } fn tree_id(root: &Path) -> gix_hash::ObjectId { - let hex_hash = - std::fs::read_to_string(root.join("head.tree")).expect("head.tree was created by git rev-parse @^{tree}"); + let hex_hash = std::fs::read_to_string(root.join("head.tree")).unwrap_or_else(|_| { + let mut out = std::process::Command::new("git") + .arg("-C") + .arg(root) + .args(["rev-parse", "@^{tree}"]) + .output() + .expect("git can determine the tree id"); + if !out.status.success() { + out = std::process::Command::new("git") + .arg("-C") + .arg(root) + .arg("write-tree") + .output() + .expect("git can write the tree id"); + } + assert!( + out.status.success(), + "git couldn't determine a tree: {}", + String::from_utf8_lossy(&out.stderr) + ); + String::from_utf8(out.stdout).expect("hex tree id is utf8") + }); hex_hash.trim().parse().expect("valid hash") } + +fn state_with_entries(paths: [&str; N]) -> State { + let mut state = State::new(gix_hash::Kind::Sha1); + for path in paths { + state.dangerously_push_entry( + Default::default(), + repeated_id(b'a'), + gix_index::entry::Flags::empty(), + gix_index::entry::Mode::FILE, + path.as_bytes().as_bstr(), + ); + } + state.sort_entries(); + state +} + +fn repeated_id(byte: u8) -> gix_hash::ObjectId { + gix_hash::ObjectId::from_hex(&vec![byte; gix_hash::Kind::Sha1.len_in_hex()]).expect("valid hex") +} + +struct MemoryDb { + object_hash: gix_hash::Kind, + exists_all: bool, + written: RefCell>, +} + +impl MemoryDb { + fn exists_all(object_hash: gix_hash::Kind) -> Self { + MemoryDb { + object_hash, + exists_all: true, + written: Default::default(), + } + } + + fn written_only(object_hash: gix_hash::Kind) -> Self { + MemoryDb { + object_hash, + exists_all: false, + written: Default::default(), + } + } +} + +impl gix_object::Exists for MemoryDb { + fn exists(&self, id: &gix_hash::oid) -> bool { + self.exists_all || self.written.borrow().contains(id) + } +} + +impl gix_object::Write for MemoryDb { + fn write_stream( + &self, + kind: gix_object::Kind, + _size: u64, + from: &mut dyn Read, + ) -> Result { + let mut buf = Vec::new(); + from.read_to_end(&mut buf)?; + let id = gix_object::compute_hash(self.object_hash, kind, &buf)?; + self.written.borrow_mut().insert(id); + Ok(id) + } +} From d3c7ba977c694ec38ed5482466658292fe1325d1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 30 Apr 2026 10:05:08 +0800 Subject: [PATCH 2/3] invalidate tree cache TBD --- gitoxide-core/src/index/checkout.rs | 2 +- gix-dir/tests/walk_utils/mod.rs | 2 +- gix-index/src/access/mod.rs | 40 +++++++++++++++-- gix-index/src/extension/mod.rs | 17 +++++++ gix-index/src/init.rs | 3 ++ gix-index/tests/index/access.rs | 47 ++++++++++++++++++++ gix-index/tests/index/file/write.rs | 4 +- gix-index/tests/index/init.rs | 17 +++++++ gix-status/tests/status/index_as_worktree.rs | 9 +++- gix/src/status/iter/types.rs | 2 +- 10 files changed, 134 insertions(+), 9 deletions(-) diff --git a/gitoxide-core/src/index/checkout.rs b/gitoxide-core/src/index/checkout.rs index 8c71380e184..3927cb90070 100644 --- a/gitoxide-core/src/index/checkout.rs +++ b/gitoxide-core/src/index/checkout.rs @@ -44,7 +44,7 @@ pub fn checkout_exclusive( } else { gix::index::entry::Mode::SYMLINK }; - for entry in index.entries_mut().iter_mut().filter(|e| { + for entry in index.entries_mut_keep_tree_cache().iter_mut().filter(|e| { e.mode .contains(maybe_symlink_mode | gix::index::entry::Mode::DIR | gix::index::entry::Mode::COMMIT) }) { diff --git a/gix-dir/tests/walk_utils/mod.rs b/gix-dir/tests/walk_utils/mod.rs index 44ea60e20ff..99d5beda8c3 100644 --- a/gix-dir/tests/walk_utils/mod.rs +++ b/gix-dir/tests/walk_utils/mod.rs @@ -301,7 +301,7 @@ pub fn try_collect_filtered_opts( ); if fresh_index { index - .entries_mut() + .entries_mut_keep_tree_cache() .iter_mut() .filter(|e| { // relevant for partial checkouts, all related entries will have skip-worktree set, diff --git a/gix-index/src/access/mod.rs b/gix-index/src/access/mod.rs index cb5dba26ed7..4f321dc6442 100644 --- a/gix-index/src/access/mod.rs +++ b/gix-index/src/access/mod.rs @@ -63,6 +63,7 @@ impl State { &'state mut self, backing: &'backing PathStorageRef, ) -> impl Iterator { + self.invalidate_tree_cache(); self.entries.iter_mut().map(move |e| { let path = backing[e.path.clone()].as_bstr(); (e, path) @@ -489,21 +490,43 @@ impl State { self.path_backing.is_empty(), "BUG: return path backing only after taking it, once" ); + self.invalidate_tree_cache(); self.path_backing = backing; } - /// Return mutable entries in a slice. + /// Return mutable entries in a slice and invalidate the TREE extension, if present. + /// + /// Prefer [`entries_mut_keep_tree_cache()`][Self::entries_mut_keep_tree_cache()] if only tree-neutral fields + /// are changed. pub fn entries_mut(&mut self) -> &mut [Entry] { + self.invalidate_tree_cache(); + &mut self.entries + } + + /// Return mutable entries in a slice without invalidating the TREE extension. + /// + /// Use this only for mutations that cannot change the tree produced by [`State::to_tree()`][crate::State::to_tree()]. + /// This includes `stat` updates, storage metadata like [`EXTENDED`][entry::Flags::EXTENDED], and flags that only + /// describe worktree/cache status, like [`ASSUME_VALID`][entry::Flags::ASSUME_VALID], + /// [`UPTODATE`][entry::Flags::UPTODATE], [`FSMONITOR_VALID`][entry::Flags::FSMONITOR_VALID], or + /// [`SKIP_WORKTREE`][entry::Flags::SKIP_WORKTREE]. + /// + /// Do not use this method to change object ids, modes, stage bits, paths, entry ordering, or flags that affect + /// tree construction like [`REMOVE`][entry::Flags::REMOVE] or [`INTENT_TO_ADD`][entry::Flags::INTENT_TO_ADD]. + /// Use [`entries_mut()`][Self::entries_mut()] instead for those changes. + pub fn entries_mut_keep_tree_cache(&mut self) -> &mut [Entry] { &mut self.entries } /// Return a writable slice to entries and read-access to their path storage at the same time. pub fn entries_mut_and_pathbacking(&mut self) -> (&mut [Entry], &PathStorageRef) { + self.invalidate_tree_cache(); (&mut self.entries, &self.path_backing) } /// Return mutable entries along with their paths in an iterator. pub fn entries_mut_with_paths(&mut self) -> impl Iterator { + self.invalidate_tree_cache(); let paths = &self.path_backing; self.entries.iter_mut().map(move |e| { let path = paths[e.path.clone()].as_bstr(); @@ -526,6 +549,7 @@ impl State { self.path_backing.is_empty(), "BUG: cannot take out backing multiple times" ); + self.invalidate_tree_cache(); std::mem::take(&mut self.path_backing) } @@ -534,8 +558,9 @@ impl State { /// /// The `path` must use the repository-relative, slash-separated [`State`] path format. pub fn entry_mut_by_path_and_stage(&mut self, path: &BStr, stage: entry::Stage) -> Option<&mut Entry> { - self.entry_index_by_path_and_stage(path, stage) - .map(|idx| &mut self.entries[idx]) + let idx = self.entry_index_by_path_and_stage(path, stage)?; + self.invalidate_tree_cache(); + Some(&mut self.entries[idx]) } /// Push a new entry containing `stat`, `id`, `flags` and `mode` and `path` to the end of our storage, without performing @@ -558,6 +583,7 @@ impl State { mode: entry::Mode, path: &BStr, ) { + self.invalidate_tree_cache(); let path = { let path_start = self.path_backing.len(); self.path_backing.push_str(path); @@ -603,6 +629,7 @@ impl State { /// To implement this operation typically, one would rather add [entry::Flags::REMOVE] to each entry to remove /// them when [writing the index](Self::write_to()). pub fn remove_entries(&mut self, mut should_remove: impl FnMut(usize, &BStr, &mut Entry) -> bool) { + self.invalidate_tree_cache(); let mut index = 0; let paths = &self.path_backing; self.entries.retain_mut(|e| { @@ -620,8 +647,15 @@ impl State { /// Note that the memory used for the removed entries paths is not freed, as it's append-only, and /// that some extensions might refer to paths which are now deleted. pub fn remove_entry_at_index(&mut self, index: usize) -> Entry { + self.invalidate_tree_cache(); self.entries.remove(index) } + + fn invalidate_tree_cache(&mut self) { + if let Some(tree) = self.tree.as_mut() { + tree.invalidate_recursively(); + } + } } /// Extensions diff --git a/gix-index/src/extension/mod.rs b/gix-index/src/extension/mod.rs index 1cf9d4c98a7..21cfc677890 100644 --- a/gix-index/src/extension/mod.rs +++ b/gix-index/src/extension/mod.rs @@ -34,6 +34,23 @@ pub struct Tree { pub children: Vec, } +impl Tree { + /// Return true if this tree and all child trees are valid and their tree objects exist in `objects`. + pub fn is_fully_valid(&self, objects: &impl gix_object::Exists) -> bool { + self.num_entries.is_some() + && objects.exists(&self.id) + && self.children.iter().all(|child| child.is_fully_valid(objects)) + } + + /// Invalidate this tree and all child trees. + pub(crate) fn invalidate_recursively(&mut self) { + self.num_entries = None; + for child in &mut self.children { + child.invalidate_recursively(); + } + } +} + /// The link extension to track a shared index. #[derive(Clone)] pub struct Link { diff --git a/gix-index/src/init.rs b/gix-index/src/init.rs index 515355eaaf5..530aef1c066 100644 --- a/gix-index/src/init.rs +++ b/gix-index/src/init.rs @@ -294,6 +294,9 @@ pub mod to_tree { { let _span = gix_features::trace::coarse!("gix_index::State::to_tree()"); self.verify_entries()?; + if let Some(tree) = self.tree.as_ref().filter(|tree| tree.is_fully_valid(&objects)) { + return Ok(tree.id); + } let update_tree_cache = self.tree.is_some(); let mut builder = Builder { diff --git a/gix-index/tests/index/access.rs b/gix-index/tests/index/access.rs index 3ec58a8344d..3894f2cabd6 100644 --- a/gix-index/tests/index/access.rs +++ b/gix-index/tests/index/access.rs @@ -283,6 +283,46 @@ fn remove_entries() { file.remove_entries(|_, _, _| unreachable!("should not be called")); } +#[test] +fn entries_mut_invalidates_tree_cache() { + let mut file = Fixture::Generated("v2_more_files").open(); + assert!(file + .tree() + .expect("TREE extension is present") + .children + .iter() + .all(|tree| tree.num_entries.is_some())); + + file.entries_mut()[0].stat.size = 42; + + let tree = file.tree().expect("TREE extension remains present"); + assert_tree_cache_is_invalid(tree); +} + +#[test] +fn entries_mut_keep_tree_cache_preserves_tree_cache() { + let mut file = Fixture::Generated("v2_more_files").open(); + let tree_before = file.tree().expect("TREE extension is present").clone(); + + file.entries_mut_keep_tree_cache()[0].stat.size = 42; + + assert_eq!( + file.tree(), + Some(&tree_before), + "tree-neutral mutations may preserve the TREE extension" + ); +} + +#[test] +fn take_path_backing_invalidates_tree_cache() { + let mut file = Fixture::Generated("v2_more_files").open(); + + let _backing = file.take_path_backing(); + + let tree = file.tree().expect("TREE extension remains present"); + assert_tree_cache_is_invalid(tree); +} + #[test] fn remove_entry_at_index() { let mut file = Fixture::Loose("conflicting-file").open(); @@ -295,6 +335,13 @@ fn remove_entry_at_index() { assert_eq!(file.entries().len(), 0); } +fn assert_tree_cache_is_invalid(tree: &gix_index::extension::Tree) { + assert_eq!(tree.num_entries, None); + for child in &tree.children { + assert_tree_cache_is_invalid(child); + } +} + #[test] fn sort_entries() { let mut file = Fixture::Generated("v4_more_files_IEOT").open(); diff --git a/gix-index/tests/index/file/write.rs b/gix-index/tests/index/file/write.rs index b272258904d..f6c7fde7da8 100644 --- a/gix-index/tests/index/file/write.rs +++ b/gix-index/tests/index/file/write.rs @@ -172,7 +172,9 @@ fn state_comparisons_with_various_extension_configurations() { fn extended_flags_automatically_upgrade_the_version_to_avoid_data_loss() -> crate::Result { let mut expected = Generated("v2").open(); assert_eq!(expected.version(), Version::V2); - expected.entries_mut()[0].flags.insert(entry::Flags::EXTENDED); + expected.entries_mut_keep_tree_cache()[0] + .flags + .insert(entry::Flags::EXTENDED); let mut buf = Vec::new(); let (actual_version, _digest) = expected.write_to(&mut buf, Default::default())?; diff --git a/gix-index/tests/index/init.rs b/gix-index/tests/index/init.rs index 84c0f174581..63721231db7 100644 --- a/gix-index/tests/index/init.rs +++ b/gix-index/tests/index/init.rs @@ -181,6 +181,23 @@ fn to_tree_refreshes_existing_tree_extension() -> crate::Result { Ok(()) } +#[test] +fn to_tree_reuses_fully_valid_tree_extension() -> crate::Result { + let mut index = super::Fixture::Generated("v2").open(); + let original_cached_tree = index.tree().expect("fixture has TREE extension").id; + index.entries_mut_keep_tree_cache()[0].stat.size = 42; + let objects = MemoryDb::exists_all(gix_hash::Kind::Sha1); + + let actual = index.to_tree(&objects, Default::default())?; + + assert_eq!(actual, original_cached_tree); + assert!( + objects.written.borrow().is_empty(), + "a fully-valid TREE cache can be reused without writing objects" + ); + Ok(()) +} + #[test] fn to_tree_does_not_create_missing_tree_extension() -> crate::Result { let worktree_dir = scripted_fixture_read_only("make_index/v2.sh")?; diff --git a/gix-status/tests/status/index_as_worktree.rs b/gix-status/tests/status/index_as_worktree.rs index 57e36674498..192bf3282ef 100644 --- a/gix-status/tests/status/index_as_worktree.rs +++ b/gix-status/tests/status/index_as_worktree.rs @@ -832,7 +832,12 @@ fn refresh() { assert_eq!( fixture_with_index( "status_changed", - |index| { index.entries_mut().iter_mut().for_each(|e| e.stat = Default::default()) }, + |index| { + index + .entries_mut_keep_tree_cache() + .iter_mut() + .for_each(|e| e.stat = Default::default()) + }, #[cfg(not(windows))] &[ ( @@ -1032,7 +1037,7 @@ fn racy_git() { // change. // This case doesn't happen in the realworld (except for file corruption) but // makes sure we are actually hitting the right codepath. - index.entries_mut()[0].stat.mtime.secs = timestamp; + index.entries_mut_keep_tree_cache()[0].stat.mtime.secs = timestamp; set_file_mtime( worktree.join("content"), FileTime::from_unix_time(i64::from(timestamp), 0), diff --git a/gix/src/status/iter/types.rs b/gix/src/status/iter/types.rs index c6ce3470513..09417a0706e 100644 --- a/gix/src/status/iter/types.rs +++ b/gix/src/status/iter/types.rs @@ -92,7 +92,7 @@ impl Outcome { IndexPersistedOrInMemory::InMemory(index) => index.clone(), }; - let entries = index.entries_mut(); + let entries = index.entries_mut_keep_tree_cache(); for (entry_index, change) in changes { let entry = &mut entries[entry_index]; match change { From 5f4fae61efc5be3f833072f9458d5199c597fe1f Mon Sep 17 00:00:00 2001 From: Codex GPT-5 Date: Thu, 30 Apr 2026 10:30:22 +0800 Subject: [PATCH 3/3] Fix PR CI fallout The PR CI lint job failed with clippy::semicolon-if-nothing-returned in gix-status/tests/status/index_as_worktree.rs after the keep-tree-cache call was expanded into a block closure. Add the requested semicolon so the closure has an explicit unit statement. The fast-test matrix also failed after tests regenerated gix-index/tests/fixtures/generated-archives/v2_deeper_tree.tar and git diff reported the archive had changed. The archive still carried an older fixture identity, so regenerate it from the current v2_deeper_tree fixture script identity to prevent CI from rewriting it. --- .../generated-archives/v2_deeper_tree.tar | Bin 78336 -> 81408 bytes gix-index/tests/index/init.rs | 6 ++++-- gix-status/tests/status/index_as_worktree.rs | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/gix-index/tests/fixtures/generated-archives/v2_deeper_tree.tar b/gix-index/tests/fixtures/generated-archives/v2_deeper_tree.tar index c5ec346f6f07407cc1e48138ff9d2ddcc88c1ba3..815d09e29987be3c6892d6dc594aa2df154c0f82 100644 GIT binary patch delta 2755 zcmb_e-EZ4e6i=WX)N~U}sH@fi7dNcQ6sO6Wc2H@EZVjdTstBPQ4-jtc>*P|`cKwmI zFz}FA5ie+~Cxl=xLm(bN3~l0L-G>B!!4Qu~fW`)Bd@B=txBjfBMH!kk}G&26kPamzHKHheeJACl@SCPZz@J!GB{B+l; z_r77qu2XZh$haJKZV=F2%jw^^ae38~k?}MbH-#^3SS3y`AA43f?9WFIXTqcP)2G*3 z!uWHYm$Qtwu;Z@Fcec-(HS^c`pE|dlef`K^mkzx%`{mA)kM1}g5AF6HC(yxa_fRgD z%jSo2+0EI_1KC(Mo6F}1w#2d_Zr4u6U)k5bN};Ak=1)HH>am-b(zmT~3Ea+LYA7{q z&~Wwe&ZXtH!_}Vk`Fu9N5^E@z%h$1X_rDno+p%ni0+`erem2>iupLq(riZ*L5spbf(j99C z;1~vAdEwnJXM@%PSUPe>>V;_A7LZ1C>>FO1MVzCCfh^NlKx&n!(*TSGI!`^x5>>GY z=qwq75)GV@XW7Wo0gR;)p&{RPJ%?b(#KC3&WzSKsYWW^gEmJqB>bZ!Tz==!Xfrgz0 zG-o-}aD-&gi0%;w@EF;UD}@+I*H=~Iy1EbfImooU6vX5a4eWXmnec=mGDg$f7GVaA zSY8`st`UQHL=*LwAZ6V$3~P?@#d4z+hzlf78N{s@>k?qW0#2}5AuVM(WX58>H8qG3 z-8h6L2E<{9Am0U&0yxYFTi|ComjI3{n3}FP%o~At<9W9;QQf2{z>^WD!%#V`P%{Og zuJ2xi=vPiWQWzOcNdaqhk=O=SMf#4ToXE*iM=uIkyx~R$ObXYEl_Z%;2Dvr? z5pw-OPf{7F!*hHhJ>L{B8lkG_BEq?E-HBTzNt;9K=&I&FSqs<-ic;SvLi&(k1!l3a&Q^>{3T@oT zfb=Qq6npIIp2oTmZ zr0DRc@D4B$%89yJMl$5=_6r8w{)G64U3Hn4)Iy0P23f%{CRa1DH(I03_<=+Wmq;D; zfGg;^4ySdbl%!KO6fo69ty)NK1Z|W)jELh{&M?y9B?$`;b_KkfY_YkAMa2 zu{9$^)L2PkMQ{r-;Qc3cG)yhqihDqPQG|wM|4Fe-(pT9_6Qz+|^)y=g<(tz`=#ARN zl#K-1)BX9LCL3Q~UPSN42E2xaTP?-Yh8ho^hfA*CvJ53<* crate::Result { let err = state.to_tree(&objects, Default::default()).unwrap_err(); assert!(matches!(err, gix_index::init::to_tree::Error::MissingObject { .. })); - let mut options = gix_index::init::to_tree::Options::default(); - options.missing_ok = true; + let options = gix_index::init::to_tree::Options { + missing_ok: true, + ..Default::default() + }; let actual = state.to_tree(&objects, options)?; assert_ne!(actual, gix_hash::Kind::Sha1.null()); Ok(()) diff --git a/gix-status/tests/status/index_as_worktree.rs b/gix-status/tests/status/index_as_worktree.rs index 192bf3282ef..46fc5ace6ae 100644 --- a/gix-status/tests/status/index_as_worktree.rs +++ b/gix-status/tests/status/index_as_worktree.rs @@ -836,7 +836,7 @@ fn refresh() { index .entries_mut_keep_tree_cache() .iter_mut() - .for_each(|e| e.stat = Default::default()) + .for_each(|e| e.stat = Default::default()); }, #[cfg(not(windows))] &[