Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gitoxide-core/src/index/checkout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ pub fn checkout_exclusive(
} else {
gix::index::entry::Mode::SYMLINK
};
for entry in index.entries_mut().iter_mut().filter(|e| {
for entry in index.entries_mut_keep_tree_cache().iter_mut().filter(|e| {
e.mode
.contains(maybe_symlink_mode | gix::index::entry::Mode::DIR | gix::index::entry::Mode::COMMIT)
}) {
Expand Down
2 changes: 1 addition & 1 deletion gix-dir/tests/walk_utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ pub fn try_collect_filtered_opts(
);
if fresh_index {
index
.entries_mut()
.entries_mut_keep_tree_cache()
.iter_mut()
.filter(|e| {
// relevant for partial checkouts, all related entries will have skip-worktree set,
Expand Down
5 changes: 5 additions & 0 deletions gix-index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ name = "from-tree"
harness = false
path = "./benches/from_tree.rs"

[[bench]]
name = "to-tree"
harness = false
path = "./benches/to_tree.rs"

[features]
## Enable support for the SHA-1 hash by enabling the respective feature in the `gix-hash` crate.
sha1 = ["gix-hash/sha1"]
Expand Down
109 changes: 109 additions & 0 deletions gix-index/benches/to_tree.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
use std::{hint::black_box, io::Read};

use bstr::ByteSlice;
use criterion::{Criterion, Throughput, criterion_group, criterion_main};
use gix_index::{
State,
entry::{Flags, Mode},
};

/// Benchmark `State::to_tree()` over three synthetic index shapes.
///
/// * `flat 10k files` - 10,000 file entries directly in the root.
/// * `wide 100 x 100 files` - 100 directories with 100 file entries each.
/// * `sparse 10k directories` - 10,000 entries of mode `DIR` whose paths end with a slash.
///
/// Objects are handed to `MemoryDb`, which only computes their hash, so no object storage is involved.
/// Throughput is reported in index entries per iteration.
fn to_tree(c: &mut Criterion) {
    let objects = MemoryDb {
        object_hash: gix_hash::Kind::Sha1,
    };
    let mut group = c.benchmark_group("to_tree");

    // Zero-padded indices keep the appended paths in ascending byte order.
    let mut flat = State::new(gix_hash::Kind::Sha1);
    for idx in 0..10_000 {
        flat.dangerously_push_entry(
            Default::default(),
            repeated_id(b'a'),
            Flags::empty(),
            Mode::FILE,
            format!("file-{idx:05}").as_bytes().as_bstr(),
        );
    }
    group.throughput(Throughput::Elements(flat.entries().len() as u64));
    group.bench_function("flat 10k files", |b| {
        b.iter(|| {
            let id = flat.to_tree(&objects, Default::default()).expect("tree can be written");
            black_box(id);
        });
    });

    let mut wide_deep = State::new(gix_hash::Kind::Sha1);
    for dir_idx in 0..100 {
        for file_idx in 0..100 {
            wide_deep.dangerously_push_entry(
                Default::default(),
                repeated_id(b'a'),
                Flags::empty(),
                Mode::FILE,
                format!("dir-{dir_idx:03}/file-{file_idx:03}").as_bytes().as_bstr(),
            );
        }
    }
    group.throughput(Throughput::Elements(wide_deep.entries().len() as u64));
    group.bench_function("wide 100 x 100 files", |b| {
        b.iter(|| {
            let id = wide_deep
                .to_tree(&objects, Default::default())
                .expect("tree can be written");
            black_box(id);
        });
    });

    let mut sparse = State::new(gix_hash::Kind::Sha1);
    for idx in 0..10_000 {
        sparse.dangerously_push_entry(
            Default::default(),
            // `repeated_id()` parses its input as hex and panics otherwise, so the byte must be
            // an ASCII hex digit. `b'b'` keeps these ids distinct from the `b'a'` blob ids above.
            repeated_id(b'b'),
            Flags::empty(),
            Mode::DIR,
            format!("sparse-{idx:05}/").as_bytes().as_bstr(),
        );
    }
    group.throughput(Throughput::Elements(sparse.entries().len() as u64));
    group.bench_function("sparse 10k directories", |b| {
        b.iter(|| {
            let id = sparse
                .to_tree(&objects, Default::default())
                .expect("tree can be written");
            black_box(id);
        });
    });
}

// Register `to_tree` in the Criterion group `benches` and let Criterion provide the entry point,
// as this bench target is declared with `harness = false`.
criterion_group!(benches, to_tree);
criterion_main!(benches);

/// An object database stand-in for benchmarking: it claims every object exists and
/// "writes" objects by merely computing their hash, without storing anything.
struct MemoryDb {
/// The kind of hash used to compute the ids of written objects.
object_hash: gix_hash::Kind,
}

impl gix_object::Exists for MemoryDb {
/// Report every object id as present, no matter which `_id` is asked for.
fn exists(&self, _id: &gix_hash::oid) -> bool {
true
}
}

impl gix_object::Write for MemoryDb {
    /// Compute the id of an object of `kind` with content `from` and return it without storing the object.
    fn write_buf(&self, kind: gix_object::Kind, from: &[u8]) -> Result<gix_hash::ObjectId, gix_object::write::Error> {
        let id = gix_object::compute_hash(self.object_hash, kind, from)?;
        Ok(id)
    }

    /// Read all of `from` into memory and hash it like `write_buf()` does; the `_size` hint is ignored.
    fn write_stream(
        &self,
        kind: gix_object::Kind,
        _size: u64,
        from: &mut dyn Read,
    ) -> Result<gix_hash::ObjectId, gix_object::write::Error> {
        let mut data = Vec::new();
        from.read_to_end(&mut data)?;
        self.write_buf(kind, data.as_slice())
    }
}

/// Build a SHA-1 object id by parsing a hex string that consists of `byte` repeated for the full hex length.
///
/// # Panics
///
/// If `ObjectId::from_hex()` rejects the generated buffer - presumably whenever `byte` is not an
/// ASCII hex digit (TODO confirm against `gix_hash`).
fn repeated_id(byte: u8) -> gix_hash::ObjectId {
    let hex_len = gix_hash::Kind::Sha1.len_in_hex();
    let hex = vec![byte; hex_len];
    gix_hash::ObjectId::from_hex(&hex).expect("valid hex")
}
40 changes: 37 additions & 3 deletions gix-index/src/access/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ impl State {
&'state mut self,
backing: &'backing PathStorageRef,
) -> impl Iterator<Item = (&'state mut Entry, &'backing BStr)> {
self.invalidate_tree_cache();
self.entries.iter_mut().map(move |e| {
let path = backing[e.path.clone()].as_bstr();
(e, path)
Expand Down Expand Up @@ -489,21 +490,43 @@ impl State {
self.path_backing.is_empty(),
"BUG: return path backing only after taking it, once"
);
self.invalidate_tree_cache();
self.path_backing = backing;
}

/// Return mutable entries in a slice.
/// Return mutable entries in a slice and invalidate the TREE extension, if present.
///
/// Prefer [`entries_mut_keep_tree_cache()`][Self::entries_mut_keep_tree_cache()] if only tree-neutral fields
/// are changed.
pub fn entries_mut(&mut self) -> &mut [Entry] {
// Callers can change any entry field through the returned slice, so the cached tree is invalidated up front.
self.invalidate_tree_cache();
&mut self.entries
}

/// Return mutable entries in a slice without invalidating the TREE extension.
///
/// Nothing is checked here - it is the caller's responsibility to only perform tree-neutral changes.
///
/// Use this only for mutations that cannot change the tree produced by [`State::to_tree()`][crate::State::to_tree()].
/// This includes `stat` updates, storage metadata like [`EXTENDED`][entry::Flags::EXTENDED], and flags that only
/// describe worktree/cache status, like [`ASSUME_VALID`][entry::Flags::ASSUME_VALID],
/// [`UPTODATE`][entry::Flags::UPTODATE], [`FSMONITOR_VALID`][entry::Flags::FSMONITOR_VALID], or
/// [`SKIP_WORKTREE`][entry::Flags::SKIP_WORKTREE].
///
/// Do not use this method to change object ids, modes, stage bits, paths, entry ordering, or flags that affect
/// tree construction like [`REMOVE`][entry::Flags::REMOVE] or [`INTENT_TO_ADD`][entry::Flags::INTENT_TO_ADD].
/// Use [`entries_mut()`][Self::entries_mut()] instead for those changes, as it invalidates the TREE extension.
pub fn entries_mut_keep_tree_cache(&mut self) -> &mut [Entry] {
&mut self.entries
}

/// Return a writable slice to entries and read-access to their path storage at the same time.
///
/// The TREE extension, if present, is invalidated as the returned entries can be changed arbitrarily.
pub fn entries_mut_and_pathbacking(&mut self) -> (&mut [Entry], &PathStorageRef) {
self.invalidate_tree_cache();
(&mut self.entries, &self.path_backing)
}

/// Return mutable entries along with their paths in an iterator.
pub fn entries_mut_with_paths(&mut self) -> impl Iterator<Item = (&mut Entry, &BStr)> {
self.invalidate_tree_cache();
let paths = &self.path_backing;
self.entries.iter_mut().map(move |e| {
let path = paths[e.path.clone()].as_bstr();
Expand All @@ -526,6 +549,7 @@ impl State {
self.path_backing.is_empty(),
"BUG: cannot take out backing multiple times"
);
self.invalidate_tree_cache();
std::mem::take(&mut self.path_backing)
}

Expand All @@ -534,8 +558,9 @@ impl State {
///
/// The `path` must use the repository-relative, slash-separated [`State`] path format.
pub fn entry_mut_by_path_and_stage(&mut self, path: &BStr, stage: entry::Stage) -> Option<&mut Entry> {
self.entry_index_by_path_and_stage(path, stage)
.map(|idx| &mut self.entries[idx])
let idx = self.entry_index_by_path_and_stage(path, stage)?;
self.invalidate_tree_cache();
Some(&mut self.entries[idx])
}

/// Push a new entry containing `stat`, `id`, `flags` and `mode` and `path` to the end of our storage, without performing
Expand All @@ -558,6 +583,7 @@ impl State {
mode: entry::Mode,
path: &BStr,
) {
self.invalidate_tree_cache();
let path = {
let path_start = self.path_backing.len();
self.path_backing.push_str(path);
Expand Down Expand Up @@ -603,6 +629,7 @@ impl State {
/// To implement this operation typically, one would rather add [entry::Flags::REMOVE] to each entry to remove
/// them when [writing the index](Self::write_to()).
pub fn remove_entries(&mut self, mut should_remove: impl FnMut(usize, &BStr, &mut Entry) -> bool) {
self.invalidate_tree_cache();
let mut index = 0;
let paths = &self.path_backing;
self.entries.retain_mut(|e| {
Expand All @@ -620,8 +647,15 @@ impl State {
/// Note that the memory used for the removed entries paths is not freed, as it's append-only, and
/// that some extensions might refer to paths which are now deleted.
pub fn remove_entry_at_index(&mut self, index: usize) -> Entry {
// The cached tree is invalidated before the entry at `index` is taken out of the entries list.
self.invalidate_tree_cache();
self.entries.remove(index)
}

/// Mark the cached tree and all of its children as invalid, or do nothing if there is no cached tree.
fn invalidate_tree_cache(&mut self) {
    if let Some(cache) = &mut self.tree {
        cache.invalidate_recursively();
    }
}
}

/// Extensions
Expand Down
17 changes: 17 additions & 0 deletions gix-index/src/extension/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,23 @@ pub struct Tree {
pub children: Vec<Tree>,
}

impl Tree {
    /// Return `true` only if this tree and every tree below it has an entry count and an id that `objects`
    /// reports as existing.
    ///
    /// The entry count is checked before `objects` is consulted, and children are only visited
    /// if this tree itself passed both checks.
    pub fn is_fully_valid(&self, objects: &impl gix_object::Exists) -> bool {
        if self.num_entries.is_none() || !objects.exists(&self.id) {
            return false;
        }
        self.children.iter().all(|subtree| subtree.is_fully_valid(objects))
    }

    /// Clear the entry count of this tree and of all trees below it, which marks them as invalid.
    pub(crate) fn invalidate_recursively(&mut self) {
        self.num_entries = None;
        self.children.iter_mut().for_each(Self::invalidate_recursively);
    }
}

/// The link extension to track a shared index.
#[derive(Clone)]
pub struct Link {
Expand Down
Loading