Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions crates/fff-c/src/ffi_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ impl From<&FileItem> for FffFileItem {
git_status: cstring_new(format_git_status(item.git_status)),
size: item.size,
modified: item.modified,
access_frecency_score: item.access_frecency_score,
modification_frecency_score: item.modification_frecency_score,
total_frecency_score: item.total_frecency_score,
access_frecency_score: item.access_frecency_score as i64,
modification_frecency_score: item.modification_frecency_score as i64,
total_frecency_score: item.total_frecency_score as i64,
is_binary: item.is_binary,
}
}
Expand Down Expand Up @@ -322,9 +322,9 @@ impl FffGrepMatch {
context_after,
size: file.size,
modified: file.modified,
total_frecency_score: file.total_frecency_score,
access_frecency_score: file.access_frecency_score,
modification_frecency_score: file.modification_frecency_score,
total_frecency_score: file.total_frecency_score as i64,
access_frecency_score: file.access_frecency_score as i64,
modification_frecency_score: file.modification_frecency_score as i64,
line_number: m.line_number,
byte_offset: m.byte_offset,
col: m.col as u32,
Expand Down
10 changes: 8 additions & 2 deletions crates/fff-c/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,8 +403,14 @@ pub unsafe extern "C" fn fff_live_grep(
classify_definitions,
};

let result =
fff::grep::grep_search(picker.get_files(), &parsed, &options, picker.cache_budget());
let result = fff::grep::grep_search(
picker.get_files(),
&parsed,
&options,
picker.cache_budget(),
None,
None,
);
let grep_result = FffGrepResult::from_core(&result);
FffResult::ok_handle(grep_result as *mut c_void)
}
Expand Down
12 changes: 12 additions & 0 deletions crates/fff-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ crate-type = ["rlib", "staticlib", "cdylib"]
default = []
# Enable C FFI exports
ffi = []
# Call mi_collect(true) after large allocator churn (bigram build).
# Requires mimalloc to be the global allocator (linked by fff-nvim).
mimalloc-collect = ["dep:libmimalloc-sys"]
# Use zlob (Zig-compiled C globbing library) for glob matching.
# Requires Zig to be installed. When disabled, falls back to globset (pure Rust).
zlob = ["dep:zlob", "fff-query-parser/zlob"]
Expand Down Expand Up @@ -55,6 +58,7 @@ toml = "0.8"
tracing-appender = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
zlob = { workspace = true, optional = true }
libmimalloc-sys = { version = "0.1", optional = true, features = ["extended"] }
# Platform-specific: dunce for Windows to avoid \\?\ extended path prefix
[target.'cfg(windows)'.dependencies]
dunce = { workspace = true }
Expand All @@ -63,3 +67,11 @@ dunce = { workspace = true }
criterion = { version = "0.5", features = ["html_reports"] }
rand = { version = "0.8", features = ["small_rng"] }
tempfile = "3.8"

[[bench]]
name = "parse_bench"
harness = false

[[bench]]
name = "bigram_bench"
harness = false
119 changes: 119 additions & 0 deletions crates/fff-core/benches/bigram_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
use fff_search::types::{BigramFilter, BigramIndexBuilder};

/// Construct a bigram index over `file_count` synthetic files for benchmarking.
/// Content varies per file so the resulting index contains a mix of sparse and
/// dense columns, roughly mimicking a large real-world repository.
fn build_test_index(file_count: usize) -> BigramFilter {
    let builder = BigramIndexBuilder::new(file_count);

    (0..file_count).for_each(|i| {
        // Embed the file index in the text so every file's content differs.
        let body = format!(
            "struct File{i} {{ fn process() {{ let controller = read(path); }} }} // module {i}"
        );
        builder.add_file_content(i, body.as_bytes());
    });

    builder.compress()
}

/// Benchmark `BigramFilter::query` across several index sizes and query lengths.
fn bench_bigram_query(c: &mut Criterion) {
    // Query set spans short, medium, long, and multi-word inputs.
    let queries: &[(&str, &[u8])] = &[
        ("short_2char", b"st"),
        ("medium_6char", b"struct"),
        ("long_14char", b"let controller"),
        ("multi_word", b"fn process"),
    ];

    for &file_count in &[10_000, 100_000, 500_000] {
        let index = build_test_index(file_count);
        // Print index shape so bench output can be correlated with column mix.
        eprintln!(
            "Index ({} files): {} columns ({} dense, {} sparse)",
            file_count,
            index.columns_used(),
            index.dense_columns(),
            index.sparse_columns(),
        );

        let mut group = c.benchmark_group(format!("bigram_query_{file_count}"));
        group.sample_size(500);

        for (name, query) in queries {
            group.bench_with_input(BenchmarkId::from_parameter(name), query, |b, q| {
                b.iter(|| {
                    let out = index.query(black_box(q));
                    black_box(&out);
                });
            });
        }

        group.finish();
    }
}

/// Benchmark per-file candidate probing and bulk candidate counting against a
/// 500k-file index, using the candidate set produced by a "struct" query.
fn bench_bigram_is_candidate(c: &mut Criterion) {
    let index = build_test_index(500_000);
    let candidates = index.query(b"struct").unwrap();

    c.bench_function("is_candidate_500k", |b| {
        b.iter(|| {
            // Probe every file id once; counting matches keeps the work observable
            // so the optimizer cannot discard the per-id checks.
            let matched = (0..500_000)
                .filter(|&i| BigramFilter::is_candidate(black_box(&candidates), i))
                .count();
            black_box(matched)
        });
    });

    c.bench_function("count_candidates_500k", |b| {
        b.iter(|| BigramFilter::count_candidates(black_box(&candidates)));
    });
}

/// Benchmark full index construction (add + compress). Source strings are
/// generated up front so their allocation is excluded from the measurement.
fn bench_bigram_build(c: &mut Criterion) {
    let mut group = c.benchmark_group("bigram_build");
    group.sample_size(10);

    for &file_count in &[10_000, 100_000] {
        let contents: Vec<String> = (0..file_count)
            .map(|i| {
                format!(
                    "struct File{i} {{ fn process() {{ let controller = read(path); }} }} // mod {i}"
                )
            })
            .collect();

        group.bench_with_input(
            BenchmarkId::new("build_and_compress", file_count),
            &file_count,
            |b, &fc| {
                b.iter(|| {
                    let builder = BigramIndexBuilder::new(fc);
                    contents
                        .iter()
                        .enumerate()
                        .for_each(|(i, content)| builder.add_file_content(i, content.as_bytes()));
                    // Touch a field of the result so compression isn't optimized out.
                    black_box(builder.compress().columns_used())
                })
            },
        );
    }

    group.finish();
}

// Register all bigram benchmarks and let criterion generate the `main` entry
// point required by `harness = false` in Cargo.toml.
criterion_group!(
    benches,
    bench_bigram_query,
    bench_bigram_is_candidate,
    bench_bigram_build,
);

criterion_main!(benches);
31 changes: 27 additions & 4 deletions crates/fff-core/src/constraints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,32 @@ use smallvec::SmallVec;

use crate::git::is_modified_status;

/// Allocation-free case-insensitive substring search over ASCII bytes.
/// The caller guarantees `needle` is already lowercased.
#[inline]
fn contains_ascii_ci(haystack: &str, needle: &str) -> bool {
    let n = needle.as_bytes();
    // An empty needle trivially matches any haystack.
    if n.is_empty() {
        return true;
    }
    let h = haystack.as_bytes();
    if h.len() < n.len() {
        return false;
    }
    // Slide a needle-sized window across the haystack, folding only the
    // haystack side to lowercase (the needle is lowercase by contract).
    h.windows(n.len())
        .any(|win| win.iter().zip(n).all(|(a, b)| a.to_ascii_lowercase() == *b))
}

/// Minimum item count before switching to parallel iteration with rayon.
/// Below this threshold, the overhead of thread pool dispatch outweighs the benefit.
const PAR_THRESHOLD: usize = 10_000;
Expand All @@ -22,9 +48,6 @@ pub trait Constrainable {
/// The file's relative path (e.g. "src/main.rs")
fn relative_path(&self) -> &str;

/// The file's lowercased relative path for case-insensitive matching
fn relative_path_lower(&self) -> &str;

/// The file name component (e.g. "main.rs")
fn file_name(&self) -> &str;

Expand Down Expand Up @@ -152,7 +175,7 @@ fn item_matches_constraint_at_index<T: Constrainable>(
}

// only works with negation
Constraint::Text(text) => item.relative_path_lower().contains(text),
Constraint::Text(text) => contains_ascii_ci(item.relative_path(), text),

// Parts and Exclude are handled at a higher level
Constraint::Parts(_) | Constraint::Exclude(_) | Constraint::FileType(_) => true,
Expand Down
Loading