Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions crates/fff-c/src/ffi_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ impl From<&FileItem> for FffFileItem {
git_status: cstring_new(format_git_status(item.git_status)),
size: item.size,
modified: item.modified,
access_frecency_score: item.access_frecency_score,
modification_frecency_score: item.modification_frecency_score,
total_frecency_score: item.total_frecency_score,
access_frecency_score: item.access_frecency_score as i64,
modification_frecency_score: item.modification_frecency_score as i64,
total_frecency_score: item.total_frecency_score as i64,
is_binary: item.is_binary,
}
}
Expand Down Expand Up @@ -322,9 +322,9 @@ impl FffGrepMatch {
context_after,
size: file.size,
modified: file.modified,
total_frecency_score: file.total_frecency_score,
access_frecency_score: file.access_frecency_score,
modification_frecency_score: file.modification_frecency_score,
total_frecency_score: file.total_frecency_score as i64,
access_frecency_score: file.access_frecency_score as i64,
modification_frecency_score: file.modification_frecency_score as i64,
line_number: m.line_number,
byte_offset: m.byte_offset,
col: m.col as u32,
Expand Down
10 changes: 8 additions & 2 deletions crates/fff-c/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,8 +403,14 @@ pub unsafe extern "C" fn fff_live_grep(
classify_definitions,
};

let result =
fff::grep::grep_search(picker.get_files(), &parsed, &options, picker.cache_budget());
let result = fff::grep::grep_search(
picker.get_files(),
&parsed,
&options,
picker.cache_budget(),
None,
None,
);
let grep_result = FffGrepResult::from_core(&result);
FffResult::ok_handle(grep_result as *mut c_void)
}
Expand Down
12 changes: 12 additions & 0 deletions crates/fff-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ crate-type = ["rlib", "staticlib", "cdylib"]
default = []
# Enable C FFI exports
ffi = []
# Call mi_collect(true) after large allocator churn (bigram build).
# Requires mimalloc to be the global allocator (linked by fff-nvim).
mimalloc-collect = ["dep:libmimalloc-sys"]
# Use zlob (Zig-compiled C globbing library) for glob matching.
# Requires Zig to be installed. When disabled, falls back to globset (pure Rust).
zlob = ["dep:zlob", "fff-query-parser/zlob"]
Expand Down Expand Up @@ -55,6 +58,7 @@ toml = "0.8"
tracing-appender = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
zlob = { workspace = true, optional = true }
libmimalloc-sys = { version = "0.1", optional = true, features = ["extended"] }
# Platform-specific: dunce for Windows to avoid \\?\ extended path prefix
[target.'cfg(windows)'.dependencies]
dunce = { workspace = true }
Expand All @@ -63,3 +67,11 @@ dunce = { workspace = true }
criterion = { version = "0.5", features = ["html_reports"] }
rand = { version = "0.8", features = ["small_rng"] }
tempfile = "3.8"

[[bench]]
name = "parse_bench"
harness = false

[[bench]]
name = "bigram_bench"
harness = false
119 changes: 119 additions & 0 deletions crates/fff-core/benches/bigram_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
use fff_search::types::{BigramFilter, BigramIndexBuilder};

/// Construct a bigram index over `file_count` synthetic files for benchmarking.
/// Content varies per file so the resulting index contains a mix of sparse and
/// dense columns, roughly mimicking a large real-world repository.
fn build_test_index(file_count: usize) -> BigramFilter {
    let builder = BigramIndexBuilder::new(file_count);

    (0..file_count).for_each(|i| {
        // Embed the file index in the text so every file's content differs.
        let body = format!(
            "struct File{i} {{ fn process() {{ let controller = read(path); }} }} // module {i}"
        );
        builder.add_file_content(i, body.as_bytes());
    });

    builder.compress()
}

/// Benchmark `BigramFilter::query` across several index sizes and query lengths.
fn bench_bigram_query(c: &mut Criterion) {
    // Query set spans short, medium, long, and multi-word inputs.
    let queries: &[(&str, &[u8])] = &[
        ("short_2char", b"st"),
        ("medium_6char", b"struct"),
        ("long_14char", b"let controller"),
        ("multi_word", b"fn process"),
    ];

    for &file_count in &[10_000, 100_000, 500_000] {
        let index = build_test_index(file_count);
        // Print index shape so bench output can be correlated with column mix.
        eprintln!(
            "Index ({} files): {} columns ({} dense, {} sparse)",
            file_count,
            index.columns_used(),
            index.dense_columns(),
            index.sparse_columns(),
        );

        let mut group = c.benchmark_group(format!("bigram_query_{file_count}"));
        group.sample_size(500);

        for (name, query) in queries {
            group.bench_with_input(BenchmarkId::from_parameter(name), query, |b, q| {
                b.iter(|| {
                    let out = index.query(black_box(q));
                    black_box(&out);
                });
            });
        }

        group.finish();
    }
}

/// Benchmark per-file candidate probing and bulk candidate counting against a
/// 500k-file index, using the candidate set produced by a "struct" query.
fn bench_bigram_is_candidate(c: &mut Criterion) {
    let index = build_test_index(500_000);
    let candidates = index.query(b"struct").unwrap();

    c.bench_function("is_candidate_500k", |b| {
        b.iter(|| {
            // Probe every file id once; counting matches keeps the work observable
            // so the optimizer cannot discard the per-id checks.
            let matched = (0..500_000)
                .filter(|&i| BigramFilter::is_candidate(black_box(&candidates), i))
                .count();
            black_box(matched)
        });
    });

    c.bench_function("count_candidates_500k", |b| {
        b.iter(|| BigramFilter::count_candidates(black_box(&candidates)));
    });
}

/// Benchmark full index construction (add + compress). Source strings are
/// generated up front so their allocation is excluded from the measurement.
fn bench_bigram_build(c: &mut Criterion) {
    let mut group = c.benchmark_group("bigram_build");
    group.sample_size(10);

    for &file_count in &[10_000, 100_000] {
        let contents: Vec<String> = (0..file_count)
            .map(|i| {
                format!(
                    "struct File{i} {{ fn process() {{ let controller = read(path); }} }} // mod {i}"
                )
            })
            .collect();

        group.bench_with_input(
            BenchmarkId::new("build_and_compress", file_count),
            &file_count,
            |b, &fc| {
                b.iter(|| {
                    let builder = BigramIndexBuilder::new(fc);
                    contents
                        .iter()
                        .enumerate()
                        .for_each(|(i, content)| builder.add_file_content(i, content.as_bytes()));
                    // Touch a field of the result so compression isn't optimized out.
                    black_box(builder.compress().columns_used())
                })
            },
        );
    }

    group.finish();
}

// Register all bigram benchmarks and let criterion generate the `main` entry
// point required by `harness = false` in Cargo.toml.
criterion_group!(
    benches,
    bench_bigram_query,
    bench_bigram_is_candidate,
    bench_bigram_build,
);

criterion_main!(benches);
31 changes: 27 additions & 4 deletions crates/fff-core/src/constraints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,32 @@ use smallvec::SmallVec;

use crate::git::is_modified_status;

/// Allocation-free case-insensitive substring search over ASCII bytes.
/// The caller guarantees `needle` is already lowercased.
#[inline]
fn contains_ascii_ci(haystack: &str, needle: &str) -> bool {
    let n = needle.as_bytes();
    // An empty needle trivially matches any haystack.
    if n.is_empty() {
        return true;
    }
    let h = haystack.as_bytes();
    if h.len() < n.len() {
        return false;
    }
    // Slide a needle-sized window across the haystack, folding only the
    // haystack side to lowercase (the needle is lowercase by contract).
    h.windows(n.len())
        .any(|win| win.iter().zip(n).all(|(a, b)| a.to_ascii_lowercase() == *b))
}

/// Minimum item count before switching to parallel iteration with rayon.
/// Below this threshold, the overhead of thread pool dispatch outweighs the benefit.
const PAR_THRESHOLD: usize = 10_000;
Expand All @@ -22,9 +48,6 @@ pub trait Constrainable {
/// The file's relative path (e.g. "src/main.rs")
fn relative_path(&self) -> &str;

/// The file's lowercased relative path for case-insensitive matching
fn relative_path_lower(&self) -> &str;

/// The file name component (e.g. "main.rs")
fn file_name(&self) -> &str;

Expand Down Expand Up @@ -152,7 +175,7 @@ fn item_matches_constraint_at_index<T: Constrainable>(
}

// only works with negation
Constraint::Text(text) => item.relative_path_lower().contains(text),
Constraint::Text(text) => contains_ascii_ci(item.relative_path(), text),

// Parts and Exclude are handled at a higher level
Constraint::Parts(_) | Constraint::Exclude(_) | Constraint::FileType(_) => true,
Expand Down
Loading