diff --git a/Cargo.lock b/Cargo.lock index f2deab2f..93b8afeb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -213,6 +213,29 @@ version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +[[package]] +name = "bytecheck" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0caa33a2c0edca0419d15ac723dff03f1956f7978329b1e3b5fdaaaed9d3ca8b" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "rancor", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89385e82b5d1821d2219e0b095efa2cc1f246cbf99080f3be46a1a85c0d392d9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "byteorder" version = "1.5.0" @@ -225,6 +248,12 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "bytesize" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd91ee7b2422bcb158d90ef4d14f75ef67f340943fc4149891dcce8f8b972a3" + [[package]] name = "cast" version = "0.3.0" @@ -641,6 +670,25 @@ dependencies = [ "serde_json", ] +[[package]] +name = "fff-daemon" +version = "0.0.1" +dependencies = [ + "clap", + "dunce", + "fff-ipc-domain", + "fff-search", + "mimalloc", + "parking_lot", + "rayon", + "rkyv", + "sendfd", + "signal-hook", + "smallvec", + "tracing", + "tracing-subscriber", +] + [[package]] name = "fff-grep" version = "0.8.4" @@ -649,6 +697,14 @@ dependencies = [ "memchr", ] +[[package]] +name = "fff-ipc-domain" +version = "0.0.1" +dependencies = [ + "libc", + "rkyv", +] + [[package]] name = "fff-mcp" version = "0.8.4" @@ -707,6 +763,23 @@ dependencies = [ "zlob", ] +[[package]] +name = "fff-rg" +version = "0.0.1" +dependencies = [ + "bytesize", 
+ "clap", + "fff-ipc-domain", + "git2", + "mimalloc", + "rkyv", + "sendfd", + "test-case", + "tracing", + "tracing-subscriber", + "which", +] + [[package]] name = "fff-search" version = "0.8.4" @@ -974,6 +1047,12 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + [[package]] name = "heck" version = "0.5.0" @@ -1501,6 +1580,26 @@ dependencies = [ "syn", ] +[[package]] +name = "munge" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c" +dependencies = [ + "munge_macro", +] + +[[package]] +name = "munge_macro" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "neo_frizbee" version = "0.10.2" @@ -1837,6 +1936,26 @@ dependencies = [ "unarray", ] +[[package]] +name = "ptr_meta" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -1864,6 +1983,15 @@ version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" +[[package]] +name = "rancor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a063ea72381527c2a0561da9c80000ef822bdd7c3241b1cc1b12100e3df081ee" +dependencies = [ + "ptr_meta", +] + [[package]] name = "rand" version = "0.8.5" @@ -2030,6 +2158,45 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "rend" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cadadef317c2f20755a64d7fdc48f9e7178ee6b0e1f7fce33fa60f1d68a276e6" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "rkyv" +version = "0.8.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73389e0c99e664f919275ab5b5b0471391fe9a8de61e1dff9b1eaf56a90f16e3" +dependencies = [ + "bytecheck", + "bytes", + "hashbrown 0.17.1", + "indexmap", + "munge", + "ptr_meta", + "rancor", + "rend", + "rkyv_derive", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.8.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d2ed0b54125315fb36bd021e82d314d1c126548f871634b483f46b31d13cac6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "rmcp" version = "1.7.0" @@ -2154,6 +2321,15 @@ version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +[[package]] +name = "sendfd" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b183bfd5b1bc64ab0c1ef3ee06b008a9ef1b68a7d3a99ba566fbfe7a7c6d745b" +dependencies = [ + "libc", +] + [[package]] name = "serde" version = "1.0.228" @@ -2223,6 +2399,16 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum 
= "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "signal-hook" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2" +dependencies = [ + "libc", + "signal-hook-registry", +] + [[package]] name = "signal-hook-registry" version = "1.4.8" @@ -2233,6 +2419,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "siphasher" version = "1.0.2" @@ -2317,6 +2509,39 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "test-case" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb2550dd13afcd286853192af8601920d959b14c401fcece38071d53bf0768a8" +dependencies = [ + "test-case-macros", +] + +[[package]] +name = "test-case-core" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adcb7fd841cd518e279be3d5a3eb0636409487998a4aff22f3de87b81e88384f" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "test-case-macros" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c89e72a01ed4c579669add59014b9a524d609c0c88c6a585ce37485879f6ffb" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "test-case-core", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -2417,6 +2642,21 @@ dependencies = [ "serde_json", ] +[[package]] +name = "tinyvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" version = "1.50.0" @@ -2573,6 +2813,16 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "uuid" +version = "1.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d258b83ceec21034727ecee8c382cfa6c3e133699b0742c64571814fb420c9f7" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "valuable" version = "0.1.1" @@ -2714,6 +2964,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "which" +version = "8.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c789537cf2f7f55be8e6192f92e464174ee55f91af622777f7f1ceb0dbccd03e" +dependencies = [ + "libc", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index a8d9358d..8cf8f0a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,9 @@ members = [ "crates/fff-nvim", "crates/fff-query-parser", "crates/fff-grep", + "crates/cli/fff-daemon", + "crates/cli/fff-ipc-domain", + "crates/cli/fff-rg", ] resolver = "2" @@ -44,7 +47,11 @@ regex = "1.11" regex-syntax = "0.8" smallvec = { version = "1.13", features = ["const_generics", "union"] } thiserror = "2.0.10" -tracing = "0.1" +tracing = { version = "0.1", features = ["release_max_level_debug"] } +rkyv = { version = "0.8" } +clap = { version = "4", features = ["derive", "env"] } +libc = "0.2" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } [profile.release] opt-level = 3 @@ -68,3 +75,6 @@ inherits = "release" debug = "full" strip = false lto = "thin" + +[workspace.lints.clippy] +pedantic = { level = "warn", priority = -1 } diff --git a/crates/cli/fff-daemon/Cargo.toml b/crates/cli/fff-daemon/Cargo.toml new file mode 100644 index 
00000000..0f0f74c5
--- /dev/null
+++ b/crates/cli/fff-daemon/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "fff-daemon"
+version = "0.0.1"
+edition = "2024"
+description = "FFF file finder daemon — indexes files and serves search requests over UDS."
+license = "MIT"
+
+[[bin]]
+name = "fff-daemon"
+path = "src/main.rs"
+
+[dependencies]
+fff = { package = "fff-search", path = "../../fff-core", version = "0.8.4" }
+fff-ipc-domain = { path = "../fff-ipc-domain" }
+rkyv = { workspace = true }
+clap = { workspace = true }
+tracing = { workspace = true }
+tracing-subscriber = { workspace = true }
+mimalloc = { workspace = true }
+parking_lot = { workspace = true }
+rayon = { workspace = true }
+smallvec = { workspace = true }
+dunce = { workspace = true }
+sendfd = "0.4"
+signal-hook = "0.3"
diff --git a/crates/cli/fff-daemon/src/convert.rs b/crates/cli/fff-daemon/src/convert.rs
new file mode 100644
index 00000000..a33a7933
--- /dev/null
+++ b/crates/cli/fff-daemon/src/convert.rs
@@ -0,0 +1,18 @@
+/// Bridges IPC domain types to their fff-core equivalents (orphan rule workaround).
+///
+/// Both the IPC types and the fff-core types live in other crates, so a local
+/// extension trait is used to express the conversion.
+pub trait IntoCoreExt {
+    /// The fff-core type this IPC type converts into.
+    type Core;
+    /// Converts this IPC type into the corresponding fff-core type.
+    fn into_core(self) -> Self::Core;
+}
+
+/// Maps each IPC case-sensitivity variant onto the identically named fff-core variant.
+impl IntoCoreExt for fff_ipc_domain::CaseMode {
+    type Core = fff::CaseMode;
+
+    fn into_core(self) -> fff::CaseMode {
+        // Exhaustive on purpose: a new IPC variant must fail to compile here.
+        match self {
+            Self::Smart => fff::CaseMode::Smart,
+            Self::Sensitive => fff::CaseMode::Sensitive,
+            Self::Insensitive => fff::CaseMode::Insensitive,
+        }
+    }
+}
diff --git a/crates/cli/fff-daemon/src/main.rs b/crates/cli/fff-daemon/src/main.rs
new file mode 100644
index 00000000..010a2651
--- /dev/null
+++ b/crates/cli/fff-daemon/src/main.rs
@@ -0,0 +1,59 @@
+//! Long-running daemon that indexes directories and serves search requests.
+//!
+//! The daemon binds a per-user Unix socket at `/tmp/fff-daemon-.sock`.
+//! Clients (typically `fff-rg`) connect and send a single request per
+//! connection:
+//!
+//! 1.
Client sends a [`RequestHeader`] + rkyv-serialized [`SearchRequest`]
+//!    body, along with an output fd passed via SCM_RIGHTS.
+//! 2. Daemon looks up (or creates) a [`FilePicker`] for the requested
+//!    directory, runs the search, and writes results directly to the
+//!    client's output fd.
+//! 3. Daemon writes back a one-byte [`SearchStatus`] and closes the
+//!    connection.
+//!
+//! Directories are indexed on first request and kept alive in a session pool
+//! with a background file watcher. Subsequent queries against the same
+//! directory reuse the warm index.
+
+mod convert;
+pub(crate) mod output;
+mod query_service;
+mod session_pool;
+
+use std::sync::Arc;
+use std::sync::atomic::AtomicBool;
+
+use clap::Parser;
+
+use crate::query_service::QueryService;
+
+#[global_allocator]
+static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
+
+/// Command-line arguments for the daemon.
+#[derive(Parser)]
+#[command(name = "fff-daemon", about = "FFF file finder daemon")]
+struct Args {
+    /// Log level used for the `fff_daemon` and `fff` targets. Read from
+    /// `--log-level` or the `FFF_LOG` environment variable; only applied when
+    /// no filter can be built from the default env-filter variable (see `main`).
+    #[arg(long, default_value = "info", env = "FFF_LOG")]
+    log_level: String,
+}
+
+/// Entry point: sets up logging, registers shutdown signals, and runs the query service.
+fn main() {
+    let args = Args::parse();
+
+    // The default env filter wins when it parses; otherwise fall back to a
+    // filter built from `log_level` for this crate and the `fff` target.
+    tracing_subscriber::fmt()
+        .with_env_filter(
+            tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| {
+                format!("fff_daemon={lvl},fff={lvl}", lvl = args.log_level).into()
+            }),
+        )
+        .init();
+
+    // Shared flag handed to the signal handlers and to the accept loop, which
+    // polls it to know when to stop.
+    let shutdown = Arc::new(AtomicBool::new(false));
+    signal_hook::flag::register(signal_hook::consts::SIGTERM, shutdown.clone())
+        .expect("failed to register SIGTERM handler");
+    signal_hook::flag::register(signal_hook::consts::SIGINT, shutdown.clone())
+        .expect("failed to register SIGINT handler");
+
+    // Blocks until the shutdown flag is observed by the accept loop.
+    QueryService::new(shutdown).run();
+}
diff --git a/crates/cli/fff-daemon/src/output.rs b/crates/cli/fff-daemon/src/output.rs
new file mode 100644
index 00000000..567cf80d
--- /dev/null
+++ b/crates/cli/fff-daemon/src/output.rs
@@ -0,0 +1,331 @@
+//! Formats search results into `rg`-compatible terminal output.
+//!
+//!
Takes structured [`GrepResult`]/[`SearchResult`] data from the search
+//! engine and renders it to an arbitrary [`Write`] sink (typically the
+//! client's stdout fd received over the Unix socket). Output style is
+//! controlled by [`OutputFlags`].
+
+use std::collections::HashSet;
+use std::io::{BufWriter, Write};
+
+use fff::{FilePicker, GrepMatch, GrepResult, SearchResult};
+use fff_ipc_domain::OutputFlags;
+
+// ANSI escape sequences matching rg's default color scheme.
+const MAGENTA: &str = "\x1b[35m";
+const GREEN: &str = "\x1b[32m";
+const RED_BOLD: &str = "\x1b[1m\x1b[31m";
+const RESET: &str = "\x1b[0m";
+
+/// Writes search results to a buffered sink, formatting according to [`OutputFlags`].
+///
+/// `W` is the underlying sink; all output goes through an internal
+/// [`BufWriter`], and every public `write_*` method flushes before returning
+/// (except in quiet mode, where nothing is written).
+pub struct ResultWriter<W: Write> {
+    out: BufWriter<W>,
+    cfg: OutputFlags,
+}
+
+impl<W: Write> ResultWriter<W> {
+    /// Wraps `out` in a [`BufWriter`] and remembers the output flags.
+    pub fn new(out: W, cfg: OutputFlags) -> Self {
+        Self { out: BufWriter::new(out), cfg }
+    }
+
+    /// Dispatches grep results to the appropriate output mode.
+    /// Returns `Ok(true)` if any matches were written.
+    ///
+    /// Mode precedence is `QUIET`, `FILES_ONLY`, `COUNT_ONLY`, `VIMGREP`, then
+    /// heading/inline. In `QUIET` mode nothing is written; the return value
+    /// still reports whether there were matches.
+    ///
+    /// # Errors
+    /// Returns any I/O error raised while writing to or flushing the sink.
+    pub fn write_grep(
+        &mut self,
+        picker: &FilePicker,
+        result: &GrepResult<'_>,
+    ) -> Result<bool, Box<dyn std::error::Error>> {
+        if self.cfg.contains(OutputFlags::QUIET) {
+            return Ok(!result.matches.is_empty());
+        }
+        if self.cfg.contains(OutputFlags::FILES_ONLY) {
+            return self.write_files_with_matches(picker, result);
+        }
+        if self.cfg.contains(OutputFlags::COUNT_ONLY) {
+            return self.write_counts(picker, result);
+        }
+        if self.cfg.contains(OutputFlags::VIMGREP) {
+            return self.write_vimgrep(picker, result);
+        }
+
+        if self.cfg.contains(OutputFlags::HEADING) {
+            self.write_heading_mode(picker, result)?;
+        } else {
+            self.write_inline_mode(picker, result)?;
+        }
+        self.out.flush()?;
+        Ok(!result.matches.is_empty())
+    }
+
+    /// Matches grouped under a filename header, separated by blank lines.
+    fn write_heading_mode(
+        &mut self,
+        picker: &FilePicker,
+        result: &GrepResult<'_>,
+    ) -> std::io::Result<()> {
+        // Index of the file whose header was printed last.
+        let mut current_file: Option<usize> = None;
+        // Last line number already emitted for the current file.
+        let mut prev_line: Option<u64> = None;
+
+        for m in &result.matches {
+            if current_file != Some(m.file_index) {
+                // A blank line separates file groups, but not before the first one.
+                if current_file.is_some() {
+                    writeln!(self.out)?;
+                }
+                let path = result.files[m.file_index].relative_path(picker);
+                self.write_path(&path)?;
+                writeln!(self.out)?;
+                current_file = Some(m.file_index);
+                prev_line = None;
+            }
+
+            self.write_context_separator(m, &mut prev_line)?;
+            self.write_context_before(m, None, &mut prev_line)?;
+            self.write_match_line(m, None)?;
+            self.write_context_after(m, None, &mut prev_line)?;
+            prev_line = Some(m.line_number + m.context_after.len() as u64);
+        }
+        Ok(())
+    }
+
+    /// Each match line prefixed with `path:` when `WITH_FILENAME` is set.
+    ///
+    /// The relative path is computed once per file (when the file index
+    /// changes) rather than once per match.
+    fn write_inline_mode(
+        &mut self,
+        picker: &FilePicker,
+        result: &GrepResult<'_>,
+    ) -> std::io::Result<()> {
+        let with_filename = self.cfg.contains(OutputFlags::WITH_FILENAME);
+        let mut current_file: Option<usize> = None;
+        let mut prev_line: Option<u64> = None;
+        // Path prefix for the current file; stays `None` when filenames are off.
+        let mut path = None;
+
+        for m in &result.matches {
+            if current_file != Some(m.file_index) {
+                current_file = Some(m.file_index);
+                prev_line = None;
+                path = with_filename.then(|| result.files[m.file_index].relative_path(picker));
+            }
+            let inline_path = path.as_deref();
+
+            self.write_context_separator(m, &mut prev_line)?;
+            self.write_context_before(m, inline_path, &mut prev_line)?;
+            self.write_match_line(m, inline_path)?;
+            self.write_context_after(m, inline_path, &mut prev_line)?;
+            prev_line = Some(m.line_number + m.context_after.len() as u64);
+        }
+        Ok(())
+    }
+
+    /// Writes a file path, magenta when color is on.
+    fn write_path(&mut self, path: &str) -> std::io::Result<()> {
+        if self.cfg.contains(OutputFlags::COLOR) {
+            write!(self.out, "{RESET}{MAGENTA}{path}{RESET}")
+        } else {
+            write!(self.out, "{path}")
+        }
+    }
+
+    /// Writes `N:` or `N-` (for context lines). No-op when `LINE_NUMBER` is off.
+    fn write_line_number(&mut self, n: u64, sep: char) -> std::io::Result<()> {
+        if !self.cfg.contains(OutputFlags::LINE_NUMBER) {
+            return Ok(());
+        }
+        if self.cfg.contains(OutputFlags::COLOR) {
+            write!(self.out, "{RESET}{GREEN}{n}{RESET}{sep}")
+        } else {
+            write!(self.out, "{n}{sep}")
+        }
+    }
+
+    /// Renders one match: optional path prefix, line number, column, and content.
+    ///
+    /// The column is printed 1-based (`m.col + 1`). Match highlighting is only
+    /// applied when color is on and the match carries byte offsets.
+    fn write_match_line(
+        &mut self,
+        m: &GrepMatch,
+        inline_path: Option<&str>,
+    ) -> std::io::Result<()> {
+        if let Some(path) = inline_path {
+            self.write_path(path)?;
+            write!(self.out, ":")?;
+        }
+        self.write_line_number(m.line_number, ':')?;
+        if self.cfg.contains(OutputFlags::COLUMN) {
+            let col = m.col + 1;
+            if self.cfg.contains(OutputFlags::COLOR) {
+                write!(self.out, "{RESET}{col}{RESET}:")?;
+            } else {
+                write!(self.out, "{col}:")?;
+            }
+        }
+        if self.cfg.contains(OutputFlags::COLOR) && !m.match_byte_offsets.is_empty() {
+            self.write_highlighted(&m.line_content, &m.match_byte_offsets)
+        } else {
+            writeln!(self.out, "{}", m.line_content)
+        }
+    }
+
+    /// Writes a line with match spans wrapped in bold red ANSI codes.
+    ///
+    /// `offsets` are `(start, end)` byte ranges into `line`. Each span is
+    /// clamped to the part of the line not yet written, so overlapping,
+    /// unsorted, or out-of-range spans can neither duplicate bytes nor slice
+    /// out of bounds. For sorted, non-overlapping, in-range spans the output
+    /// is unchanged.
+    fn write_highlighted(&mut self, line: &str, offsets: &[(u32, u32)]) -> std::io::Result<()> {
+        let bytes = line.as_bytes();
+        // Invariant: `pos <= bytes.len()`; everything before `pos` is already written.
+        let mut pos = 0usize;
+        for &(start, end) in offsets {
+            let s = (start as usize).clamp(pos, bytes.len());
+            let e = (end as usize).clamp(s, bytes.len());
+            // Plain text between the previous span and this one (may be empty).
+            self.out.write_all(&bytes[pos..s])?;
+            if s < e {
+                write!(self.out, "{RESET}{RED_BOLD}")?;
+                self.out.write_all(&bytes[s..e])?;
+                write!(self.out, "{RESET}")?;
+            }
+            pos = e;
+        }
+        // Remainder of the line after the last span.
+        self.out.write_all(&bytes[pos..])?;
+        writeln!(self.out)
+    }
+
+    /// Prints `--` between non-contiguous context blocks.
+    ///
+    /// A separator is written only when the match carries context lines and
+    /// its first context line does not directly follow `prev_line`.
+    fn write_context_separator(
+        &mut self,
+        m: &GrepMatch,
+        prev_line: &mut Option<u64>,
+    ) -> std::io::Result<()> {
+        let Some(prev) = *prev_line else {
+            // Nothing printed yet for this file: no gap to mark.
+            return Ok(());
+        };
+        let has_context = !m.context_before.is_empty() || !m.context_after.is_empty();
+        if has_context {
+            let context_start = m.line_number.saturating_sub(m.context_before.len() as u64);
+            if context_start > prev + 1 {
+                writeln!(self.out, "--")?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Writes context lines before a match, skipping any that overlap with previous output.
+    ///
+    /// Line numbers are derived by counting back from the match line; lines at
+    /// or before `prev_line` are treated as already printed.
+    fn write_context_before(
+        &mut self,
+        m: &GrepMatch,
+        inline_path: Option<&str>,
+        prev_line: &mut Option<u64>,
+    ) -> std::io::Result<()> {
+        let start_line = m.line_number.saturating_sub(m.context_before.len() as u64);
+        for (i, line) in m.context_before.iter().enumerate() {
+            let line_num = start_line + i as u64;
+            if let Some(prev) = *prev_line
+                && line_num <= prev
+            {
+                continue;
+            }
+            if let Some(path) = inline_path {
+                self.write_path(path)?;
+                write!(self.out, "-")?;
+            }
+            self.write_line_number(line_num, '-')?;
+            writeln!(self.out, "{line}")?;
+        }
+        Ok(())
+    }
+
+    /// Writes context lines after a match.
+    ///
+    /// On success `prev_line` is advanced to the last context line written
+    /// (or to the match line itself when there is no trailing context).
+    fn write_context_after(
+        &mut self,
+        m: &GrepMatch,
+        inline_path: Option<&str>,
+        prev_line: &mut Option<u64>,
+    ) -> std::io::Result<()> {
+        for (i, line) in m.context_after.iter().enumerate() {
+            let line_num = m.line_number + 1 + i as u64;
+            if let Some(path) = inline_path {
+                self.write_path(path)?;
+                write!(self.out, "-")?;
+            }
+            self.write_line_number(line_num, '-')?;
+            writeln!(self.out, "{line}")?;
+        }
+        *prev_line = Some(m.line_number + m.context_after.len() as u64);
+        Ok(())
+    }
+
+    /// `file:line:col:text` format for editor integration.
+    ///
+    /// One line per match, column 1-based, never colored. Returns `Ok(true)`
+    /// if there was at least one match.
+    fn write_vimgrep(
+        &mut self,
+        picker: &FilePicker,
+        result: &GrepResult<'_>,
+    ) -> Result<bool, Box<dyn std::error::Error>> {
+        for m in &result.matches {
+            let path = result.files[m.file_index].relative_path(picker);
+            let col = m.col + 1;
+            writeln!(self.out, "{path}:{}:{col}:{}", m.line_number, m.line_content)?;
+        }
+        self.out.flush()?;
+        Ok(!result.matches.is_empty())
+    }
+
+    /// Prints per-file match counts.
+    ///
+    /// Counts are accumulated over runs of consecutive matches with the same
+    /// file index, so a file whose matches are not adjacent in `result.matches`
+    /// would be reported more than once.
+    fn write_counts(
+        &mut self,
+        picker: &FilePicker,
+        result: &GrepResult<'_>,
+    ) -> Result<bool, Box<dyn std::error::Error>> {
+        let mut file_counts: Vec<(usize, usize)> = Vec::new();
+        for m in &result.matches {
+            match file_counts.last_mut() {
+                Some((idx, count)) if *idx == m.file_index => *count += 1,
+                _ => file_counts.push((m.file_index, 1)),
+            }
+        }
+        for &(file_idx, count) in &file_counts {
+            if self.cfg.contains(OutputFlags::WITH_FILENAME) {
+                // Same path rendering (and coloring) as every other mode.
+                let path = result.files[file_idx].relative_path(picker);
+                self.write_path(&path)?;
+                writeln!(self.out, ":{count}")?;
+            } else {
+                writeln!(self.out, "{count}")?;
+            }
+        }
+        self.out.flush()?;
+        Ok(!file_counts.is_empty())
+    }
+
+    /// Prints only the names of files that contain matches.
+    ///
+    /// Each file is printed once, in order of first appearance.
+    fn write_files_with_matches(
+        &mut self,
+        picker: &FilePicker,
+        result: &GrepResult<'_>,
+    ) -> Result<bool, Box<dyn std::error::Error>> {
+        let mut seen = HashSet::new();
+        for m in &result.matches {
+            if seen.insert(m.file_index) {
+                let path = result.files[m.file_index].relative_path(picker);
+                self.write_path(&path)?;
+                writeln!(self.out)?;
+            }
+        }
+        self.out.flush()?;
+        Ok(!seen.is_empty())
+    }
+
+    /// Writes fuzzy file-search results, one path per line.
+    ///
+    /// In `QUIET` mode nothing is written. Returns `Ok(true)` if the search
+    /// produced at least one item.
+    ///
+    /// # Errors
+    /// Returns any I/O error raised while writing to or flushing the sink.
+    pub fn write_files(
+        &mut self,
+        picker: &FilePicker,
+        result: &SearchResult<'_>,
+    ) -> Result<bool, Box<dyn std::error::Error>> {
+        let found = !result.items.is_empty();
+        if self.cfg.contains(OutputFlags::QUIET) {
+            return Ok(found);
+        }
+        for item in &result.items {
+            writeln!(self.out, "{}", item.relative_path(picker))?;
+        }
+        self.out.flush()?;
+        Ok(found)
+    }
+}
diff --git a/crates/cli/fff-daemon/src/query_service.rs b/crates/cli/fff-daemon/src/query_service.rs
new file mode 100644
index 00000000..5c1b3cf6
--- /dev/null
+++ b/crates/cli/fff-daemon/src/query_service.rs
@@ -0,0 +1,263 @@
+//! Listens on a per-user Unix domain socket for search requests. Each client
+//! connects, sends a [`RequestHeader`] + rkyv body alongside an output fd
+//! (via SCM_RIGHTS), and receives a single [`SearchStatus`] byte back.
+
+use std::io::{Read, Write};
+use std::num::NonZeroU64;
+use std::os::fd::OwnedFd;
+use std::os::unix::fs::PermissionsExt;
+use std::os::unix::io::FromRawFd;
+use std::os::unix::net::UnixListener;
+use std::path::Path;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::time::Duration;
+
+/// Max time to wait for the client to send the request body.
+const READ_TIMEOUT: Duration = Duration::from_secs(30);
+/// Max time to wait for the file index to finish its initial scan.
+const SCAN_TIMEOUT: Duration = Duration::from_secs(30);
+/// Accept loop poll interval (non-blocking listener).
+const ACCEPT_POLL: Duration = Duration::from_millis(1);
+/// Owner-only permissions for the daemon socket.
+const SOCKET_MODE: u32 = 0o600;
+/// Inline buffer for small rkyv bodies — avoids heap allocation for typical requests.
+const IPC_BODY_INLINE: usize = 512;
+/// Default max file size for grep when the client doesn't specify one (4 MiB).
+const DEFAULT_MAX_FILE_SIZE: u64 = 4 * 1024 * 1024;
+/// Upper bound on the request body length accepted from a client (1 MiB).
+/// The length comes from the peer-supplied header, so it is validated before
+/// a buffer of that size is allocated.
+const MAX_BODY_LEN: usize = 1024 * 1024;
+
+use fff::{
+    FilePicker, FuzzySearchOptions, GrepMode, GrepSearchOptions, QueryParser, parse_grep_query,
+};
+use fff_ipc_domain::{
+    GrepQuery, GrepSearch, OutputFlags, RequestHeader, SearchKind, SearchRequest, SearchStatus,
+    daemon_socket_path,
+};
+use sendfd::RecvWithFd;
+
+use crate::convert::IntoCoreExt;
+
+use crate::session_pool::{MAX_SESSIONS, SessionPool};
+
+/// Parsed client request with the fd to write results into.
+struct IncomingQuery {
+    request: SearchRequest,
+    output: std::fs::File,
+}
+
+impl IncomingQuery {
+    /// Reads the 4-byte length header, receives the output fd, and deserializes the rkyv body.
+    ///
+    /// # Errors
+    /// Fails when the read times out, no fd accompanies the header, the
+    /// header is short, the declared body exceeds [`MAX_BODY_LEN`], the body
+    /// cannot be read in full, or rkyv validation/deserialization fails.
+    fn recv(
+        stream: &mut std::os::unix::net::UnixStream,
+    ) -> Result<Self, Box<dyn std::error::Error>> {
+        // Arm the timeout before the first read so a client that connects and
+        // then sends nothing cannot pin a worker thread indefinitely.
+        stream.set_read_timeout(Some(READ_TIMEOUT))?;
+
+        let mut raw_header = [0u8; RequestHeader::SIZE];
+        let mut fds = [0; 1];
+        let (n, fd_count) = stream.recv_with_fd(&mut raw_header, &mut fds)?;
+
+        if fd_count == 0 {
+            return Err("no fd in ancillary data".into());
+        }
+
+        // SAFETY: fd received via SCM_RIGHTS — this process owns it exclusively.
+        // Wrapped immediately so every early return below closes it.
+        let owned_fd = unsafe { OwnedFd::from_raw_fd(fds[0]) };
+
+        if n < RequestHeader::SIZE {
+            return Err("incomplete header".into());
+        }
+
+        let header = RequestHeader::decode(raw_header);
+        let body_len = header.body_len as usize;
+        if body_len > MAX_BODY_LEN {
+            return Err(format!(
+                "request body too large: {body_len} bytes (limit {MAX_BODY_LEN})"
+            )
+            .into());
+        }
+
+        let mut body = smallvec::SmallVec::<[u8; IPC_BODY_INLINE]>::from_elem(0, body_len);
+        stream.read_exact(&mut body)?;
+
+        // NOTE(review): rkyv's checked `access` validates pointer alignment and a
+        // byte buffer carries no alignment guarantee — confirm this buffer is
+        // always sufficiently aligned for the archived `SearchRequest`, or switch
+        // to rkyv's aligned buffer type.
+        let archived =
+            rkyv::access::<rkyv::Archived<SearchRequest>, rkyv::rancor::Error>(&body)
+                .map_err(|e| format!("rkyv access: {e}"))?;
+        let request = rkyv::deserialize::<SearchRequest, rkyv::rancor::Error>(archived)
+            .map_err(|e| format!("rkyv deserialize: {e}"))?;
+
+        Ok(Self { request, output: std::fs::File::from(owned_fd) })
+    }
+}
+
+/// RAII guard for the daemon's Unix listener socket. Cleans up the socket file on drop.
+struct ActiveDaemonSocket {
+    path: std::path::PathBuf,
+    listener: UnixListener,
+}
+
+impl ActiveDaemonSocket {
+    /// Binds a non-blocking Unix listener at the daemon socket path with 0600 permissions.
+    ///
+    /// # Errors
+    /// Fails when another process is already accepting connections on the
+    /// socket path, or when binding, setting permissions, or switching the
+    /// listener to non-blocking mode fails. If a step after the bind fails,
+    /// the socket file is removed again by `Drop`.
+    fn bind() -> Result<Self, Box<dyn std::error::Error>> {
+        let path = daemon_socket_path();
+
+        // A successful connect means a live daemon owns this path. Unlinking it
+        // would orphan that daemon, and its own cleanup on exit would later
+        // delete *our* socket file. The probe shows up on the other side as one
+        // failed (empty) request.
+        if std::os::unix::net::UnixStream::connect(&path).is_ok() {
+            return Err(
+                format!("another daemon is already listening on {}", path.display()).into()
+            );
+        }
+        // Stale file from a previous run (or nothing at all): best-effort removal.
+        let _ = std::fs::remove_file(&path);
+
+        if let Some(parent) = path.parent() {
+            let _ = std::fs::create_dir_all(parent);
+        }
+
+        let listener = UnixListener::bind(&path)?;
+        // Build the guard first so the `?` exits below clean up the socket file.
+        let socket = Self { path, listener };
+        std::fs::set_permissions(&socket.path, std::fs::Permissions::from_mode(SOCKET_MODE))?;
+        socket.listener.set_nonblocking(true)?;
+
+        Ok(socket)
+    }
+}
+
+impl Drop for ActiveDaemonSocket {
+    /// Removes the socket file; errors are ignored because nothing can be done at drop time.
+    fn drop(&mut self) {
+        let _ = std::fs::remove_file(&self.path);
+    }
+}
+
+/// Accepts connections on the daemon socket and dispatches searches to worker threads.
+pub struct QueryService {
+    pool: Arc<SessionPool>,
+    shutdown: Arc<AtomicBool>,
+    workers: rayon::ThreadPool,
+}
+
+impl QueryService {
+    /// Creates the session pool and a rayon thread pool sized to `MAX_SESSIONS`.
+    ///
+    /// `shutdown` is the flag the accept loop polls to know when to stop.
+    ///
+    /// # Panics
+    /// Panics if the worker thread pool cannot be built.
+    pub fn new(shutdown: Arc<AtomicBool>) -> Self {
+        let pool = SessionPool::new();
+        let workers = rayon::ThreadPoolBuilder::new()
+            .num_threads(MAX_SESSIONS)
+            .thread_name(|i| format!("conn-{i}"))
+            .build()
+            .expect("failed to build connection thread pool");
+        Self { pool: Arc::new(pool), shutdown, workers }
+    }
+
+    /// Blocking accept loop. Polls the non-blocking listener at 1ms intervals until shutdown.
+    ///
+    /// Each accepted connection is handed to the worker pool. Accept errors
+    /// other than `WouldBlock` are logged and followed by a short backoff so a
+    /// persistent failure cannot turn the loop into a busy spin. On exit the
+    /// session pool is told to shut down.
+    #[tracing::instrument(level = "trace", skip(self))]
+    pub fn run(&self) {
+        /// Pause after a failed `accept` before trying again.
+        const ACCEPT_ERROR_BACKOFF: Duration = Duration::from_millis(100);
+
+        let socket = match ActiveDaemonSocket::bind() {
+            Ok(s) => s,
+            Err(e) => {
+                tracing::error!(err = %e, "failed to bind unix socket");
+                return;
+            }
+        };
+
+        tracing::info!(path = %socket.path.display(), "query service listening");
+
+        while !self.shutdown.load(Ordering::Relaxed) {
+            match socket.listener.accept() {
+                Ok((mut stream, _)) => {
+                    // Handlers use blocking reads with a timeout. One bad
+                    // connection must not take the whole accept loop down, so
+                    // drop it and carry on.
+                    if let Err(e) = stream.set_nonblocking(false) {
+                        tracing::warn!(err = %e, "failed to make accepted socket blocking");
+                        continue;
+                    }
+                    let pool = self.pool.clone();
+                    self.workers.spawn(move || {
+                        if let Err(e) = Self::handle_connection(&pool, &mut stream) {
+                            tracing::warn!(err = %e, "connection handler failed");
+                        }
+                    });
+                }
+                Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => {
+                    std::thread::sleep(ACCEPT_POLL);
+                }
+                Err(e) => {
+                    tracing::warn!(err = %e, "accept failed");
+                    std::thread::sleep(ACCEPT_ERROR_BACKOFF);
+                }
+            }
+        }
+
+        self.pool.shutdown();
+    }
+
+    /// Parses one request, runs the search, and writes back a status byte.
+    ///
+    /// A failed search is logged and reported to the client as
+    /// `SearchStatus::Failed`. A request that cannot be received or parsed
+    /// returns the error without sending a status byte.
+    #[tracing::instrument(level = "trace", skip_all)]
+    fn handle_connection(
+        pool: &SessionPool,
+        stream: &mut std::os::unix::net::UnixStream,
+    ) -> Result<(), Box<dyn std::error::Error>> {
+        let mut query = IncomingQuery::recv(stream)?;
+
+        let status = match Self::run_search(pool, &query.request, &mut query.output) {
+            Ok(true) => SearchStatus::Match,
+            Ok(false) => SearchStatus::NoMatch,
+            Err(e) => {
+                tracing::warn!(err = %e, dir = %query.request.directory, "search failed");
+                SearchStatus::Failed
+            }
+        };
+
+        stream.write_all(&[status.into()])?;
+        Ok(())
+    }
+
+    /// Acquires a `FilePicker` for the directory, waits for indexing, and dispatches by mode.
+    ///
+    /// Returns `Ok(true)` when the search produced at least one result.
+    ///
+    /// # Errors
+    /// Fails when the session cannot be created, the initial scan does not
+    /// finish within `SCAN_TIMEOUT`, the picker is unavailable, or writing the
+    /// results fails.
+    #[tracing::instrument(level = "trace", skip(pool, out), fields(dir = %req.directory))]
+    fn run_search(
+        pool: &SessionPool,
+        req: &SearchRequest,
+        out: &mut std::fs::File,
+    ) -> Result<bool, Box<dyn std::error::Error>> {
+        let picker_handle = pool.get_or_create(Path::new(&req.directory))?;
+        if !picker_handle.wait_for_scan(SCAN_TIMEOUT) {
+            // Report the configured timeout so the message cannot drift from the constant.
+            return Err(format!(
+                "indexing {} timed out after {}s; the directory may still be scanning — retry shortly",
+                req.directory,
+                SCAN_TIMEOUT.as_secs()
+            )
+            .into());
+        }
+
+        let guard = picker_handle.read()?;
+        let picker = guard.as_ref().ok_or("picker not ready after scan")?;
+
+        match &req.search {
+            SearchKind::Files { query } => {
+                Self::write_file_results(picker, query, req.output, out)
+            }
+            SearchKind::Grep(grep) => Self::write_grep_results(picker, grep, req.output, out),
+        }
+    }
+
+    /// Runs a grep search and writes formatted matches to the output fd.
+    ///
+    /// The request's regex/literal choice selects the grep mode; an absent
+    /// max file size falls back to `DEFAULT_MAX_FILE_SIZE` and an absent max
+    /// count is passed to the engine as `0`.
+    fn write_grep_results(
+        picker: &FilePicker,
+        grep: &GrepSearch,
+        output: OutputFlags,
+        out: &mut std::fs::File,
+    ) -> Result<bool, Box<dyn std::error::Error>> {
+        let (query_str, mode) = match &grep.query {
+            GrepQuery::Regex(q) => (q.as_str(), GrepMode::Regex),
+            GrepQuery::Literal(q) => (q.as_str(), GrepMode::PlainText),
+        };
+        let parsed = parse_grep_query(query_str);
+
+        let options = GrepSearchOptions {
+            max_file_size: grep.max_filesize.map_or(DEFAULT_MAX_FILE_SIZE, NonZeroU64::get),
+            max_matches_per_file: grep.max_count.map_or(0, |n| n.get() as usize),
+            case_mode: grep.case_mode.into_core(),
+            file_offset: 0,
+            page_limit: usize::MAX,
+            mode,
+            time_budget_ms: 0,
+            before_context: grep.before_context as usize,
+            after_context: grep.after_context as usize,
+            classify_definitions: false,
+            trim_whitespace: grep.trim,
+            abort_signal: None,
+        };
+
+        let result = picker.grep(&parsed, &options);
+        let mut writer = crate::output::ResultWriter::new(out, output);
+        writer.write_grep(picker, &result)
+    }
+
+    /// Runs a fuzzy file search and writes formatted results to the output fd.
+    ///
+    /// Returns `Ok(true)` when at least one file matched.
+    fn write_file_results(
+        picker: &FilePicker,
+        query: &str,
+        output: OutputFlags,
+        out: &mut std::fs::File,
+    ) -> Result<bool, Box<dyn std::error::Error>> {
+        let parsed = QueryParser::default().parse(query);
+        let result = picker.fuzzy_search(&parsed, None, FuzzySearchOptions::default());
+        let mut writer = crate::output::ResultWriter::new(out, output);
+        writer.write_files(picker, &result)
+    }
+}
+
diff --git a/crates/cli/fff-daemon/src/session_pool.rs b/crates/cli/fff-daemon/src/session_pool.rs
new file mode 100644
index 00000000..b02376c7
--- /dev/null
+++ b/crates/cli/fff-daemon/src/session_pool.rs
@@ -0,0 +1,158 @@
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+use std::sync::{Arc, Condvar, Mutex};
+use std::thread::JoinHandle;
+use std::time::{Duration, Instant};
+
+use parking_lot::RwLock;
+
+use fff::{FFFMode, FilePicker, FilePickerOptions, SharedFilePicker, SharedFrecency};
+
+/// Max concurrent directory indexes the daemon will keep alive.
+pub const MAX_SESSIONS: usize = 8;
+/// Sessions idle longer than this are evicted.
+const IDLE_TIMEOUT: Duration = Duration::from_mins(10);
+/// How often the evictor thread checks for idle sessions.
+const EVICTION_INTERVAL: Duration = Duration::from_secs(60);
+
+/// One indexed directory: its shared picker and the time it was last requested.
+struct Session {
+    picker: SharedFilePicker,
+    last_accessed: Instant,
+}
+
+/// Session table shared between the pool handle and the evictor thread,
+/// keyed by canonical directory path.
+struct Inner {
+    sessions: RwLock<HashMap<PathBuf, Session>>,
+}
+
+/// Signals the evictor thread to wake up and exit.
+struct Shutdown {
+    /// `true` once shutdown has been requested.
+    mu: Mutex<bool>,
+    cv: Condvar,
+}
+
+impl Shutdown {
+    fn new() -> Self {
+        Self { mu: Mutex::new(false), cv: Condvar::new() }
+    }
+
+    /// Marks shutdown as requested and wakes every waiter.
+    fn trigger(&self) {
+        // The guarded value is a plain flag, so a poisoned lock is still safe to use.
+        *self.mu.lock().unwrap_or_else(std::sync::PoisonError::into_inner) = true;
+        self.cv.notify_all();
+    }
+
+    /// Sleeps for `dur`, returning `true` immediately if shutdown was signaled.
+ fn wait(&self, dur: Duration) -> bool { + let guard = self.mu.lock().unwrap(); + if *guard { + return true; + } + let (guard, _) = self.cv.wait_timeout(guard, dur).unwrap(); + *guard + } +} + +pub struct SessionPool { + inner: Arc, + shutdown: Arc, + _evictor: JoinHandle<()>, +} + +impl SessionPool { + pub fn new() -> Self { + let inner = Arc::new(Inner { sessions: RwLock::new(HashMap::new()) }); + let shutdown = Arc::new(Shutdown::new()); + + let evictor_inner = inner.clone(); + let evictor_shutdown = shutdown.clone(); + let evictor = std::thread::Builder::new() + .name("session-evictor".into()) + .spawn(move || { + while !evictor_shutdown.wait(EVICTION_INTERVAL) { + let evicted = evictor_inner.evict_idle(); + if evicted > 0 { + tracing::debug!( + evicted, + remaining = evictor_inner.session_count(), + "eviction sweep" + ); + } + } + }) + .expect("failed to spawn evictor thread"); + + Self { inner, shutdown, _evictor: evictor } + } + + pub fn shutdown(&self) { + self.shutdown.trigger(); + } + + pub fn get_or_create(&self, path: &Path) -> Result { + self.inner.get_or_create(path) + } +} + +impl Inner { + #[tracing::instrument(level = "trace", skip(self), fields(path = %path.display()))] + fn get_or_create(&self, path: &Path) -> Result { + let canonical = + dunce::canonicalize(path).map_err(|_| fff::Error::InvalidPath(path.to_path_buf()))?; + + let mut sessions = self.sessions.write(); + + if let Some(session) = sessions.get_mut(&canonical) { + session.last_accessed = Instant::now(); + return Ok(session.picker.clone()); + } + + if sessions.len() >= MAX_SESSIONS { + let lru_key = + sessions.iter().min_by_key(|(_, s)| s.last_accessed).map(|(k, _)| k.clone()); + if let Some(key) = lru_key { + tracing::debug!(path = %key.display(), "evicting LRU session (pool full)"); + sessions.remove(&key); + } + } + + let picker = SharedFilePicker::default(); + let frecency = SharedFrecency::default(); + + FilePicker::new_with_shared_state( + picker.clone(), + frecency, + 
FilePickerOptions { + base_path: canonical.to_string_lossy().into_owned(), + enable_mmap_cache: false, + enable_content_indexing: true, + mode: FFFMode::Ai, + watch: true, + follow_symlinks: false, + ..Default::default() + }, + )?; + + sessions.insert( + canonical, + Session { picker: picker.clone(), last_accessed: Instant::now() }, + ); + + Ok(picker) + } + + fn evict_idle(&self) -> usize { + let mut sessions = self.sessions.write(); + let before = sessions.len(); + sessions.retain(|path, session| { + let keep = session.last_accessed.elapsed() < IDLE_TIMEOUT; + if !keep { + tracing::debug!(path = %path.display(), idle_secs = session.last_accessed.elapsed().as_secs(), "evicting idle session"); + } + keep + }); + before - sessions.len() + } + + fn session_count(&self) -> usize { + self.sessions.read().len() + } +} diff --git a/crates/cli/fff-ipc-domain/Cargo.toml b/crates/cli/fff-ipc-domain/Cargo.toml new file mode 100644 index 00000000..4c60508a --- /dev/null +++ b/crates/cli/fff-ipc-domain/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "fff-ipc-domain" +version = "0.0.1" +edition = "2024" +description = "FFF domain types for IPC." +license = "MIT" + +[dependencies] +libc = { workspace = true } +rkyv = { workspace = true } + +[lints] +workspace = true diff --git a/crates/cli/fff-ipc-domain/src/lib.rs b/crates/cli/fff-ipc-domain/src/lib.rs new file mode 100644 index 00000000..5c9ed37f --- /dev/null +++ b/crates/cli/fff-ipc-domain/src/lib.rs @@ -0,0 +1,176 @@ +//! Wire types shared between `fff-daemon` and `fff-rg`. +//! +//! The IPC protocol is one request per connection over a Unix domain socket: +//! - Client sends a [`RequestHeader`] (4-byte LE length) + rkyv-serialized +//! [`SearchRequest`] body, with an output fd attached via SCM_RIGHTS. +//! - Daemon writes results to the fd and replies with a [`SearchStatus`] byte. +//! +//! All structured payloads use rkyv zero-copy serialization. The socket path +//! is per-user via [`daemon_socket_path`]. 
+ +use std::num::{NonZeroU32, NonZeroU64}; +use std::path::PathBuf; + +use rkyv::{Archive, Deserialize, Serialize}; + +/// Returns the daemon socket path: `/tmp/fff-daemon-.sock`. +pub fn daemon_socket_path() -> PathBuf { + let uid = unsafe { libc::getuid() }; + std::env::temp_dir().join(format!("fff-daemon-{uid}.sock")) +} + +/// Case sensitivity strategy for grep searches. Mirrors `fff::CaseMode` but +/// with rkyv derives — fff-core doesn't depend on rkyv. +#[derive(Archive, Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)] +#[rkyv(derive(Debug))] +pub enum CaseMode { + /// Case-insensitive if the query is all lowercase, sensitive otherwise. + Smart, + /// Always case-sensitive. + Sensitive, + /// Always case-insensitive. + Insensitive, +} + +/// Grep pattern — either a regex or a literal string. +#[derive(Archive, Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[rkyv(derive(Debug))] +pub enum GrepQuery { + /// Regex pattern. + Regex(String), + /// Literal string match. + Literal(String), +} + +/// Grep-specific search parameters. +#[derive(Archive, Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[rkyv(derive(Debug))] +pub struct GrepSearch { + /// The search pattern. + pub query: GrepQuery, + /// Case sensitivity strategy. + pub case_mode: CaseMode, + /// Per-file match limit. `None` = unlimited. + pub max_count: Option, + /// Skip files larger than this (bytes). `None` = default (4 MB). + pub max_filesize: Option, + /// Lines of context before each match. + pub before_context: u32, + /// Lines of context after each match. + pub after_context: u32, + /// Strip leading whitespace from matched lines. + pub trim: bool, +} + +/// Discriminated search request — files or grep with variant-specific data. +#[derive(Archive, Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[rkyv(derive(Debug))] +pub enum SearchKind { + /// Fuzzy filename search. + Files { + /// Fuzzy query string. + query: String, + }, + /// Content search within files. 
+ Grep(GrepSearch), +} + +/// Bitmask controlling how the daemon formats search results. +/// Hand-rolled instead of `bitflags!` because the macro-generated struct +/// doesn't derive rkyv `Archive`/`Serialize`/`Deserialize`. +#[derive(Archive, Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)] +#[rkyv(derive(Debug))] +pub struct OutputFlags(u16); + +impl OutputFlags { + pub const COLOR: Self = Self(1 << 0); + pub const LINE_NUMBER: Self = Self(1 << 1); + pub const COLUMN: Self = Self(1 << 2); + pub const HEADING: Self = Self(1 << 3); + pub const WITH_FILENAME: Self = Self(1 << 4); + pub const COUNT_ONLY: Self = Self(1 << 5); + pub const FILES_ONLY: Self = Self(1 << 6); + pub const QUIET: Self = Self(1 << 7); + pub const VIMGREP: Self = Self(1 << 8); + + #[must_use] + pub const fn empty() -> Self { Self(0) } + #[must_use] + pub const fn contains(self, flag: Self) -> bool { self.0 & flag.0 == flag.0 } +} + +impl std::ops::BitOr for OutputFlags { + type Output = Self; + fn bitor(self, rhs: Self) -> Self { Self(self.0 | rhs.0) } +} + +impl std::ops::BitOrAssign for OutputFlags { + fn bitor_assign(&mut self, rhs: Self) { self.0 |= rhs.0; } +} + +/// Top-level request sent by the client as the rkyv body. +#[derive(Archive, Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[rkyv(derive(Debug))] +pub struct SearchRequest { + /// Root directory to search in (absolute path). + pub directory: String, + /// What to search for and how. + pub search: SearchKind, + /// Output formatting flags. + pub output: OutputFlags, +} + +/// One-byte response code the daemon writes back after a search completes. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +pub enum SearchStatus { + /// At least one result was found and written to the output fd. + Match = 0, + /// Search completed successfully but produced no results. + NoMatch = 1, + /// Search failed (e.g. indexing timeout, invalid query). 
+ Failed = 2, +} + +impl From for u8 { + fn from(s: SearchStatus) -> Self { + s as Self + } +} + +impl TryFrom for SearchStatus { + type Error = u8; + + fn try_from(value: u8) -> Result { + match value { + 0 => Ok(Self::Match), + 1 => Ok(Self::NoMatch), + 2 => Ok(Self::Failed), + other => Err(other), + } + } +} + +/// 4-byte little-endian length prefix for the rkyv request body. +#[derive(Debug, Clone, Copy)] +pub struct RequestHeader { + /// Length of the rkyv-serialized [`SearchRequest`] body in bytes. + pub body_len: u32, +} + +impl RequestHeader { + /// Wire size of the length prefix (4 bytes, LE u32). + pub const SIZE: usize = 4; + + /// Encodes the header as a 4-byte LE array for writing to the socket. + pub fn encode(body_len: usize) -> [u8; Self::SIZE] { + u32::try_from(body_len) + .expect("request body exceeds u32::MAX") + .to_le_bytes() + } + + /// Decodes a 4-byte LE buffer into a header. + pub fn decode(buf: [u8; Self::SIZE]) -> Self { + Self { body_len: u32::from_le_bytes(buf) } + } +} diff --git a/crates/cli/fff-rg/Cargo.toml b/crates/cli/fff-rg/Cargo.toml new file mode 100644 index 00000000..9ceb30e4 --- /dev/null +++ b/crates/cli/fff-rg/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "fff-rg" +version = "0.0.1" +edition = "2024" +description = "FFF CLI — fast file finder from the command line." +license = "MIT" + +[[bin]] +name = "fff-rg" +path = "src/main.rs" + +[dependencies] +fff-ipc-domain = { path = "../fff-ipc-domain" } +rkyv = { workspace = true } +sendfd = "0.4" +clap = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +bytesize = "2" +mimalloc = { workspace = true } +git2 = { workspace = true } +which = "8.0.3" + +[dev-dependencies] +test-case = "3" diff --git a/crates/cli/fff-rg/src/app_ctx.rs b/crates/cli/fff-rg/src/app_ctx.rs new file mode 100644 index 00000000..bc0931c4 --- /dev/null +++ b/crates/cli/fff-rg/src/app_ctx.rs @@ -0,0 +1,57 @@ +//! Startup context for `fff-rg`. 
Captures CLI args, terminal state, working +//! directory, and git root once — then threads through the searcher pipeline +//! as `&AppCtx` so nothing is recomputed or cloned per query. + +use std::borrow::Cow; +use std::io::IsTerminal; +use std::path::PathBuf; +use std::process::Command; + +use crate::types::cli::Args; + +/// Runtime context computed once at startup. +pub struct AppCtx<'a> { + /// Parsed CLI arguments. + pub args: &'a Args, + /// Whether stdout is connected to a terminal (controls color/heading defaults). + pub is_tty: bool, + /// Root search directory — borrowed from `args.paths[0]` or owned from `cwd`. + pub dir: Cow<'a, str>, + /// Git worktree root as a string, if `dir` is inside a repository. + pub git_root: Option, + /// Path to the `fff-daemon` binary — `$FFF_DAEMON` if set, else `fff-daemon` from `$PATH`. + pub daemon_bin: PathBuf, + /// Resolved absolute path to `rg`, if found on `$PATH` at startup. + pub rg_bin: Option, +} + +impl<'a> AppCtx<'a> { + /// Probes the environment once: resolves the search directory, discovers + /// the git root, and snapshots the terminal state. + #[tracing::instrument(level = "trace", skip_all)] + pub fn new(args: &'a Args) -> Self { + let is_tty = std::io::stdout().is_terminal(); + let dir: Cow<'a, str> = match args.paths.first() { + Some(p) => Cow::Borrowed(p.as_str()), + None => Cow::Owned( + std::env::current_dir() + .unwrap_or_default() + .to_string_lossy() + .into_owned(), + ), + }; + // workdir() borrows from Repository — must convert before drop. + let git_root = git2::Repository::discover(dir.as_ref()) + .ok() + .and_then(|repo| repo.workdir().map(|p| p.to_string_lossy().into_owned())); + let daemon_bin = std::env::var_os("FFF_DAEMON") + .map_or_else(|| PathBuf::from("fff-daemon"), PathBuf::from); + let rg_bin = which::which("rg").ok(); + Self { args, is_tty, dir, git_root, daemon_bin, rg_bin } + } + + /// Returns a `Command` for `rg`, or `None` if it wasn't found at startup. 
+ pub fn rg_command(&self) -> Option { + self.rg_bin.as_ref().map(Command::new) + } +} diff --git a/crates/cli/fff-rg/src/main.rs b/crates/cli/fff-rg/src/main.rs new file mode 100644 index 00000000..be717cf8 --- /dev/null +++ b/crates/cli/fff-rg/src/main.rs @@ -0,0 +1,47 @@ +//! `fff-rg` — drop-in `rg`-compatible CLI that accelerates searches inside +//! git repositories by delegating to [`fff-daemon`] over a Unix socket. +//! +//! On startup it probes the working directory for a git worktree. If one is +//! found, requests are serialized via rkyv and sent to the daemon (spawning +//! it on first use). The daemon keeps a warm file index, so repeated queries +//! skip the filesystem walk entirely. Outside a git worktree the tool falls +//! back to a plain `rg` subprocess, behaving identically to upstream ripgrep. +//! +//! Exit codes follow `rg` conventions: 0 = match, 1 = no match, 2 = error. + +mod app_ctx; +mod searcher; +mod types; + +use clap::Parser; + +use crate::app_ctx::AppCtx; +use crate::searcher::{Search, Searcher}; +use crate::types::cli::Args; + +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; + +fn main() -> Result<(), Box> { + let args = Args::parse(); + + tracing_subscriber::fmt() + .with_env_filter(tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else( + |_| format!("fff_rg={lvl},fff_ipc_domain={lvl}", lvl = args.log_level).into(), + )) + .init(); + + if !args.files && args.pattern.is_none() { + eprintln!("error: PATTERN is required (use --files to list files)"); + std::process::exit(2); + } + + let searcher = Searcher::new(AppCtx::new(&args)); + + let found = if args.files { searcher.files()? } else { searcher.grep()? }; + + if !found { + std::process::exit(1); + } + Ok(()) +} diff --git a/crates/cli/fff-rg/src/searcher/fffd.rs b/crates/cli/fff-rg/src/searcher/fffd.rs new file mode 100644 index 00000000..26247834 --- /dev/null +++ b/crates/cli/fff-rg/src/searcher/fffd.rs @@ -0,0 +1,194 @@ +//! 
Client-side daemon searcher. Connects to the `fff-daemon` Unix socket, +//! sends a search request with stdout as the output fd, and reads back a +//! status byte. Spawns the daemon on first use if it isn't already running. + +use std::io::{Read, Write}; +use std::os::unix::io::AsRawFd; +use std::os::unix::net::UnixStream; +use std::os::unix::process::CommandExt; +use std::process::{Command, Stdio}; +use std::time::Duration; + +use std::num::{NonZeroU32, NonZeroU64}; + +use fff_ipc_domain::{ + GrepQuery, GrepSearch, OutputFlags, RequestHeader, SearchKind, SearchRequest, SearchStatus, + daemon_socket_path, +}; +use sendfd::SendWithFd; + +use crate::app_ctx::AppCtx; +use crate::searcher::Search; +use crate::types::cli::ColorMode; + +/// Max time to wait for the daemon to write search results. +const READ_TIMEOUT: Duration = Duration::from_secs(30); +/// Max time to wait for the request to be sent to the daemon. +const WRITE_TIMEOUT: Duration = Duration::from_secs(10); +/// Max time to wait for a freshly-spawned daemon to accept connections. +const DAEMON_STARTUP_TIMEOUT: Duration = Duration::from_secs(5); +/// Poll interval when waiting for daemon socket to become connectable. +const DAEMON_POLL_INTERVAL: Duration = Duration::from_millis(1); + +/// Single-use connection to the daemon. Consumed by `query`, which drops the +/// stream on completion — making the one-request-per-connection protocol +/// constraint explicit in the type system. +struct DaemonConnection(UnixStream); + +impl DaemonConnection { + /// Connects to a running daemon, or spawns one and waits for it to be ready. + #[tracing::instrument(level = "trace", skip_all)] + fn open(daemon_bin: &std::path::Path) -> Result> { + let socket_path = daemon_socket_path(); + + if let Ok(stream) = UnixStream::connect(&socket_path) { + return Self::configure(stream); + } + + Self::spawn_daemon(&socket_path, daemon_bin)?; + Self::configure(UnixStream::connect(&socket_path)?) 
+ } + + /// Sets read/write timeouts on the stream. + fn configure(stream: UnixStream) -> Result> { + stream.set_read_timeout(Some(READ_TIMEOUT))?; + stream.set_write_timeout(Some(WRITE_TIMEOUT))?; + Ok(Self(stream)) + } + + /// Sends the request + stdout fd, waits for results, and returns match status. + /// Consumes self — one request per connection. + #[tracing::instrument(level = "trace", skip_all)] + fn query(self, req: &SearchRequest) -> Result> { + let req_bytes = + rkyv::to_bytes::(req).map_err(|e| format!("serialize: {e}"))?; + let header = RequestHeader::encode(req_bytes.len()); + + let stdout_fd = std::io::stdout().as_raw_fd(); + self.0.send_with_fd(&header, &[stdout_fd])?; + + (&self.0).write_all(&req_bytes)?; + + let mut status = [0u8; 1]; + (&self.0).read_exact(&mut status)?; + + match SearchStatus::try_from(status[0]) { + Ok(SearchStatus::Match) => Ok(true), + Ok(SearchStatus::NoMatch) => Ok(false), + Ok(SearchStatus::Failed) => Err("daemon reported search failure".into()), + Err(c) => Err(format!("daemon returned unknown status {c}").into()), + } + } + + /// Spawns `fff-daemon` and polls until the socket is connectable (up to 5s). + #[tracing::instrument(level = "trace", skip_all)] + fn spawn_daemon( + socket_path: &std::path::Path, + bin: &std::path::Path, + ) -> Result<(), Box> { + tracing::debug!("spawning fff-daemon"); + + Command::new(bin) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .process_group(0) + .spawn() + .map_err(|e| format!("failed to spawn {}: {e}", bin.display()))?; + + // Instant::now() is vDSO/commpage — no syscall; connect() dominates. + let deadline = std::time::Instant::now() + DAEMON_STARTUP_TIMEOUT; + while std::time::Instant::now() < deadline { + if UnixStream::connect(socket_path).is_ok() { + return Ok(()); + } + std::thread::sleep(DAEMON_POLL_INTERVAL); + } + + Err(format!( + "fff-daemon ({}) did not start within {}s. 
\ + Try running the daemon manually to see errors: FFF_LOG=debug {}", + bin.display(), + DAEMON_STARTUP_TIMEOUT.as_secs(), + bin.display() + ) + .into()) + } +} + +/// [`Search`] backend that delegates to the daemon over IPC. +pub struct DaemonSearcher<'a> { + ctx: AppCtx<'a>, +} + +impl<'a> DaemonSearcher<'a> { + pub fn new(ctx: AppCtx<'a>) -> Self { + Self { ctx } + } + + /// Converts the owned context into a daemon search request. + fn build_request(&self) -> SearchRequest { + let args = self.ctx.args; + let directory = self.ctx.git_root.as_deref().unwrap_or(&self.ctx.dir); + let pattern = args.pattern.clone().unwrap_or_default(); + + let search = if args.files { + SearchKind::Files { query: pattern } + } else { + let context = args.context.unwrap_or(0); + SearchKind::Grep(GrepSearch { + query: if args.fixed_strings { + GrepQuery::Literal(pattern) + } else { + GrepQuery::Regex(pattern) + }, + case_mode: args.case.resolve(), + max_count: args.max_count.and_then(NonZeroU32::new), + max_filesize: args.max_filesize.and_then(|fs| NonZeroU64::new(fs.as_u64())), + before_context: args.before_context.unwrap_or(context), + after_context: args.after_context.unwrap_or(context), + trim: args.trim, + }) + }; + + SearchRequest { + directory: directory.to_string(), + search, + output: Self::resolve_output_flags(args, self.ctx.is_tty), + } + } + + /// Maps CLI output flags to the IPC output bitmask. 
+ fn resolve_output_flags(args: &crate::types::cli::Args, is_tty: bool) -> OutputFlags { + let pretty = args.pretty; + let color = match args.color { + ColorMode::Always | ColorMode::Ansi => true, + ColorMode::Never => false, + ColorMode::Auto => is_tty, + }; + + let mut f = OutputFlags::empty(); + if color || pretty { f |= OutputFlags::COLOR; } + if !args.no_line_number && (args.line_number || pretty || is_tty) { f |= OutputFlags::LINE_NUMBER; } + if args.column || args.vimgrep { f |= OutputFlags::COLUMN; } + if !args.no_heading && (args.heading || pretty || is_tty) { f |= OutputFlags::HEADING; } + if !args.no_filename { f |= OutputFlags::WITH_FILENAME; } + if args.count { f |= OutputFlags::COUNT_ONLY; } + if args.files_with_matches { f |= OutputFlags::FILES_ONLY; } + if args.quiet { f |= OutputFlags::QUIET; } + if args.vimgrep { f |= OutputFlags::VIMGREP; } + f + } +} + +impl Search for DaemonSearcher<'_> { + fn grep(&self) -> Result> { + let req = self.build_request(); + DaemonConnection::open(&self.ctx.daemon_bin)?.query(&req) + } + + fn files(&self) -> Result> { + let req = self.build_request(); + DaemonConnection::open(&self.ctx.daemon_bin)?.query(&req) + } +} diff --git a/crates/cli/fff-rg/src/searcher/mod.rs b/crates/cli/fff-rg/src/searcher/mod.rs new file mode 100644 index 00000000..de8191bb --- /dev/null +++ b/crates/cli/fff-rg/src/searcher/mod.rs @@ -0,0 +1,46 @@ +//! Search backend selection. Inside a git worktree the daemon is used +//! (indexed, persistent); outside it we fall back to a plain `rg` subprocess. + +mod fffd; +mod rg; +mod search; + +pub use search::Search; + +use crate::app_ctx::AppCtx; + +/// Dispatches searches to either the daemon or a direct `rg` invocation. +pub enum Searcher<'a> { + /// Direct `rg` subprocess — used outside git worktrees. + Rg(rg::RgSearcher<'a>), + /// IPC to the `fff-daemon` — used inside git worktrees. 
+ Daemon(fffd::DaemonSearcher<'a>), +} + +impl<'a> Searcher<'a> { + /// Picks the right backend based on whether a git root was discovered. + pub fn new(ctx: AppCtx<'a>) -> Self { + if ctx.git_root.is_some() { + Self::Daemon(fffd::DaemonSearcher::new(ctx)) + } else { + Self::Rg(rg::RgSearcher::new(ctx)) + } + } +} + +impl Search for Searcher<'_> { + fn grep(&self) -> Result> { + match self { + Self::Rg(s) => s.grep(), + Self::Daemon(s) => s.grep(), + } + } + + fn files(&self) -> Result> { + match self { + Self::Rg(s) => s.files(), + Self::Daemon(s) => s.files(), + } + } +} + diff --git a/crates/cli/fff-rg/src/searcher/rg.rs b/crates/cli/fff-rg/src/searcher/rg.rs new file mode 100644 index 00000000..0af3a631 --- /dev/null +++ b/crates/cli/fff-rg/src/searcher/rg.rs @@ -0,0 +1,117 @@ +//! Fallback search backend that shells out to `rg`. Used when no git +//! worktree is detected (and thus no daemon index is available). + +use std::process::Command; + +use crate::app_ctx::AppCtx; +use crate::searcher::Search; +use crate::types::cli::Args; + +/// [`Search`] backend that spawns `rg` as a subprocess. +pub struct RgSearcher<'a> { + ctx: AppCtx<'a>, +} + +impl<'a> RgSearcher<'a> { + pub fn new(ctx: AppCtx<'a>) -> Self { + Self { ctx } + } + + /// Returns a fresh `rg` command, or errors if `rg` wasn't found at startup. + fn rg(&self) -> Result> { + self.ctx.rg_command().ok_or_else(|| { + "rg (ripgrep) not found — install from https://github.com/BurntSushi/ripgrep".into() + }) + } + + /// Runs an `rg` command and maps its exit code to a match result. + fn run(mut cmd: Command) -> Result> { + let status = cmd.status().map_err(|e| format!("failed to run rg: {e}"))?; + match status.code() { + Some(0) => Ok(true), + Some(1) => Ok(false), + Some(c) => Err(format!("rg exited with code {c}").into()), + None => Err("rg terminated by signal".into()), + } + } + + /// Translates CLI flags into the corresponding `rg` arguments. 
+ fn apply_args(cmd: &mut Command, args: &Args) { + args.case.apply_to_rg(cmd); + + if args.fixed_strings { + cmd.arg("-F"); + } + if let Some(n) = args.before_context { + cmd.arg("-B").arg(n.to_string()); + } + if let Some(n) = args.after_context { + cmd.arg("-A").arg(n.to_string()); + } + if let Some(n) = args.context { + cmd.arg("-C").arg(n.to_string()); + } + if let Some(n) = args.max_count { + cmd.arg("-m").arg(n.to_string()); + } + if let Some(fs) = args.max_filesize { + cmd.arg("--max-filesize").arg(fs.to_string()); + } + if args.trim { + cmd.arg("--trim"); + } + if args.line_number { + cmd.arg("-n"); + } + if args.no_line_number { + cmd.arg("-N"); + } + if args.column { + cmd.arg("--column"); + } + cmd.arg(format!("--color={}", args.color)); + if args.no_filename { + cmd.arg("-I"); + } + if args.heading { + cmd.arg("--heading"); + } + if args.no_heading { + cmd.arg("--no-heading"); + } + if args.count { + cmd.arg("-c"); + } + if args.files_with_matches { + cmd.arg("-l"); + } + if args.quiet { + cmd.arg("-q"); + } + if args.vimgrep { + cmd.arg("--vimgrep"); + } + if args.pretty { + cmd.arg("-p"); + } + } +} + +impl Search for RgSearcher<'_> { + #[tracing::instrument(level = "trace", skip(self))] + fn grep(&self) -> Result> { + let pattern = self.ctx.args.pattern.as_deref().unwrap_or(""); + let mut cmd = self.rg()?; + cmd.arg(pattern).arg(self.ctx.dir.as_ref()); + Self::apply_args(&mut cmd, self.ctx.args); + Self::run(cmd) + } + + #[tracing::instrument(level = "trace", skip(self))] + fn files(&self) -> Result> { + let mut cmd = self.rg()?; + cmd.arg("--files").arg(self.ctx.dir.as_ref()); + cmd.arg(format!("--color={}", self.ctx.args.color)); + Self::run(cmd) + } +} diff --git a/crates/cli/fff-rg/src/searcher/search.rs b/crates/cli/fff-rg/src/searcher/search.rs new file mode 100644 index 00000000..6c9d443a --- /dev/null +++ b/crates/cli/fff-rg/src/searcher/search.rs @@ -0,0 +1,8 @@ +/// Common interface for search backends. 
Returns `Ok(true)` on match, +/// `Ok(false)` on no match, or an error. +pub trait Search { + /// Content search — find lines matching the query pattern. + fn grep(&self) -> Result>; + /// File listing — enumerate indexed files matching the fuzzy query. + fn files(&self) -> Result>; +} diff --git a/crates/cli/fff-rg/src/types/cli/case_mode.rs b/crates/cli/fff-rg/src/types/cli/case_mode.rs new file mode 100644 index 00000000..9ab80dad --- /dev/null +++ b/crates/cli/fff-rg/src/types/cli/case_mode.rs @@ -0,0 +1,35 @@ +use clap::Args; +use fff_ipc_domain::CaseMode; + +#[derive(Args, Debug)] +#[group(required = false, multiple = false)] +pub struct CaseModeArgs { + #[arg(short = 'i', long = "ignore-case")] + ignore_case: bool, + + #[arg(short = 's', long = "case-sensitive")] + case_sensitive: bool, + + #[arg(short = 'S', long = "smart-case")] + smart_case: bool, +} + +impl CaseModeArgs { + pub fn resolve(&self) -> CaseMode { + match (self.case_sensitive, self.ignore_case) { + (true, _) => CaseMode::Sensitive, + (_, true) => CaseMode::Insensitive, + _ => CaseMode::Smart, + } + } + + pub fn apply_to_rg(&self, cmd: &mut std::process::Command) { + if self.ignore_case { + cmd.arg("-i"); + } else if self.case_sensitive { + cmd.arg("-s"); + } else { + cmd.arg("-S"); + } + } +} diff --git a/crates/cli/fff-rg/src/types/cli/mod.rs b/crates/cli/fff-rg/src/types/cli/mod.rs new file mode 100644 index 00000000..0f94781a --- /dev/null +++ b/crates/cli/fff-rg/src/types/cli/mod.rs @@ -0,0 +1,142 @@ +//! CLI argument types for `fff-rg`. Parsed by clap at startup and threaded +//! into [`AppCtx`](crate::app_ctx::AppCtx) for the rest of the process. + +mod case_mode; + +pub use case_mode::CaseModeArgs; + +use bytesize::ByteSize; +use clap::{Parser, ValueEnum}; + +/// When to emit ANSI color codes in output. +#[derive(Clone, Copy, ValueEnum)] +pub enum ColorMode { + /// Force color on, using platform-native sequences. + Always, + /// Force color on, always using ANSI escape codes. 
+ Ansi, + /// Disable color entirely. + Never, + /// Color when stdout is a terminal, plain otherwise. + Auto, +} + +impl std::fmt::Display for ColorMode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + Self::Always => "always", + Self::Ansi => "ansi", + Self::Never => "never", + Self::Auto => "auto", + }) + } +} + +#[derive(Parser)] +#[command( + name = "fff-rg", + about = "FFF — daemon-accelerated file finder and grep", + after_help = "Falls back to rg when searching outside a git repository." +)] +/// Mirrors a subset of `rg` flags so `fff-rg` is a drop-in replacement. +#[allow(clippy::struct_excessive_bools)] +pub struct Args { + /// Search pattern (regex by default, literal with -F) + pub pattern: Option, + + /// Paths to search (default: current directory) + pub paths: Vec, + + /// Case sensitivity flags (`-i`, `-s`, `-S`). + #[command(flatten)] + pub case: CaseModeArgs, + + /// Treat pattern as a literal string, not a regex. + #[arg(short = 'F', long = "fixed-strings")] + pub fixed_strings: bool, + + /// Lines of context after each match. + #[arg(short = 'A', long = "after-context", value_name = "NUM")] + pub(crate) after_context: Option, + + /// Lines of context before each match. + #[arg(short = 'B', long = "before-context", value_name = "NUM")] + pub(crate) before_context: Option, + + /// Lines of context before and after each match. + #[arg(short = 'C', long = "context", value_name = "NUM")] + pub(crate) context: Option, + + /// Max matches per file. + #[arg(short = 'm', long = "max-count", value_name = "NUM")] + pub(crate) max_count: Option, + + /// Skip files larger than this size. + #[arg(long = "max-filesize", value_name = "SIZE")] + pub(crate) max_filesize: Option, + + /// Strip leading whitespace from matched lines. + #[arg(long)] + pub(crate) trim: bool, + + /// Prefix each match with its line number. 
+ #[arg(short = 'n', long = "line-number")] + pub(crate) line_number: bool, + + /// Suppress line numbers. + #[arg(short = 'N', long = "no-line-number")] + pub(crate) no_line_number: bool, + + /// Show the byte-column offset of each match. + #[arg(long)] + pub(crate) column: bool, + + /// When to use color in output. + #[arg(long, value_enum, value_name = "WHEN", default_value = "auto")] + pub(crate) color: ColorMode, + + /// Print the filename for each match. + #[arg(short = 'H', long = "with-filename")] + pub(crate) with_filename: bool, + + /// Suppress filenames in output. + #[arg(short = 'I', long = "no-filename")] + pub(crate) no_filename: bool, + + /// Group matches under filename headers. + #[arg(long)] + pub(crate) heading: bool, + + /// Print each match on its own line with the filename prefix. + #[arg(long = "no-heading")] + pub(crate) no_heading: bool, + + /// Print only a count of matching lines per file. + #[arg(short = 'c', long)] + pub(crate) count: bool, + + /// Print only filenames that contain matches. + #[arg(short = 'l', long = "files-with-matches")] + pub(crate) files_with_matches: bool, + + /// Suppress all output; exit status indicates match/no-match. + #[arg(short = 'q', long)] + pub(crate) quiet: bool, + + /// Output in `file:line:col:text` format for editor integration. + #[arg(long)] + pub(crate) vimgrep: bool, + + /// Shorthand for `--color=always --heading --line-number`. + #[arg(short = 'p', long)] + pub(crate) pretty: bool, + + /// List files instead of searching their contents. + #[arg(long)] + pub files: bool, + + /// Log level for diagnostics (`FFF_LOG` env var). 
+ #[arg(long, default_value = "warn", env = "FFF_LOG", global = true)] + pub log_level: String, +} + diff --git a/crates/cli/fff-rg/src/types/mod.rs b/crates/cli/fff-rg/src/types/mod.rs new file mode 100644 index 00000000..4f773726 --- /dev/null +++ b/crates/cli/fff-rg/src/types/mod.rs @@ -0,0 +1 @@ +pub mod cli; diff --git a/crates/cli/fff-rg/tests/rg_compat.rs b/crates/cli/fff-rg/tests/rg_compat.rs new file mode 100644 index 00000000..49d4afb9 --- /dev/null +++ b/crates/cli/fff-rg/tests/rg_compat.rs @@ -0,0 +1,846 @@ +#[path = "rg_compat/hay.rs"] +mod hay; +#[path = "rg_compat/synthetic.rs"] +mod synthetic; +#[path = "rg_compat/util.rs"] +mod util; + +use std::process::Command; + +use hay::{PROJECT, SHERLOCK}; +use synthetic::{LARGE_REPO, MEDIUM_REPO, SMALL_REPO}; +use test_case::test_case; +use util::{Dir, assert_rg_match, find_binary, normalize_inline}; + +#[test] +fn smoke_basic_search() { + let dir = Dir::new("smoke"); + dir.create("sherlock", SHERLOCK); + + let out = dir.command().arg("--color=never").arg("--no-heading").arg("Sherlock").stdout(); + assert!(out.contains("Sherlock"), "expected Sherlock in output, got: {out}"); + let lines: Vec<&str> = out.lines().filter(|l| *l != "--").collect(); + assert_eq!(lines.len(), 2, "expected 2 matching lines, got {}: {out}", lines.len()); +} + + +#[test] +fn case_insensitive() { + let dir = Dir::new("case_i"); + dir.create("sherlock", SHERLOCK); + let out = dir.command().args(&["--color=never", "--no-heading", "-i", "sherlock"]).stdout(); + let lines: Vec<&str> = out.lines().filter(|l| *l != "--").collect(); + assert_eq!(lines.len(), 2, "expected 2 case-insensitive matches, got: {out}"); + assert_eq!( + lines[0], + "sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock" + ); + assert_eq!( + lines[1], + "sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes" + ); +} + +#[test] +fn smart_case_lower() { + let dir = Dir::new("smart_lower"); + dir.create("sherlock", SHERLOCK); + let out = dir.command().args(&["--color=never", "--no-heading", "sherlock"]).stdout(); + assert!(out.contains("Sherlock"), "smart case should match uppercase with lowercase query"); + let lines: Vec<&str> = out.lines().filter(|l| *l != "--").collect(); + assert_eq!(lines.len(), 2); +} + +#[test] +fn smart_case_upper() { + let dir = Dir::new("smart_upper"); + dir.create("sherlock", SHERLOCK); + let out = dir.command().args(&["--color=never", "--no-heading", "Sherlock"]).stdout(); + let lines: Vec<&str> = out.lines().filter(|l| *l != "--").collect(); + assert_eq!(lines.len(), 2); + assert!(lines[0].contains("Sherlock")); +} + +#[test] +fn case_sensitive() { + let dir = Dir::new("case_s"); + dir.create("sherlock", SHERLOCK); + let code = dir.command().args(&["--color=never", "--no-heading", "-s", "sherlock"]).exit_code(); + assert_eq!(code, 1, "case-sensitive 'sherlock' should find nothing"); +} + +#[test] +fn fixed_strings() { + let dir = Dir::new("fixed"); + dir.create("test", "foo.bar\nfooXbar\n"); + let out = dir.command().args(&["--color=never", "--no-heading", "-F", "foo.bar"]).stdout(); + assert_eq!(out, "test:foo.bar\n"); +} + +#[test] +fn fixed_strings_regex_chars() { + let dir = Dir::new("fixed_regex"); + dir.create("test", "a(b)c\nabc\n"); + let out = dir.command().args(&["--color=never", "--no-heading", "-F", "a(b)c"]).stdout(); + assert_eq!(out, "test:a(b)c\n"); +} + +#[test] +fn no_match_exit_code() { + let dir = Dir::new("no_match"); + dir.create("sherlock", SHERLOCK); + let code = dir.command().args(&["--color=never", "--no-heading", "ZZZZNOTFOUND"]).exit_code(); + assert_eq!(code, 1); +} + +#[test] +fn match_exit_code() { + let dir = Dir::new("match_exit"); + dir.create("sherlock", SHERLOCK); + let code = dir.command().args(&["--color=never", "--no-heading", "Sherlock"]).exit_code(); + assert_eq!(code, 0); +} + + 
+/// With `-n`, every match line is prefixed `file:line:`.
+///
+/// The two `Sherlock` matches in the fixture are expected on lines 1 and 3.
+#[test]
+fn line_numbers() {
+    let dir = Dir::new("line_num");
+    dir.create("sherlock", SHERLOCK);
+    let out = dir.command().args(&["--color=never", "--no-heading", "-n", "Sherlock"]).stdout();
+    let lines: Vec<&str> = out.lines().filter(|l| *l != "--").collect();
+    assert_eq!(lines.len(), 2);
+    assert!(lines[0].starts_with("sherlock:1:"));
+    assert!(lines[1].starts_with("sherlock:3:"));
+}
+
+/// With `-n --column`, match lines take the form `file:line:col:content`.
+#[test]
+fn column_numbers() {
+    let dir = Dir::new("columns");
+    dir.create("sherlock", SHERLOCK);
+    let out = dir
+        .command()
+        .args(&["--color=never", "--no-heading", "-n", "--column", "Sherlock"])
+        .stdout();
+    let lines: Vec<&str> = out.lines().filter(|l| *l != "--").collect();
+    assert_eq!(lines.len(), 2);
+    // Format: file:line:col:content — at least 4 colon-separated parts
+    let parts: Vec<&str> = lines[0].splitn(4, ':').collect();
+    assert_eq!(parts.len(), 4, "expected file:line:col:content format");
+    assert_eq!(parts[0], "sherlock");
+}
+
+/// With `--heading`, the filename is printed once on its own line and the
+/// match lines below it carry no filename prefix.
+#[test]
+fn heading_mode() {
+    let dir = Dir::new("heading");
+    dir.create("sherlock", SHERLOCK);
+    let out = dir.command().args(&["--color=never", "--heading", "Sherlock"]).stdout();
+    let lines: Vec<&str> = out.lines().filter(|l| *l != "--").collect();
+    // Guard the indexing below so a regression reports the captured output
+    // instead of a bare index-out-of-bounds panic.
+    assert!(lines.len() >= 2, "expected a heading and at least one match line, got: {out}");
+    // First line should be just the filename (heading)
+    assert_eq!(lines[0], "sherlock");
+    // Match lines should NOT have filename prefix
+    assert!(!lines[1].starts_with("sherlock:"));
+    assert!(lines[1].contains("Sherlock"));
+}
+
+/// With `--heading -n`, match lines under the heading start with `line:`.
+#[test]
+fn heading_with_line_numbers() {
+    let dir = Dir::new("heading_ln");
+    dir.create("sherlock", SHERLOCK);
+    let out = dir.command().args(&["--color=never", "--heading", "-n", "Sherlock"]).stdout();
+    let lines: Vec<&str> = out.lines().filter(|l| *l != "--").collect();
+    // Guard the indexing below so a regression reports the captured output.
+    assert!(lines.len() >= 3, "expected a heading and two match lines, got: {out}");
+    assert_eq!(lines[0], "sherlock");
+    assert!(lines[1].starts_with("1:"), "expected line number prefix, got: {}", lines[1]);
+    assert!(lines[2].starts_with("3:"), "expected line number prefix, got: {}", lines[2]);
+}
+
+/// With `-I`, match lines are printed without the `file:` prefix.
+#[test]
+fn no_filename() {
+    let dir = Dir::new("no_filename");
+    dir.create("sherlock", SHERLOCK);
+    let out = dir.command().args(&["--color=never", "--no-heading", "-I", "Sherlock"]).stdout();
+    let lines: Vec<&str> = out.lines().filter(|l| *l != "--").collect();
+    // Guard the indexing below so a regression reports the captured output.
+    assert!(!lines.is_empty(), "expected at least one match line, got: {out}");
+    assert!(!lines[0].starts_with("sherlock:"), "should not have filename prefix");
+    assert!(lines[0].contains("Sherlock"));
+}
+
+/// With `-c`, the output is exactly `file:count` for the matching file.
+#[test]
+fn count() {
+    let dir = Dir::new("count");
+    dir.create("sherlock", SHERLOCK);
+    let out = dir.command().args(&["--color=never", "--no-heading", "-c", "Sherlock"]).stdout();
+    assert_eq!(out, "sherlock:2\n");
+}
+
+/// With `-l`, the output is exactly the name of the matching file.
+#[test]
+fn files_with_matches() {
+    let dir = Dir::new("files_match");
+    dir.create("sherlock", SHERLOCK);
+    let out = dir.command().args(&["--color=never", "--no-heading", "-l", "Sherlock"]).stdout();
+    assert_eq!(out, "sherlock\n");
+}
+
+/// With `-q`, nothing is written to stdout even though the pattern matches.
+#[test]
+fn quiet_match() {
+    let dir = Dir::new("quiet_match");
+    dir.create("sherlock", SHERLOCK);
+    let mut cmd = dir.command();
+    cmd.args(&["--color=never", "--no-heading", "-q", "Sherlock"]);
+    let out = cmd.stdout();
+    assert!(out.is_empty(), "quiet mode should produce no output, got: {out}");
+}
+
+/// With `-q` and a pattern that matches nothing, the exit code is 1.
+#[test]
+fn quiet_no_match() {
+    let dir = Dir::new("quiet_nomatch");
+    dir.create("sherlock", SHERLOCK);
+    let code =
+        dir.command().args(&["--color=never", "--no-heading", "-q", "ZZZZNOTFOUND"]).exit_code();
+    assert_eq!(code, 1);
+}
+
+/// With `--vimgrep`, every output line is `file:line:col:content`, where
+/// `line` and `col` are numeric.
+#[test]
+fn vimgrep() {
+    let dir = Dir::new("vimgrep");
+    dir.create("sherlock", SHERLOCK);
+    let out = dir.command().args(&["--color=never", "--vimgrep", "Sherlock"]).stdout();
+    let lines: Vec<&str> = out.lines().collect();
+    assert_eq!(lines.len(), 2, "expected 2 vimgrep lines, got: {out}");
+    for line in &lines {
+        let parts: Vec<&str> = line.splitn(4, ':').collect();
+        assert_eq!(parts.len(), 4, "vimgrep format: file:line:col:content");
+        assert_eq!(parts[0], "sherlock");
+        // `parse` needs an explicit target type here because the result is
+        // only inspected with `is_ok()`; any unsigned integer type would do.
+        assert!(parts[1].parse::<u64>().is_ok(), "line should be numeric");
+        assert!(parts[2].parse::<u64>().is_ok(), "col should be numeric");
+    }
+}
+
+#[test] +fn files_mode() { + let dir = Dir::new("files_mode"); + dir.create("alpha.txt", "content"); + dir.create("beta.rs", "fn main() {}"); + let out = dir.command().args(&["--color=never", "--files"]).stdout(); + let mut lines: Vec<&str> = out.lines().collect(); + lines.sort(); + assert!(lines.contains(&"alpha.txt"), "should list alpha.txt, got: {out}"); + assert!(lines.contains(&"beta.rs"), "should list beta.rs, got: {out}"); +} + +/// Without context flags, non-adjacent matches should NOT have -- separator +#[test] +fn no_separator_without_context() { + let dir = Dir::new("no_sep"); + dir.create("sherlock", SHERLOCK); + let out = dir.command().args(&["--color=never", "--no-heading", "Sherlock"]).stdout(); + assert!(!out.contains("--"), "should not emit -- separator without context flags, got: {out}"); +} + + +#[test] +fn after_context() { + let dir = Dir::new("after_ctx"); + dir.create("sherlock", SHERLOCK); + let out = dir.command().args(&["--color=never", "--no-heading", "-A1", "Sherlock"]).stdout(); + // Should have match lines + context lines + assert!(out.contains("Holmeses"), "after-context should include next line"); + assert!(out.contains("can extract"), "after-context should include next line for 2nd match"); +} + +#[test] +fn after_context_line_numbers() { + let dir = Dir::new("after_ctx_ln"); + dir.create("sherlock", SHERLOCK); + let out = + dir.command().args(&["--color=never", "--no-heading", "-A1", "-n", "Sherlock"]).stdout(); + // Match lines use ":" separator, context lines use "-" separator + assert!(out.contains("sherlock:1:"), "should have match with line number"); + assert!(out.contains("sherlock-2-"), "context line should use - separator"); +} + +#[test] +fn before_context() { + let dir = Dir::new("before_ctx"); + dir.create("sherlock", SHERLOCK); + let out = dir.command().args(&["--color=never", "--no-heading", "-B1", "Sherlock"]).stdout(); + // Line 3 matches; line 2 should be before-context + assert!(out.contains("Holmeses"), 
"before-context of 2nd match should include line 2"); +} + +#[test] +fn before_context_line_numbers() { + let dir = Dir::new("before_ctx_ln"); + dir.create("sherlock", SHERLOCK); + let out = + dir.command().args(&["--color=never", "--no-heading", "-B1", "-n", "Sherlock"]).stdout(); + assert!(out.contains("sherlock:1:"), "first match at line 1"); + assert!(out.contains("sherlock-2-"), "before-context for 2nd match"); + assert!(out.contains("sherlock:3:"), "second match at line 3"); +} + +#[test] +fn context_separator() { + let dir = Dir::new("ctx_sep"); + dir.create("sherlock", SHERLOCK); + // "world" is on line 1, "attached" is on line 6 — gap between them + let out = + dir.command().args(&["--color=never", "--no-heading", "-C1", "world|attached"]).stdout(); + assert!(out.contains("--"), "should have -- separator between non-adjacent groups"); +} + +#[test] +fn trim_whitespace() { + let dir = Dir::new("trim"); + dir.create("indented", " indented line\nnormal line\n"); + let out = dir.command().args(&["--color=never", "--no-heading", "--trim", "indented"]).stdout(); + // --trim strips leading whitespace + assert!(out.contains("indented:indented line"), "should trim leading spaces, got: {out}"); + assert!(!out.contains(" indented"), "leading spaces should be stripped"); +} + +#[test] +fn max_count() { + let dir = Dir::new("max_count"); + dir.create("sherlock", SHERLOCK); + let out = dir.command().args(&["--color=never", "--no-heading", "-m1", "Sherlock"]).stdout(); + let lines: Vec<&str> = out.lines().filter(|l| *l != "--").collect(); + assert_eq!(lines.len(), 1, "max-count 1 should return 1 line, got: {out}"); + assert!(lines[0].contains("Sherlock")); +} + +// --- rg comparison tests: project fixture --- + +// inline mode +#[test_case(false, &["--color=never", "--no-heading", "fn"] ; "inline_basic")] +#[test_case(false, &["--color=never", "--no-heading", "-n", "fn"] ; "inline_line_numbers")] +#[test_case(false, &["--color=never", "--no-heading", "-n", "--column", 
"fn"] ; "inline_column")] +#[test_case(false, &["--color=never", "--no-heading", "-i", "config"] ; "inline_case_insensitive")] +#[test_case(false, &["--color=never", "--no-heading", "-s", "Config"] ; "inline_case_sensitive")] +#[test_case(false, &["--color=never", "--no-heading", "-S", "config"] ; "inline_smart_case_lower")] +#[test_case(false, &["--color=never", "--no-heading", "-S", "Config"] ; "inline_smart_case_upper")] +#[test_case(false, &["--color=never", "--no-heading", "-F", "HashMap"] ; "inline_fixed_strings")] +#[test_case(false, &["--color=never", "--no-heading", "-c", "fn"] ; "inline_count")] +#[test_case(false, &["--color=never", "--no-heading", "-l", "fn"] ; "inline_files_with_matches")] +#[test_case(false, &["--color=never", "--no-heading", "-m1", "fn"] ; "inline_max_count")] +#[test_case(false, &["--color=never", "--no-heading", "--trim", "let"] ; "inline_trim")] +// heading mode +#[test_case(true, &["--color=never", "--heading", "fn"] ; "heading_basic")] +#[test_case(true, &["--color=never", "--heading", "-n", "fn"] ; "heading_line_numbers")] +#[test_case(true, &["--color=never", "--heading", "-n", "--column", "Config"] ; "heading_column")] +#[test_case(false, &["--color=never", "--heading", "-c", "fn"] ; "heading_count")] +#[test_case(true, &["--color=never", "--heading", "-n", "-m1", "fn"] ; "heading_max_count")] +// context +#[test_case(false, &["--color=never", "--no-heading", "-n", "-A2", "fn main"] ; "after_context")] +#[test_case(false, &["--color=never", "--no-heading", "-n", "-B2", "fn main"] ; "before_context")] +#[test_case(false, &["--color=never", "--no-heading", "-n", "-C2", "HashMap"] ; "symmetric_context")] +#[test_case(false, &["--color=never", "--no-heading", "-n", "-B1", "-A3", "HashMap"] ; "asymmetric_context")] +#[test_case(true, &["--color=never", "--heading", "-n", "-C1", "fn"] ; "context_heading")] +// vimgrep +#[test_case(false, &["--color=never", "--vimgrep", "Config"] ; "vimgrep_basic")] +#[test_case(false, 
&["--color=never", "--vimgrep", r"fn\s+\w+"] ; "vimgrep_regex")] +#[test_case(false, &["--color=never", "--vimgrep", "-F", "pub fn"] ; "vimgrep_fixed")] +#[test_case(false, &["--color=never", "--vimgrep", "-i", "hashmap"] ; "vimgrep_case_insensitive")] +// quiet / exit codes +#[test_case(false, &["--color=never", "-q", "fn"] ; "quiet_match")] +#[test_case(false, &["--color=never", "-q", "ZZZZZ_NEVER_MATCHES"] ; "quiet_no_match")] +#[test_case(false, &["--color=never", "-c", "ZZZZZ_NEVER_MATCHES"] ; "count_no_match")] +#[test_case(false, &["--color=never", "-l", "ZZZZZ_NEVER_MATCHES"] ; "files_with_matches_no_match")] +// regex +#[test_case(false, &["--color=never", "--no-heading", "HashMap|Config"] ; "regex_alternation")] +#[test_case(false, &["--color=never", "--no-heading", r"fn\s+\w+"] ; "regex_quantifier")] +#[test_case(false, &["--color=never", "--no-heading", "^use"] ; "regex_anchor")] +#[test_case(false, &["--color=never", "--no-heading", "assert[_!]"] ; "regex_char_class")] +#[test_case(false, &["--color=never", "--no-heading", "-F", "HashMap"] ; "fixed_special_chars")] +#[test_case(false, &["--color=never", "--no-heading", "-F", "Config::new"] ; "fixed_parens")] +// unicode +#[test_case(false, &["--color=never", "--no-heading", "café"] ; "unicode_latin_extended")] +#[test_case(false, &["--color=never", "--no-heading", "日本語"] ; "unicode_cjk")] +#[test_case(false, &["--color=never", "--no-heading", "-i", "prójéct"] ; "unicode_case_insensitive")] +#[test_case(false, &["--color=never", "--vimgrep", "café"] ; "unicode_vimgrep")] +// multi-flag combos +#[test_case(false, &["--color=never", "--no-heading", "--trim", "-n", "-C1", "HashMap"] ; "combo_trim_context_linenums")] +#[test_case(false, &["--color=never", "--no-heading", "-c", "-i", "self"] ; "combo_count_case_insensitive")] +#[test_case(false, &["--color=never", "--no-heading", "-n", "-m1", "-C1", "HashMap"] ; "combo_maxcount_context")] +#[test_case(true, &["--color=never", "--heading", "-n", "--column", 
"-C1", "Config"] ; "combo_heading_context_column")] +#[test_case(false, &["--color=never", "--vimgrep", "-F", "-i", "verbose"] ; "combo_vimgrep_fixed_case")] +fn vs_rg_project(heading: bool, args: &[&str]) { + let name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_string(); + let dir = Dir::new(&name); + dir.with_project(&PROJECT); + assert_rg_match(&dir, args, heading); +} + +// --- rg comparison tests: custom fixtures --- + +#[test_case(false, &[("solo.txt", "hello world\nfoo bar\nhello again\n")], &["--color=never", "--no-heading", "-I", "-n", "hello", "solo.txt"] ; "inline_no_filename")] +// context +#[test_case(false, &[("dense.txt", "a\nMATCH\nb\nMATCH\nc\n")], &["--color=never", "--no-heading", "-n", "-H", "-C1", "MATCH", "dense.txt"] ; "context_overlapping")] +#[test_case(false, &[("edges.txt", "MATCH\na\nb\nc\nd\ne\nMATCH\n")], &["--color=never", "--no-heading", "-n", "-H", "-C2", "MATCH", "edges.txt"] ; "context_at_boundaries")] +#[test_case(false, &[("distant.txt", "MATCH\na\nb\nc\nd\ne\nf\nMATCH\n")], &["--color=never", "--no-heading", "-n", "-H", "-C1", "MATCH", "distant.txt"] ; "context_separator")] +// color +#[test_case(false, &[("data.txt", "hello world\nfoo bar\nhello again\n")], &["--color=always", "--no-heading", "-n", "-H", "hello", "data.txt"] ; "color_inline")] +#[test_case(true, &[("data.txt", "hello world\nfoo bar\nhello again\n")], &["--color=always", "--heading", "-n", "-H", "hello", "data.txt"] ; "color_heading")] +#[test_case(false, &[("data.txt", "hello world\n")], &["--color=always", "--no-heading", "-n", "--column", "-H", "hello", "data.txt"] ; "color_column")] +#[test_case(false, &[("data.txt", "hello\nhello\n")], &["--color=always", "--no-heading", "-c", "-H", "hello", "data.txt"] ; "color_count")] +#[test_case(false, &[("data.txt", "hello\n")], &["--color=always", "-l", "-H", "hello", "data.txt"] ; "color_files_with_matches")] +// edge cases +#[test_case(false, &[("empty.txt", ""), ("notempty.txt", 
"hello\n")], &["--color=never", "--no-heading", "hello"] ; "empty_file")] +#[test_case(false, &[("data.txt", "hello world")], &["--color=never", "--no-heading", "-H", "hello", "data.txt"] ; "no_trailing_newline")] +#[test_case(false, &[("one.txt", "single line\n")], &["--color=never", "--no-heading", "-n", "-H", "-C2", "single", "one.txt"] ; "single_line_file")] +#[test_case(false, &[("a/b/c/d/deep.txt", "hello from the deep\n"), ("shallow.txt", "hello from shallow\n")], &["--color=never", "--no-heading", "hello"] ; "deeply_nested")] +fn vs_rg_fixture(heading: bool, files: &[(&str, &str)], args: &[&str]) { + let name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_string(); + let dir = Dir::new(&name); + for (path, content) in files { + dir.create(path, content); + } + assert_rg_match(&dir, args, heading); +} + +// --- exit code tests (custom assertions, not macro-able) --- + +#[test] +fn vs_rg_exit_code_match() { + let dir = Dir::new("vs_exit_match"); + dir.with_project(&PROJECT); + let fff = dir.command().args(&["--color=never", "fn"]).full_output(); + let rg = dir.rg().args(&["--color=never", "fn"]).full_output(); + assert_eq!(fff.code, 0, "fff-rg should exit 0 on match"); + assert_eq!(rg.code, 0, "rg should exit 0 on match"); +} + +#[test] +fn vs_rg_exit_code_no_match() { + let dir = Dir::new("vs_exit_nomatch"); + dir.with_project(&PROJECT); + let fff = dir.command().args(&["--color=never", "ZZZZZ"]).full_output(); + let rg = dir.rg().args(&["--color=never", "ZZZZZ"]).full_output(); + assert_eq!(fff.code, 1, "fff-rg should exit 1 on no match"); + assert_eq!(rg.code, 1, "rg should exit 1 on no match"); +} + +// --- session reuse: warm index consistency --- + +#[test] +fn session_reuse_consistent_results() { + let dir = Dir::new("session_reuse"); + dir.with_project(&PROJECT); + let args = &["--color=never", "--no-heading", "-n", "fn"]; + + let out1 = dir.command().args(args).full_output(); + let out2 = 
dir.command().args(args).full_output(); + let out3 = dir.command().args(args).full_output(); + + assert_eq!(out1.code, out2.code); + assert_eq!(out2.code, out3.code); + + let n1 = normalize_inline(&out1.stdout); + let n2 = normalize_inline(&out2.stdout); + let n3 = normalize_inline(&out3.stdout); + assert_eq!(n1, n2, "warm index should return same results as cold"); + assert_eq!(n2, n3); +} + +#[test] +fn session_reuse_different_queries() { + let dir = Dir::new("session_diff_q"); + dir.with_project(&PROJECT); + + let out_fn = dir.command().args(&["--color=never", "--no-heading", "fn"]).full_output(); + let out_config = dir.command().args(&["--color=never", "--no-heading", "Config"]).full_output(); + let out_none = dir.command().args(&["--color=never", "--no-heading", "ZZZZNOTFOUND"]).full_output(); + + assert_eq!(out_fn.code, 0); + assert_eq!(out_config.code, 0); + assert_eq!(out_none.code, 1); + assert!(out_fn.stdout.contains("fn")); + assert!(out_config.stdout.contains("Config")); + assert!(out_none.stdout.is_empty()); +} + +#[test] +fn session_reuse_alternating_modes() { + let dir = Dir::new("session_modes"); + dir.with_project(&PROJECT); + + let grep1 = dir.command().args(&["--color=never", "--no-heading", "fn"]).full_output(); + let files = dir.command().args(&["--color=never", "--files"]).full_output(); + let grep2 = dir.command().args(&["--color=never", "--no-heading", "fn"]).full_output(); + + assert_eq!(grep1.code, 0); + assert_eq!(files.code, 0); + assert_eq!(grep2.code, 0); + assert_eq!( + normalize_inline(&grep1.stdout), + normalize_inline(&grep2.stdout), + "grep results should be stable across interleaved files queries" + ); +} + +// --- concurrency: parallel searches --- + +#[test] +fn concurrent_searches_no_corruption() { + let dir = Dir::new("concurrent"); + dir.with_project(&PROJECT); + let dir_path = dir.dir.clone(); + + let handles: Vec<_> = (0..8) + .map(|i| { + let path = dir_path.clone(); + std::thread::spawn(move || { + let bin = 
find_binary("fff-rg"); + let output = Command::new(&bin) + .current_dir(&path) + .args(["--color=never", "--no-heading", "-n", "fn"]) + .output() + .unwrap(); + let stdout = String::from_utf8(output.stdout).unwrap(); + let code = output.status.code().unwrap_or(-1); + (i, stdout, code) + }) + }) + .collect(); + + let results: Vec<_> = handles.into_iter().map(|h| h.join().unwrap()).collect(); + + for (i, _, code) in &results { + assert_eq!(*code, 0, "thread {i} got exit code {code}"); + } + + let normalized: Vec = results.iter().map(|(_, out, _)| normalize_inline(out)).collect(); + for (i, norm) in normalized.iter().enumerate().skip(1) { + assert_eq!( + &normalized[0], norm, + "thread {i} output differs from thread 0" + ); + } +} + +// --- files mode --- + +#[test] +fn vs_rg_files_list() { + let dir = Dir::new("vs_files_list"); + dir.with_project(&PROJECT); + let fff = dir.command().args(&["--color=never", "--files"]).full_output(); + let rg = dir.rg().args(&["--color=never", "--files"]).full_output(); + + assert_eq!(fff.code, rg.code, "exit code mismatch"); + + let mut fff_lines: Vec<&str> = fff.stdout.lines().collect(); + let mut rg_lines: Vec<&str> = rg.stdout.lines().collect(); + fff_lines.sort(); + rg_lines.sort(); + assert_eq!(fff_lines, rg_lines, "file listings differ\nfff: {fff_lines:?}\nrg: {rg_lines:?}"); +} + +#[test] +fn files_mode_subdirectories() { + let dir = Dir::new("files_subdirs"); + dir.with_project(&PROJECT); + let out = dir.command().args(&["--color=never", "--files"]).full_output(); + assert_eq!(out.code, 0); + let files: Vec<&str> = out.stdout.lines().collect(); + assert!(files.iter().any(|f| f.contains("src/")), "should find files in src/, got: {files:?}"); + assert!(files.iter().any(|f| f.contains("tests/")), "should find files in tests/, got: {files:?}"); + assert!(files.iter().any(|f| f.contains("data/")), "should find files in data/, got: {files:?}"); +} + +#[test] +fn files_mode_quiet() { + let dir = Dir::new("files_quiet"); + 
dir.with_project(&PROJECT); + let out = dir.command().args(&["--color=never", "--files", "-q"]).full_output(); + assert!(out.stdout.is_empty(), "quiet files should produce no output"); + assert_eq!(out.code, 0); +} + +// --- synthetic repo: scale tests --- + +#[test_case(&SMALL_REPO ; "small_50_files")] +#[test_case(&MEDIUM_REPO ; "medium_200_files")] +#[test_case(&LARGE_REPO ; "large_500_files")] +fn scale_unique_needle_finds_one_file(repo: &synthetic::SyntheticRepo) { + let name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_string(); + let dir = Dir::new(&name); + let specs = repo.populate(&dir); + + let target = &specs[0]; + let out = dir + .command() + .args(&["--color=never", "--no-heading", "-l", &target.unique_needle]) + .full_output(); + + assert_eq!(out.code, 0); + let files: Vec<&str> = out.stdout.lines().collect(); + assert_eq!(files.len(), 1, "unique needle should match exactly 1 file, got: {files:?}"); + assert!(files[0].contains(&target.path), "expected {}, got {}", target.path, files[0]); +} + +#[test_case(&SMALL_REPO ; "small_50_files")] +#[test_case(&MEDIUM_REPO ; "medium_200_files")] +#[test_case(&LARGE_REPO ; "large_500_files")] +fn scale_common_needle_match_count(repo: &synthetic::SyntheticRepo) { + let name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_string(); + let dir = Dir::new(&name); + let specs = repo.populate(&dir); + + let out = dir + .command() + .args(&["--color=never", "--no-heading", "-l", repo.common_needle]) + .full_output(); + + assert_eq!(out.code, 0); + let matched_files: Vec<&str> = out.stdout.lines().collect(); + let expected = specs.iter().filter(|s| s.has_common).count(); + assert_eq!( + matched_files.len(), + expected, + "expected {expected} files with common needle, got {}", + matched_files.len() + ); +} + +#[test_case(&SMALL_REPO ; "small_50_files")] +#[test_case(&MEDIUM_REPO ; "medium_200_files")] +#[test_case(&LARGE_REPO ; "large_500_files")] +fn 
scale_count_mode_totals(repo: &synthetic::SyntheticRepo) { + let name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_string(); + let dir = Dir::new(&name); + let specs = repo.populate(&dir); + + let out = dir + .command() + .args(&["--color=never", "--no-heading", "-c", repo.common_needle]) + .full_output(); + + assert_eq!(out.code, 0); + // Each file with the common needle has it exactly once + let total: usize = out + .stdout + .lines() + .filter_map(|l| l.rsplit(':').next()?.parse::().ok()) + .sum(); + let expected = specs.iter().filter(|s| s.has_common).count(); + assert_eq!(total, expected, "total count mismatch"); +} + +#[test_case(&SMALL_REPO ; "small_50_files")] +#[test_case(&MEDIUM_REPO ; "medium_200_files")] +fn scale_line_numbers_correct(repo: &synthetic::SyntheticRepo) { + let name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_string(); + let dir = Dir::new(&name); + let specs = repo.populate(&dir); + + let target = &specs[0]; + let out = dir + .command() + .args(&["--color=never", "--no-heading", "-n", &target.unique_needle]) + .full_output(); + + assert_eq!(out.code, 0); + let lines: Vec<&str> = out.stdout.lines().collect(); + assert_eq!(lines.len(), 1); + // Format: path:linenum:content + let parts: Vec<&str> = lines[0].splitn(3, ':').collect(); + assert_eq!(parts.len(), 3, "expected path:line:content, got: {}", lines[0]); + let line_num: u64 = parts[1].parse().unwrap_or_else(|_| panic!("bad line num: {}", parts[1])); + assert_eq!(line_num, target.unique_line, "line number mismatch for unique needle"); +} + +#[test_case(&SMALL_REPO ; "small_50_files")] +#[test_case(&MEDIUM_REPO ; "medium_200_files")] +fn scale_heading_mode(repo: &synthetic::SyntheticRepo) { + let name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_string(); + let dir = Dir::new(&name); + let specs = repo.populate(&dir); + + let out = dir + .command() + .args(&["--color=never", "--heading", "-n", 
repo.common_needle]) + .full_output(); + + assert_eq!(out.code, 0); + // In heading mode, file paths appear as standalone lines (no colon-separated content) + let expected_files = specs.iter().filter(|s| s.has_common).count(); + // Split on double newline to get blocks, each block is one file + let blocks: Vec<&str> = out.stdout.split("\n\n").filter(|b| !b.trim().is_empty()).collect(); + assert_eq!( + blocks.len(), + expected_files, + "expected {expected_files} heading blocks, got {}", + blocks.len() + ); +} + +#[test_case(&SMALL_REPO ; "small_50_files")] +#[test_case(&MEDIUM_REPO ; "medium_200_files")] +fn scale_vimgrep_format(repo: &synthetic::SyntheticRepo) { + let name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_string(); + let dir = Dir::new(&name); + let specs = repo.populate(&dir); + + let out = dir + .command() + .args(&["--color=never", "--vimgrep", repo.common_needle]) + .full_output(); + + assert_eq!(out.code, 0); + let lines: Vec<&str> = out.stdout.lines().collect(); + let expected = specs.iter().filter(|s| s.has_common).count(); + assert_eq!(lines.len(), expected); + + for line in &lines { + let parts: Vec<&str> = line.splitn(4, ':').collect(); + assert_eq!(parts.len(), 4, "vimgrep format file:line:col:content, got: {line}"); + assert!(parts[1].parse::().is_ok(), "line should be numeric: {}", parts[1]); + assert!(parts[2].parse::().is_ok(), "col should be numeric: {}", parts[2]); + assert!(parts[3].contains(repo.common_needle)); + } +} + +#[test_case(&SMALL_REPO ; "small_50_files")] +#[test_case(&MEDIUM_REPO ; "medium_200_files")] +fn scale_no_match_exit_code(repo: &synthetic::SyntheticRepo) { + let name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_string(); + let dir = Dir::new(&name); + repo.populate(&dir); + + let out = dir + .command() + .args(&["--color=never", "--no-heading", "ZZZZZ_ABSOLUTELY_NOT_IN_ANY_FILE"]) + .full_output(); + + assert_eq!(out.code, 1, "no match should exit 1"); + 
assert!(out.stdout.is_empty()); +} + +#[test_case(&SMALL_REPO ; "small_50_files")] +#[test_case(&MEDIUM_REPO ; "medium_200_files")] +fn scale_files_mode_lists_files(repo: &synthetic::SyntheticRepo) { + let name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_string(); + let dir = Dir::new(&name); + repo.populate(&dir); + + let out = dir + .command() + .args(&["--color=never", "--files"]) + .full_output(); + + assert_eq!(out.code, 0); + let listed: Vec<&str> = out.stdout.lines().collect(); + // Daemon paginates file listings, so we just verify we get a non-trivial set + assert!( + listed.len() >= 10, + "files mode should list files, got {}", + listed.len() + ); + // All listed paths should be valid relative paths + for path in &listed { + assert!(!path.is_empty(), "empty path in files listing"); + assert!(!path.starts_with('/'), "path should be relative: {path}"); + } +} + +#[test_case(&SMALL_REPO ; "small_50_files")] +#[test_case(&MEDIUM_REPO ; "medium_200_files")] +fn scale_context_output_has_context_lines(repo: &synthetic::SyntheticRepo) { + let name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_string(); + let dir = Dir::new(&name); + repo.populate(&dir); + + let out = dir + .command() + .args(&["--color=never", "--no-heading", "-n", "-C1", repo.common_needle]) + .full_output(); + + assert_eq!(out.code, 0); + // Output should have more lines than match count (context adds surrounding lines) + let match_lines = out.stdout.lines().filter(|l| l.contains(repo.common_needle)).count(); + let total_lines = out.stdout.lines().filter(|l| !l.is_empty() && *l != "--").count(); + assert!( + total_lines > match_lines, + "context mode should produce more lines than just matches: {total_lines} total, {match_lines} matches" + ); + // Context lines use - as line number separator (vs : for match lines) + let context_lines = out.stdout.lines().filter(|l| { + !l.is_empty() && *l != "--" && !l.contains(repo.common_needle) + 
}).count(); + assert!(context_lines > 0, "should have context lines around matches"); +} + +#[test_case(&SMALL_REPO ; "small_50_files")] +#[test_case(&MEDIUM_REPO ; "medium_200_files")] +fn scale_color_output_has_ansi(repo: &synthetic::SyntheticRepo) { + let name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_string(); + let dir = Dir::new(&name); + repo.populate(&dir); + + let out = dir + .command() + .args(&["--color=always", "--no-heading", "-n", repo.common_needle]) + .full_output(); + + assert_eq!(out.code, 0); + assert!(out.stdout.contains("\x1b["), "color output should contain ANSI escapes"); + assert!(out.stdout.contains("\x1b[0m"), "should have RESET codes"); + assert!(out.stdout.contains("\x1b[1m\x1b[31m"), "should have RED_BOLD for match highlights"); +} + +#[test] +fn scale_concurrent_on_large_repo() { + let dir = Dir::new("scale_concurrent"); + let specs = MEDIUM_REPO.populate(&dir); + let dir_path = dir.dir.clone(); + let needle = MEDIUM_REPO.common_needle; + let expected_files = specs.iter().filter(|s| s.has_common).count(); + + let handles: Vec<_> = (0..8) + .map(|_| { + let path = dir_path.clone(); + std::thread::spawn(move || { + let bin = find_binary("fff-rg"); + let output = Command::new(&bin) + .current_dir(&path) + .args(["--color=never", "--no-heading", "-l", needle]) + .output() + .unwrap(); + let stdout = String::from_utf8(output.stdout).unwrap(); + let code = output.status.code().unwrap_or(-1); + (stdout, code) + }) + }) + .collect(); + + let results: Vec<_> = handles.into_iter().map(|h| h.join().unwrap()).collect(); + + for (i, (stdout, code)) in results.iter().enumerate() { + assert_eq!(*code, 0, "thread {i} exit code"); + let count = stdout.lines().count(); + assert_eq!( + count, expected_files, + "thread {i}: expected {expected_files} files, got {count}" + ); + } +} + +#[test] +fn scale_every_unique_needle_findable() { + let dir = Dir::new("scale_all_needles"); + let specs = SMALL_REPO.populate(&dir); + + for 
spec in &specs { + let out = dir + .command() + .args(&["--color=never", "--no-heading", "-c", &spec.unique_needle]) + .full_output(); + + assert_eq!(out.code, 0, "needle {} should be found", spec.unique_needle); + let total: usize = out + .stdout + .lines() + .filter_map(|l| l.rsplit(':').next()?.parse::().ok()) + .sum(); + assert_eq!(total, 1, "needle {} should appear exactly once", spec.unique_needle); + } +} diff --git a/crates/cli/fff-rg/tests/rg_compat/hay.rs b/crates/cli/fff-rg/tests/rg_compat/hay.rs new file mode 100644 index 00000000..21d103b2 --- /dev/null +++ b/crates/cli/fff-rg/tests/rg_compat/hay.rs @@ -0,0 +1,152 @@ +/// Standard test corpus (borrowed from ripgrep's test suite). +pub const SHERLOCK: &str = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. 
+"; + +pub const RUST_MAIN: &str = "\ +use std::collections::HashMap; +use std::io; + +fn main() { + if let Err(e) = run() { + eprintln!(\"error: {}\", e); + std::process::exit(1); + } +} + +fn run() -> Result<(), io::Error> { + let mut map = HashMap::new(); + map.insert(\"hello\", 1); + map.insert(\"world\", 2); + + for (key, value) in &map { + println!(\"{}: {}\", key, value); + } + + println!(\"done\"); + Ok(()) +} +"; + +pub const RUST_LIB: &str = "\ +pub struct Config { + name: String, + timeout: u64, + verbose: bool, +} + +impl Config { + pub fn new(name: &str) -> Self { + Config { + name: name.to_string(), + timeout: 30, + verbose: false, + } + } + + pub fn with_timeout(mut self, timeout: u64) -> Self { + self.timeout = timeout; + self + } + + pub fn is_verbose(&self) -> bool { + self.verbose + } +} + +impl Default for Config { + fn default() -> Self { + Config::new(\"default\") + } +} +"; + +pub const RUST_TEST: &str = "\ +use super::*; + +#[test] +fn test_config_default() { + let config = Config::default(); + assert_eq!(config.timeout, 30); + assert!(!config.is_verbose()); +} + +#[test] +fn test_config_with_timeout() { + let config = Config::new(\"test\").with_timeout(60); + assert_eq!(config.timeout, 60); + assert_eq!(config.name, \"test\"); +} +"; + +pub const JSON_CONFIG: &str = "\ +{ + \"name\": \"my-project\", + \"version\": \"1.0.0\", + \"settings\": { + \"timeout\": 30, + \"verbose\": false, + \"max_retries\": 3 + }, + \"features\": [\"search\", \"preview\", \"git\"] +} +"; + +pub const UNICODE_README: &str = "\ +# Prójéct Dócs + +A café-inspired résumé builder for the naïve developer. + +## Features + +- 日本語サポート (Japanese support) +- 中文文档 (Chinese docs) +- العربية (Arabic) + +Built with care and résumé-quality output. 
+"; + +pub const INDENTED: &str = "\ + fn process() { + let x = 42; + if x > 0 { + println!(\"positive: {}\", x); + } + } +"; + +pub const REPEATED: &str = "\ +foo bar foo baz foo +hello world +foo foo foo foo foo +bar foo baz foo +"; + +pub const NO_NEWLINE: &str = "last line has no newline"; + +pub struct Hay { + pub rust_main: &'static str, + pub rust_lib: &'static str, + pub rust_test: &'static str, + pub json_config: &'static str, + pub unicode_readme: &'static str, + pub indented: &'static str, + pub repeated: &'static str, + pub no_newline: &'static str, +} + +pub const PROJECT: Hay = Hay { + rust_main: RUST_MAIN, + rust_lib: RUST_LIB, + rust_test: RUST_TEST, + json_config: JSON_CONFIG, + unicode_readme: UNICODE_README, + indented: INDENTED, + repeated: REPEATED, + no_newline: NO_NEWLINE, +}; diff --git a/crates/cli/fff-rg/tests/rg_compat/synthetic.rs b/crates/cli/fff-rg/tests/rg_compat/synthetic.rs new file mode 100644 index 00000000..8ed00bcd --- /dev/null +++ b/crates/cli/fff-rg/tests/rg_compat/synthetic.rs @@ -0,0 +1,147 @@ +use crate::util::Dir; + +const EXTENSIONS: &[&str] = &["rs", "ts", "json", "md", "txt", "toml", "yaml"]; + +const DIRS: &[&str] = &[ + "src", + "src/core", + "src/core/utils", + "src/api", + "src/api/handlers", + "src/db", + "tests", + "tests/integration", + "docs", + "config", + "scripts", + "lib", + "lib/helpers", +]; + +const DOMAINS: &[&str] = &[ + r#"use std::net::{TcpStream, SocketAddr}; +fn establish_connection(addr: SocketAddr) -> Result { + let stream = TcpStream::connect(addr)?; + stream.set_nodelay(true)?; + Ok(stream) +}"#, + r#"use sqlx::{PgPool, Row}; +async fn query_users(pool: &PgPool, limit: i64) -> Vec { + sqlx::query("SELECT name FROM users ORDER BY created_at DESC LIMIT $1") + .bind(limit) + .fetch_all(pool).await.unwrap() + .iter().map(|row| row.get("name")).collect() +}"#, + r#"fn verify_jwt_token(token: &str, secret: &[u8]) -> Result { + let parts: Vec<&str> = token.splitn(3, '.').collect(); + if parts.len() != 
3 { return Err(AuthError::MalformedToken); } + let payload = base64_decode(parts[1])?; + Ok(serde_json::from_slice(&payload)?) +}"#, + r#"struct Renderer { framebuffer: Vec, width: usize, height: usize } +impl Renderer { + fn clear(&mut self, color: u32) { self.framebuffer.fill(color); } + fn draw_pixel(&mut self, x: usize, y: usize, color: u32) { + if x < self.width && y < self.height { + self.framebuffer[y * self.width + x] = color; + } + } +}"#, + r#"use serde::{Serialize, Deserialize}; +#[derive(Serialize, Deserialize)] +struct ConfigFile { log_level: String, max_retries: u32, timeout_ms: u64 } +fn load_config(path: &std::path::Path) -> Result> { + let contents = std::fs::read_to_string(path)?; + Ok(toml::from_str(&contents)?) +}"#, + r#"struct PhysicsBody { position: [f64; 3], velocity: [f64; 3], mass: f64 } +fn apply_gravity(bodies: &mut [PhysicsBody], dt: f64) { + let gravity_constant = 6.674e-11; + for body in bodies.iter_mut() { + body.velocity[1] -= gravity_constant * dt; + for k in 0..3 { body.position[k] += body.velocity[k] * dt; } + } +}"#, + r#"use std::collections::BTreeMap; +struct LFUCache { map: BTreeMap, capacity: usize } +impl LFUCache { + fn new(capacity: usize) -> Self { Self { map: BTreeMap::new(), capacity } } + fn get(&mut self, key: &K) -> Option<&V> { + let (val, freq) = self.map.get_mut(key)?; + *freq += 1; + Some(val) + } +}"#, +]; + +pub struct SyntheticRepo { + pub file_count: usize, + pub unique_needle_prefix: &'static str, + pub common_needle: &'static str, + pub files_with_common: usize, +} + +impl SyntheticRepo { + pub fn populate(&self, dir: &Dir) -> Vec { + let mut specs = Vec::with_capacity(self.file_count); + + for i in 0..self.file_count { + let dir_name = DIRS[i % DIRS.len()]; + let ext = EXTENSIONS[i % EXTENSIONS.len()]; + let path = format!("{dir_name}/file_{i:04}.{ext}"); + let domain = DOMAINS[i % DOMAINS.len()]; + let unique = format!("{}_{i:04}", self.unique_needle_prefix); + let has_common = i < 
self.files_with_common; + + let mut content = String::with_capacity(512); + content.push_str(&format!("// {unique}\n")); + content.push_str(domain); + content.push('\n'); + if has_common { + content.push_str(&format!("// {}\n", self.common_needle)); + } + // Pad with filler to make files multi-line + for j in 0..5 { + content.push_str(&format!("// filler line {j} for {path}\n")); + } + + dir.create(&path, &content); + specs.push(FileSpec { + path, + unique_needle: unique, + unique_line: 1, + has_common, + }); + } + + specs + } +} + +pub struct FileSpec { + pub path: String, + pub unique_needle: String, + pub unique_line: u64, + pub has_common: bool, +} + +pub const SMALL_REPO: SyntheticRepo = SyntheticRepo { + file_count: 50, + unique_needle_prefix: "NEEDLE", + common_needle: "COMMON_MARKER_XYZ", + files_with_common: 30, +}; + +pub const MEDIUM_REPO: SyntheticRepo = SyntheticRepo { + file_count: 200, + unique_needle_prefix: "NEEDLE", + common_needle: "COMMON_MARKER_XYZ", + files_with_common: 120, +}; + +pub const LARGE_REPO: SyntheticRepo = SyntheticRepo { + file_count: 500, + unique_needle_prefix: "NEEDLE", + common_needle: "COMMON_MARKER_XYZ", + files_with_common: 300, +}; diff --git a/crates/cli/fff-rg/tests/rg_compat/util.rs b/crates/cli/fff-rg/tests/rg_compat/util.rs new file mode 100644 index 00000000..d23f44fd --- /dev/null +++ b/crates/cli/fff-rg/tests/rg_compat/util.rs @@ -0,0 +1,246 @@ +use std::fs; +use std::path::PathBuf; +use std::process::Command; +use std::sync::Once; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::time::Duration; + +static NEXT_ID: AtomicUsize = AtomicUsize::new(0); +static DAEMON: Once = Once::new(); + +/// Max time to wait for the daemon socket to become connectable. +const DAEMON_STARTUP_TIMEOUT: Duration = Duration::from_secs(10); +/// How often to poll the socket during startup. 
+const DAEMON_POLL_INTERVAL: Duration = Duration::from_millis(100); + +pub fn find_binary(name: &str) -> PathBuf { + let mut bin = std::env::current_exe().unwrap(); + bin.pop(); + bin.pop(); + bin.push(name); + bin +} + +fn ensure_daemon() { + DAEMON.call_once(|| { + use std::os::unix::net::UnixStream; + + let socket = fff_ipc_domain::daemon_socket_path(); + if UnixStream::connect(&socket).is_ok() { + return; + } + + let bin = find_binary("fff-daemon"); + assert!( + bin.exists(), + "fff-daemon not found at {}. Run: cargo build -p fff-daemon", + bin.display() + ); + + Command::new(&bin) + .stdin(std::process::Stdio::null()) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .spawn() + .unwrap_or_else(|e| panic!("failed to spawn fff-daemon: {e}")); + + let deadline = std::time::Instant::now() + DAEMON_STARTUP_TIMEOUT; + while std::time::Instant::now() < deadline { + std::thread::sleep(DAEMON_POLL_INTERVAL); + if UnixStream::connect(&socket).is_ok() { + return; + } + } + panic!("fff-daemon did not start within {DAEMON_STARTUP_TIMEOUT:?}"); + }); +} + +pub struct Dir { + pub(crate) dir: PathBuf, +} + +impl Dir { + pub fn new(name: &str) -> Self { + ensure_daemon(); + + let id = NEXT_ID.fetch_add(1, Ordering::Relaxed); + let pid = std::process::id(); + let dir = std::env::temp_dir().join("fff-rg-tests").join(format!("{name}-{pid}-{id}")); + + if dir.exists() { + let _ = fs::remove_dir_all(&dir); + } + fs::create_dir_all(&dir).unwrap(); + + Command::new("git") + .args(["init", "-q"]) + .current_dir(&dir) + .output() + .expect("git init failed"); + + Self { dir } + } + + pub fn create(&self, name: &str, contents: &str) { + let path = self.dir.join(name); + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).unwrap(); + } + fs::write(path, contents).unwrap(); + } + + pub fn command(&self) -> TestCommand { + let bin = find_binary("fff-rg"); + let mut cmd = Command::new(&bin); + cmd.current_dir(&self.dir); + TestCommand { cmd, dir: 
self.dir.clone() } + } + + pub fn rg(&self) -> TestCommand { + let mut cmd = Command::new("rg"); + cmd.current_dir(&self.dir); + TestCommand { cmd, dir: self.dir.clone() } + } + + pub fn with_project(&self, hay: &crate::hay::Hay) -> &Self { + self.create("src/main.rs", hay.rust_main); + self.create("src/lib.rs", hay.rust_lib); + self.create("tests/config_test.rs", hay.rust_test); + self.create("config.json", hay.json_config); + self.create("README.md", hay.unicode_readme); + self.create("src/indented.rs", hay.indented); + self.create("data/repeated.txt", hay.repeated); + self.create("data/no_newline.txt", hay.no_newline); + self.create("empty.txt", ""); + self + } +} + +impl Drop for Dir { + fn drop(&mut self) { + let _ = fs::remove_dir_all(&self.dir); + } +} + +pub struct TestCommand { + cmd: Command, + dir: PathBuf, +} + +impl TestCommand { + pub fn arg(&mut self, arg: &str) -> &mut Self { + self.cmd.arg(arg); + self + } + + pub fn args(&mut self, args: &[&str]) -> &mut Self { + self.cmd.args(args); + self + } + + pub fn stdout(&mut self) -> String { + let output = self + .cmd + .output() + .unwrap_or_else(|e| panic!("failed to run fff-rg: {e}\ndir: {}", self.dir.display())); + String::from_utf8(output.stdout).unwrap() + } + + pub fn exit_code(&mut self) -> i32 { + let output = self.cmd.output().unwrap(); + output.status.code().unwrap_or(-1) + } + + pub fn full_output(&mut self) -> Output { + let o = self + .cmd + .output() + .unwrap_or_else(|e| panic!("failed to run command: {e}\ndir: {}", self.dir.display())); + Output { + stdout: String::from_utf8_lossy(&o.stdout).into_owned(), + stderr: String::from_utf8_lossy(&o.stderr).into_owned(), + code: o.status.code().unwrap_or(-1), + } + } +} + +pub struct Output { + pub stdout: String, + #[allow(dead_code)] + pub stderr: String, + pub code: i32, +} + +pub fn normalize_inline(raw: &str) -> String { + let trailing = raw.ends_with('\n'); + let mut lines: Vec<&str> = raw.lines().collect(); + lines.sort(); + let mut out = 
lines.join("\n"); + if trailing { + out.push('\n'); + } + out +} + +pub fn normalize_heading(raw: &str) -> String { + let trailing = raw.ends_with('\n'); + // Collapse runs of 2+ newlines to exactly \n\n (rg vs fff-rg differ in blank line counts) + let mut collapsed = String::with_capacity(raw.len()); + let mut newline_run = 0usize; + for ch in raw.chars() { + if ch == '\n' { + newline_run += 1; + } else { + if newline_run >= 2 { + collapsed.push_str("\n\n"); + } else { + for _ in 0..newline_run { + collapsed.push('\n'); + } + } + newline_run = 0; + collapsed.push(ch); + } + } + if newline_run >= 2 { + collapsed.push_str("\n\n"); + } else { + for _ in 0..newline_run { + collapsed.push('\n'); + } + } + + let mut blocks: Vec<&str> = collapsed + .split("\n\n") + .map(|b| b.trim_matches('\n')) + .filter(|b| !b.is_empty()) + .collect(); + blocks.sort(); + let mut out = blocks.join("\n\n"); + if trailing && !out.ends_with('\n') { + out.push('\n'); + } + out +} + +pub fn assert_rg_match(dir: &Dir, args: &[&str], heading: bool) { + let fff_out = dir.command().args(args).full_output(); + let rg_out = dir.rg().args(args).full_output(); + + assert_eq!( + fff_out.code, rg_out.code, + "exit code mismatch for args {args:?}\nfff-rg stdout:\n{}\nrg stdout:\n{}", + fff_out.stdout, rg_out.stdout, + ); + + let normalize: fn(&str) -> String = if heading { normalize_heading } else { normalize_inline }; + + let fff_normalized = normalize(&fff_out.stdout); + let rg_normalized = normalize(&rg_out.stdout); + + assert_eq!( + fff_normalized, rg_normalized, + "stdout mismatch for args {args:?}\nfff-rg raw:\n{}\nrg raw:\n{}", + fff_out.stdout, rg_out.stdout, + ); +} diff --git a/crates/cli/rustfmt.toml b/crates/cli/rustfmt.toml new file mode 100644 index 00000000..630bada8 --- /dev/null +++ b/crates/cli/rustfmt.toml @@ -0,0 +1,6 @@ +edition = "2024" +max_width = 100 +tab_spaces = 4 +hard_tabs = false +newline_style = "Unix" +use_small_heuristics = "Max" diff --git a/crates/fff-c/src/lib.rs 
b/crates/fff-c/src/lib.rs index 83f18a76..215b6563 100644 --- a/crates/fff-c/src/lib.rs +++ b/crates/fff-c/src/lib.rs @@ -620,7 +620,7 @@ pub unsafe extern "C" fn fff_live_grep( let options = fff::GrepSearchOptions { max_file_size: default_u64(max_file_size, 10 * 1024 * 1024), max_matches_per_file: max_matches_per_file as usize, - smart_case, + case_mode: if smart_case { fff::CaseMode::Smart } else { fff::CaseMode::Sensitive }, file_offset: file_offset as usize, page_limit: default_u32(page_limit, 50) as usize, mode: grep_mode_from_u8(mode), @@ -723,7 +723,7 @@ pub unsafe extern "C" fn fff_multi_grep( let options = fff::GrepSearchOptions { max_file_size: default_u64(max_file_size, 10 * 1024 * 1024), max_matches_per_file: max_matches_per_file as usize, - smart_case, + case_mode: if smart_case { fff::CaseMode::Smart } else { fff::CaseMode::Sensitive }, file_offset: file_offset as usize, page_limit: default_u32(page_limit, 50) as usize, mode: fff::GrepMode::PlainText, // ignored by multi_grep_search diff --git a/crates/fff-core/Cargo.toml b/crates/fff-core/Cargo.toml index 9e6bb0d7..e17dc684 100644 --- a/crates/fff-core/Cargo.toml +++ b/crates/fff-core/Cargo.toml @@ -42,7 +42,7 @@ tracing = { workspace = true } fff-query-parser = { workspace = true , version = "0.8.2" } blake3 = { workspace = true } dirs = { workspace = true } -libc = "0.2" +libc = { workspace = true } git2 = { workspace = true } glidesort = { workspace = true } globset = { workspace = true } diff --git a/crates/fff-core/src/grep.rs b/crates/fff-core/src/grep.rs index 44953bd4..fb8d4a33 100644 --- a/crates/fff-core/src/grep.rs +++ b/crates/fff-core/src/grep.rs @@ -234,6 +234,14 @@ fn replace_unescaped_newline_escapes(text: &str) -> String { String::from_utf8(result).unwrap_or_else(|_| text.to_string()) } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum CaseMode { + #[default] + Smart, + Sensitive, + Insensitive, +} + /// Controls how the grep pattern is interpreted. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum GrepMode { @@ -338,7 +346,7 @@ pub struct GrepResult<'a> { pub struct GrepSearchOptions { pub max_file_size: u64, pub max_matches_per_file: usize, - pub smart_case: bool, + pub case_mode: CaseMode, /// File-based pagination offset: index into the sorted/filtered file list /// to start searching from. Pass 0 for the first page, then use /// `GrepResult::next_file_offset` for subsequent pages. @@ -373,7 +381,7 @@ impl Default for GrepSearchOptions { Self { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: CaseMode::Smart, file_offset: 0, page_limit: 50, mode: GrepMode::default(), @@ -1053,11 +1061,10 @@ pub(crate) fn multi_grep_search<'a>( }; } - // Smart case: case-insensitive when all patterns are lowercase - let case_insensitive = if options.smart_case { - !patterns.iter().any(|p| p.chars().any(|c| c.is_uppercase())) - } else { - false + let case_insensitive = match options.case_mode { + CaseMode::Smart => !patterns.iter().any(|p| p.chars().any(|c| c.is_uppercase())), + CaseMode::Insensitive => true, + CaseMode::Sensitive => false, }; let ac = aho_corasick::AhoCorasickBuilder::new() @@ -1125,7 +1132,7 @@ const fn is_utf8_char_boundary(b: u8) -> bool { /// - The input is passed directly to the regex engine without escaping /// - Smart case still applies /// - Returns `None` for invalid regex patterns — the caller falls back to literal mode -fn build_regex(pattern: &str, smart_case: bool) -> Result { +fn build_regex(pattern: &str, case_mode: CaseMode) -> Result { if pattern.is_empty() { return Err("empty pattern".to_string()); } @@ -1136,10 +1143,10 @@ fn build_regex(pattern: &str, smart_case: bool) -> Result !pattern.chars().any(|c| c.is_uppercase()), + CaseMode::Insensitive => true, + CaseMode::Sensitive => false, }; regex::bytes::RegexBuilder::new(®ex_pattern) @@ -1912,10 +1919,10 @@ pub(crate) fn grep_search<'a>( }; } - let case_insensitive = if 
options.smart_case { - !grep_text.chars().any(|c| c.is_uppercase()) - } else { - false + let case_insensitive = match options.case_mode { + CaseMode::Smart => !grep_text.chars().any(|c| c.is_uppercase()), + CaseMode::Insensitive => true, + CaseMode::Sensitive => false, }; let mut regex_fallback_error: Option = None; @@ -2008,7 +2015,7 @@ pub(crate) fn grep_search<'a>( overflow_arena, ); } - GrepMode::Regex => build_regex(&grep_text, options.smart_case) + GrepMode::Regex => build_regex(&grep_text, options.case_mode) .inspect_err(|err| { tracing::warn!("Regex compilation failed for {}. Error {}", grep_text, err); @@ -2435,7 +2442,7 @@ mod tests { let options = super::GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 0, - smart_case: true, + case_mode: CaseMode::Smart, file_offset: 0, page_limit: 100, mode: super::GrepMode::PlainText, @@ -2619,7 +2626,7 @@ mod tests { let options = super::GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 0, - smart_case: true, + case_mode: CaseMode::Smart, file_offset: 0, page_limit: 100, mode: super::GrepMode::PlainText, diff --git a/crates/fff-core/tests/bigram_overlay_coherence_test.rs b/crates/fff-core/tests/bigram_overlay_coherence_test.rs index c87e2ef4..eca023cb 100644 --- a/crates/fff-core/tests/bigram_overlay_coherence_test.rs +++ b/crates/fff-core/tests/bigram_overlay_coherence_test.rs @@ -1307,7 +1307,7 @@ fn grep_opts() -> GrepSearchOptions { GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 500, mode: GrepMode::PlainText, diff --git a/crates/fff-core/tests/bigram_overlay_integration.rs b/crates/fff-core/tests/bigram_overlay_integration.rs index c663acf8..0f79250a 100644 --- a/crates/fff-core/tests/bigram_overlay_integration.rs +++ b/crates/fff-core/tests/bigram_overlay_integration.rs @@ -368,7 +368,7 @@ fn grep_opts() -> GrepSearchOptions { GrepSearchOptions { 
max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 200, mode: GrepMode::PlainText, diff --git a/crates/fff-core/tests/fuzz_file_operations.rs b/crates/fff-core/tests/fuzz_file_operations.rs index e39a6906..d7bc8062 100644 --- a/crates/fff-core/tests/fuzz_file_operations.rs +++ b/crates/fff-core/tests/fuzz_file_operations.rs @@ -626,7 +626,7 @@ fn grep_plain_opts() -> GrepSearchOptions { GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 500, mode: GrepMode::PlainText, diff --git a/crates/fff-core/tests/fuzz_git_watcher_stress.rs b/crates/fff-core/tests/fuzz_git_watcher_stress.rs index f7ca1e80..23b8b1f6 100644 --- a/crates/fff-core/tests/fuzz_git_watcher_stress.rs +++ b/crates/fff-core/tests/fuzz_git_watcher_stress.rs @@ -891,7 +891,7 @@ fn grep_plain_matches(shared: &SharedFilePicker, query: &str) -> Vec { let opts = GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 500, mode: GrepMode::PlainText, @@ -928,7 +928,7 @@ fn grep_fuzzy_matches(shared: &SharedFilePicker, query: &str) -> Vec { let opts = GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 500, mode: GrepMode::Fuzzy, @@ -960,7 +960,7 @@ fn grep_regex_matches(shared: &SharedFilePicker, query: &str) -> Vec { let opts = GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 500, mode: GrepMode::Regex, diff --git a/crates/fff-core/tests/fuzz_real_repos.rs b/crates/fff-core/tests/fuzz_real_repos.rs index f83a0f52..8eae8146 100644 --- a/crates/fff-core/tests/fuzz_real_repos.rs +++ 
b/crates/fff-core/tests/fuzz_real_repos.rs @@ -226,7 +226,7 @@ fn grep_opts(mode: GrepMode) -> GrepSearchOptions { GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 500, mode, diff --git a/crates/fff-core/tests/grep_integration.rs b/crates/fff-core/tests/grep_integration.rs index 4efffca5..f0d5259f 100644 --- a/crates/fff-core/tests/grep_integration.rs +++ b/crates/fff-core/tests/grep_integration.rs @@ -31,7 +31,7 @@ fn plain_opts() -> GrepSearchOptions { GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 200, mode: GrepMode::PlainText, @@ -49,7 +49,7 @@ fn regex_opts() -> GrepSearchOptions { GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 200, mode: GrepMode::Regex, @@ -67,7 +67,7 @@ fn fuzzy_opts() -> GrepSearchOptions { GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 200, mode: GrepMode::Fuzzy, diff --git a/crates/fff-core/tests/new_directory_watcher_test.rs b/crates/fff-core/tests/new_directory_watcher_test.rs index 9cb9fc11..a7a3d518 100644 --- a/crates/fff-core/tests/new_directory_watcher_test.rs +++ b/crates/fff-core/tests/new_directory_watcher_test.rs @@ -132,7 +132,7 @@ fn grep_plain_count(picker: &FilePicker, query: &str) -> usize { let opts = GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 500, mode: GrepMode::PlainText, diff --git a/crates/fff-core/tests/path_separator_constraint_test.rs b/crates/fff-core/tests/path_separator_constraint_test.rs index 9519ef83..ff3b3fdd 100644 --- 
a/crates/fff-core/tests/path_separator_constraint_test.rs +++ b/crates/fff-core/tests/path_separator_constraint_test.rs @@ -36,7 +36,7 @@ fn plain_opts() -> GrepSearchOptions { GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 200, mode: GrepMode::PlainText, diff --git a/crates/fff-nvim/benches/fuzzy_search_bench.rs b/crates/fff-nvim/benches/fuzzy_search_bench.rs index 8a1fcd57..93bd3a32 100644 --- a/crates/fff-nvim/benches/fuzzy_search_bench.rs +++ b/crates/fff-nvim/benches/fuzzy_search_bench.rs @@ -627,7 +627,7 @@ fn bench_grep_search(c: &mut Criterion) { let options = GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 0, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 100, mode: GrepMode::PlainText, diff --git a/crates/fff-nvim/benches/grep_bench.rs b/crates/fff-nvim/benches/grep_bench.rs index 7e5ebbd6..0103e989 100644 --- a/crates/fff-nvim/benches/grep_bench.rs +++ b/crates/fff-nvim/benches/grep_bench.rs @@ -98,7 +98,7 @@ fn plain_options() -> GrepSearchOptions { GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 50, mode: GrepMode::PlainText, diff --git a/crates/fff-nvim/src/bin/bench_grep_query.rs b/crates/fff-nvim/src/bin/bench_grep_query.rs index 063d0816..31b02459 100644 --- a/crates/fff-nvim/src/bin/bench_grep_query.rs +++ b/crates/fff-nvim/src/bin/bench_grep_query.rs @@ -22,7 +22,7 @@ fn run_grep(picker: &FilePicker, query: &str, iters: usize) { let options = GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: usize::MAX, mode: GrepMode::PlainText, diff --git a/crates/fff-nvim/src/bin/fuzzy_grep_test.rs b/crates/fff-nvim/src/bin/fuzzy_grep_test.rs index 
e3bceac8..163afada 100644 --- a/crates/fff-nvim/src/bin/fuzzy_grep_test.rs +++ b/crates/fff-nvim/src/bin/fuzzy_grep_test.rs @@ -27,7 +27,7 @@ fn run_fuzzy_query(picker: &FilePicker, query: &str, label: &str) { let options = GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 100, mode: GrepMode::Fuzzy, diff --git a/crates/fff-nvim/src/bin/grep_profiler.rs b/crates/fff-nvim/src/bin/grep_profiler.rs index 4c0386b2..05a81477 100644 --- a/crates/fff-nvim/src/bin/grep_profiler.rs +++ b/crates/fff-nvim/src/bin/grep_profiler.rs @@ -88,7 +88,7 @@ impl<'a> GrepBench<'a> { options: GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 50, mode, @@ -409,7 +409,7 @@ fn main() { let opts = GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset, page_limit: 50, mode: Default::default(), diff --git a/crates/fff-nvim/src/bin/grep_vs_rg.rs b/crates/fff-nvim/src/bin/grep_vs_rg.rs index f202fc1f..46de977c 100644 --- a/crates/fff-nvim/src/bin/grep_vs_rg.rs +++ b/crates/fff-nvim/src/bin/grep_vs_rg.rs @@ -162,7 +162,7 @@ fn run_fff_full(picker: &FilePicker, query: &str) -> (usize, Duration) { let options = GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: usize::MAX, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: usize::MAX, mode: Default::default(), @@ -185,7 +185,7 @@ fn run_fff_page(picker: &FilePicker, query: &str) -> (usize, Duration) { let options = GrepSearchOptions { max_file_size: 10 * 1024 * 1024, max_matches_per_file: 200, - smart_case: true, + case_mode: fff::CaseMode::Smart, file_offset: 0, page_limit: 50, mode: Default::default(), diff --git a/crates/fff-nvim/src/lib.rs b/crates/fff-nvim/src/lib.rs index 
46f47691..455bbefb 100644 --- a/crates/fff-nvim/src/lib.rs +++ b/crates/fff-nvim/src/lib.rs @@ -386,7 +386,11 @@ pub fn live_grep( let options = fff::GrepSearchOptions { max_file_size: max_file_size.unwrap_or(10 * 1024 * 1024), max_matches_per_file: max_matches_per_file.unwrap_or(200), - smart_case: smart_case.unwrap_or(true), + case_mode: if smart_case.unwrap_or(true) { + fff::CaseMode::Smart + } else { + fff::CaseMode::Sensitive + }, file_offset: file_offset.unwrap_or(0), page_limit: page_size.unwrap_or(50), mode,