|
| 1 | +//! Benchmarks for path resolver implementations. |
| 2 | +//! |
| 3 | +//! Tests performance on real filesystem paths using the current workspace. |
| 4 | +//! |
| 5 | +//! # Benchmark Groups |
| 6 | +//! |
| 7 | +//! - `resolvers`: Compares [`AllowedPathResolver`] and [`AllowedGlobResolver`] on the same paths |
| 8 | +//! - `multiple_bases`: Tests [`AllowedPathResolver`] with multiple base directories |
| 9 | +//! - `canonicalize`: Isolates `canonicalize` vs `soft_canonicalize` performance |
| 10 | +//! |
| 11 | +//! # Test Cases |
| 12 | +//! |
| 13 | +//! ```text |
| 14 | +//! | Case | Path | What it tests | |
| 15 | +//! |------------------------|----------------------------------------------------|------------------------------------------------| |
| 16 | +//! | existing_file | src/lib.rs | Fast path: file exists, canonicalize succeeds | |
| 17 | +//! | new_file_existing_dir | src/new_file_test.rs | Fast path: parent exists, canonicalize parent | |
| 18 | +//! | new_file_missing_dir | src/new_dir/nested/new_file_test.rs | Slow path: soft-canonicalize for non-existent | |
| 19 | +//! | policy_reject | benchmarks/new_file_test.rs | Early rejection via fast policy check | |
| 20 | +//! | deep_nested | src/llm-coding-tools-core/src/path/.../policy.rs | Longer path, more components to process | |
| 21 | +//! | traversal_reject | ../../../outside.txt | Early rejection via lexical escape check | |
| 22 | +//! ``` |
| 23 | +//! |
| 24 | +//! # Reference Results (Linux, optimized build) |
| 25 | +//! |
| 26 | +//! ```text |
| 27 | +//! resolvers/AllowedPathResolver/existing_file ~1.7 µs |
| 28 | +//! resolvers/AllowedPathResolver/new_file_existing_dir ~3.5 µs (optimized: parent canonicalize) |
| 29 | +//! resolvers/AllowedPathResolver/new_file_missing_dir ~9.7 µs (fallback: soft_canonicalize) |
| 30 | +//! resolvers/AllowedPathResolver/policy_reject ~8.0 µs (no policy, resolves normally) |
| 31 | +//! resolvers/AllowedPathResolver/deep_nested ~10.9 µs |
| 32 | +//! resolvers/AllowedPathResolver/traversal_reject ~20 ns |
| 33 | +//! |
| 34 | +//! resolvers/AllowedGlobResolver_simple_policy/existing_file ~1.8 µs (overhead: ~100 ns) |
| 35 | +//! resolvers/AllowedGlobResolver_simple_policy/new_file_existing_dir ~3.5 µs (overhead: ~40 ns) |
| 36 | +//! resolvers/AllowedGlobResolver_simple_policy/new_file_missing_dir ~9.8 µs (overhead: ~90 ns) |
| 37 | +//! resolvers/AllowedGlobResolver_simple_policy/policy_reject ~54 ns (150x faster!) |
| 38 | +//! resolvers/AllowedGlobResolver_simple_policy/deep_nested ~10.8 µs |
| 39 | +//! resolvers/AllowedGlobResolver_simple_policy/traversal_reject ~28 ns |
| 40 | +//! |
| 41 | +//! canonicalize/existing_file_canonicalize ~1.6 µs |
| 42 | +//! canonicalize/existing_file_soft_canonicalize ~4.4 µs (2.7x slower than canonicalize) |
| 43 | +//! canonicalize/new_file_shallow_soft_canonicalize ~5.9 µs |
| 44 | +//! canonicalize/new_file_deep_soft_canonicalize ~6.9 µs |
| 45 | +//! ``` |
| 46 | +//! |
| 47 | +//! # Platform Differences |
| 48 | +//! |
| 49 | +//! On Unix, new files in existing directories use the fast path (canonicalize parent + join filename). |
| 50 | +//! On Windows, the fast path uses `soft_canonicalize` due to complex path semantics. |
| 51 | +//! |
| 52 | +//! # Running Benchmarks |
| 53 | +//! |
| 54 | +//! Quick run (1s per benchmark): |
| 55 | +//! ```sh |
| 56 | +//! cargo bench -p llm-coding-tools-core --bench path_resolvers -- --sample-size 10 --measurement-time 1 --warm-up-time 1 |
| 57 | +//! ``` |
| 58 | +//! |
| 59 | +//! Full run with baseline comparison: |
| 60 | +//! ```sh |
| 61 | +//! cargo bench -p llm-coding-tools-core --bench path_resolvers -- --save-baseline main |
| 62 | +//! # make changes, then: |
| 63 | +//! cargo bench -p llm-coding-tools-core --bench path_resolvers -- --baseline main |
| 64 | +//! ``` |
| 65 | +
|
| 66 | +use core::hint::black_box; |
| 67 | +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; |
| 68 | +use llm_coding_tools_core::path::{ |
| 69 | + AllowedGlobResolver, AllowedPathResolver, GlobPolicy, GlobPolicyBuilder, PathResolver, |
| 70 | +}; |
| 71 | +use soft_canonicalize::soft_canonicalize; |
| 72 | +use std::fs; |
| 73 | +use tempfile::TempDir; |
| 74 | + |
| 75 | +const EXISTING_FILE: &str = "src/lib.rs"; |
| 76 | +const NEW_FILE_EXISTING_DIR: &str = "src/new_file_test.rs"; |
| 77 | +// Path that matches simple policy (src/**/*.rs) but has missing directories |
| 78 | +const NEW_FILE_MISSING_DIR: &str = "src/new_dir/nested/new_file_test.rs"; |
| 79 | +// Path that does NOT match simple policy - tests early rejection |
| 80 | +const POLICY_REJECT: &str = "benchmarks/new_file_test.rs"; |
| 81 | +const DEEP_NESTED: &str = "src/llm-coding-tools-core/src/path/allowed_glob/policy.rs"; |
| 82 | +const TRAVERSAL: &str = "../../../outside.txt"; |
| 83 | + |
| 84 | +fn build_policy<F>(f: F) -> llm_coding_tools_core::error::ToolResult<GlobPolicy> |
| 85 | +where |
| 86 | + F: FnOnce(GlobPolicyBuilder) -> llm_coding_tools_core::error::ToolResult<GlobPolicyBuilder>, |
| 87 | +{ |
| 88 | + f(GlobPolicy::builder()).and_then(|b| b.build()) |
| 89 | +} |
| 90 | + |
| 91 | +/// Benchmarks [`AllowedPathResolver`] and [`AllowedGlobResolver`] on the same paths. |
| 92 | +/// |
| 93 | +/// This group measures the core resolve operation under different conditions. |
| 94 | +/// |
| 95 | +/// # Resolvers Compared |
| 96 | +/// |
| 97 | +/// ```text |
| 98 | +/// | Resolver | Description | |
| 99 | +/// |---------------------------------|------------------------------------------| |
| 100 | +/// | AllowedPathResolver | Baseline: no glob policy | |
| 101 | +/// | AllowedGlobResolver_simple | Single rule: src/**/*.rs | |
| 102 | +/// | AllowedGlobResolver_complex | 10 rules: realistic project config | |
| 103 | +/// ``` |
| 104 | +/// |
| 105 | +/// # Expected Performance (Unix) |
| 106 | +/// |
| 107 | +/// ```text |
| 108 | +/// | Case | Expected Time | Why | |
| 109 | +/// |------------------------|---------------|----------------------------------------| |
| 110 | +/// | existing_file | 1-2 µs | canonicalize is fast for existing | |
| 111 | +/// | new_file_existing_dir | 3-4 µs | canonicalize parent, join filename | |
| 112 | +/// | new_file_missing_dir | 7-8 µs | soft-canonicalize walks filesystem | |
| 113 | +/// | deep_nested | 10-11 µs | more path components to process | |
| 114 | +/// | traversal_reject | ~20 ns | lexical check only, no filesystem I/O | |
| 115 | +/// ``` |
| 116 | +fn bench_resolvers_same_paths(c: &mut Criterion) { |
| 117 | + let mut group = c.benchmark_group("resolvers"); |
| 118 | + |
| 119 | + let current_dir = std::env::current_dir().unwrap(); |
| 120 | + |
| 121 | + // Baseline: AllowedPathResolver (no glob policy) |
| 122 | + let allowed = AllowedPathResolver::new(vec![current_dir.clone()]).unwrap(); |
| 123 | + |
| 124 | + // Simple policy: single glob pattern (src/**/*.rs) |
| 125 | + // This tests minimal glob matching overhead. |
| 126 | + let simple_policy = build_policy(|b| b.allow("src/**/*.rs")).unwrap(); |
| 127 | + |
| 128 | + // Complex policy: 10 rules simulating a realistic project configuration. |
| 129 | + // Tests last-match-wins semantics and rule iteration overhead. |
| 130 | + let complex_policy = build_policy(|b| { |
| 131 | + b.allow("src/**/*.rs")? |
| 132 | + .deny("target/**")? |
| 133 | + .allow("*.toml")? |
| 134 | + .deny("*.log")? |
| 135 | + .allow("benches/**")? |
| 136 | + .deny("**/test_data/**")? |
| 137 | + .allow("tests/**/*.rs")? |
| 138 | + .deny("node_modules/**")? |
| 139 | + .allow("examples/**") |
| 140 | + }) |
| 141 | + .unwrap(); |
| 142 | + |
| 143 | + let glob_simple = AllowedGlobResolver::new(vec![current_dir.clone()]) |
| 144 | + .unwrap() |
| 145 | + .with_policy(simple_policy); |
| 146 | + let glob_complex = AllowedGlobResolver::new(vec![current_dir.clone()]) |
| 147 | + .unwrap() |
| 148 | + .with_policy(complex_policy); |
| 149 | + |
| 150 | + group.throughput(Throughput::Elements(1)); |
| 151 | + |
| 152 | + for (case_name, path_input) in [ |
| 153 | + ("existing_file", EXISTING_FILE), |
| 154 | + ("new_file_existing_dir", NEW_FILE_EXISTING_DIR), |
| 155 | + ("new_file_missing_dir", NEW_FILE_MISSING_DIR), |
| 156 | + ("policy_reject", POLICY_REJECT), |
| 157 | + ("deep_nested", DEEP_NESTED), |
| 158 | + ("traversal_reject", TRAVERSAL), |
| 159 | + ] { |
| 160 | + group.bench_with_input( |
| 161 | + BenchmarkId::new("AllowedPathResolver", case_name), |
| 162 | + &allowed, |
| 163 | + |b, resolver| b.iter(|| resolver.resolve(black_box(path_input))), |
| 164 | + ); |
| 165 | + |
| 166 | + group.bench_with_input( |
| 167 | + BenchmarkId::new("AllowedGlobResolver_simple_policy", case_name), |
| 168 | + &glob_simple, |
| 169 | + |b, resolver| b.iter(|| resolver.resolve(black_box(path_input))), |
| 170 | + ); |
| 171 | + |
| 172 | + group.bench_with_input( |
| 173 | + BenchmarkId::new("AllowedGlobResolver_complex_policy", case_name), |
| 174 | + &glob_complex, |
| 175 | + |b, resolver| b.iter(|| resolver.resolve(black_box(path_input))), |
| 176 | + ); |
| 177 | + } |
| 178 | + |
| 179 | + group.finish(); |
| 180 | +} |
| 181 | + |
| 182 | +/// Benchmarks [`AllowedPathResolver`] with multiple base directories. |
| 183 | +/// |
| 184 | +/// Tests how the resolver performs when it must search through multiple |
| 185 | +/// allowed directories to find a match. |
| 186 | +/// |
| 187 | +/// # Setup |
| 188 | +/// |
| 189 | +/// ```text |
| 190 | +/// | Base | Directory | Contains | |
| 191 | +/// |---------|-------------------|--------------| |
| 192 | +/// | Base 1 | Current workspace | src/lib.rs | |
| 193 | +/// | Base 2 | Temp directory 1 | file1.txt | |
| 194 | +/// | Base 3 | Temp directory 2 | file2.txt | |
| 195 | +/// ``` |
| 196 | +/// |
| 197 | +/// # Test Cases |
| 198 | +/// |
| 199 | +/// ```text |
| 200 | +/// | Case | What it tests | |
| 201 | +/// |-------------|--------------------------------------------| |
| 202 | +/// | first_base | Path found in first base (fastest) | |
| 203 | +/// | second_base | Path found in second base (one miss, hit) | |
| 204 | +/// | third_base | Path found in third base (two misses, hit) | |
| 205 | +/// | not_found | Path not in any base (all bases tried) | |
| 206 | +/// ``` |
| 207 | +fn bench_multiple_bases(c: &mut Criterion) { |
| 208 | + let mut group = c.benchmark_group("multiple_bases"); |
| 209 | + |
| 210 | + let current_dir = std::env::current_dir().unwrap(); |
| 211 | + let temp1 = TempDir::new().unwrap(); |
| 212 | + let temp2 = TempDir::new().unwrap(); |
| 213 | + |
| 214 | + fs::write(temp1.path().join("file1.txt"), "content").unwrap(); |
| 215 | + fs::write(temp2.path().join("file2.txt"), "content").unwrap(); |
| 216 | + |
| 217 | + let resolver = AllowedPathResolver::new(vec![ |
| 218 | + current_dir.clone(), |
| 219 | + temp1.path().to_path_buf(), |
| 220 | + temp2.path().to_path_buf(), |
| 221 | + ]) |
| 222 | + .unwrap(); |
| 223 | + |
| 224 | + group.throughput(Throughput::Elements(1)); |
| 225 | + |
| 226 | + for (case_name, path_input) in [ |
| 227 | + ("first_base", "src/lib.rs"), |
| 228 | + ("second_base", "file1.txt"), |
| 229 | + ("third_base", "file2.txt"), |
| 230 | + ("not_found", "nonexistent.xyz"), |
| 231 | + ] { |
| 232 | + group.bench_function(case_name, |b| { |
| 233 | + b.iter(|| resolver.resolve(black_box(path_input))) |
| 234 | + }); |
| 235 | + } |
| 236 | + |
| 237 | + group.finish(); |
| 238 | +} |
| 239 | + |
| 240 | +/// Benchmarks `std::fs::canonicalize` vs `soft_canonicalize` directly. |
| 241 | +/// |
| 242 | +/// Isolates the core filesystem operation to understand where time is spent. |
| 243 | +/// |
| 244 | +/// # Test Cases |
| 245 | +/// |
| 246 | +/// ```text |
| 247 | +/// | Case | Path | canonicalize | soft_canonicalize | |
| 248 | +/// |-------------------|-------------------------------|--------------|-------------------| |
| 249 | +/// | existing_file | src/lib.rs (exists) | O(1) FS call | O(1) FS call | |
| 250 | +/// | new_file_shallow | new_file.rs (in root) | N/A | O(1) FS call | |
| 251 | +/// | new_file_deep | a/b/c/new_file.rs (3 levels) | N/A | O(4) FS calls | |
| 252 | +/// ``` |
| 253 | +fn bench_canonicalize_vs_soft(c: &mut Criterion) { |
| 254 | + let mut group = c.benchmark_group("canonicalize"); |
| 255 | + |
| 256 | + let current_dir = std::env::current_dir().unwrap(); |
| 257 | + let existing = current_dir.join("src/lib.rs"); |
| 258 | + let new_shallow = current_dir.join("new_file.rs"); |
| 259 | + let new_deep = current_dir.join("a/b/c/new_file.rs"); |
| 260 | + |
| 261 | + group.throughput(Throughput::Elements(1)); |
| 262 | + |
| 263 | + group.bench_function("existing_file_canonicalize", |b| { |
| 264 | + b.iter(|| existing.canonicalize().unwrap()) |
| 265 | + }); |
| 266 | + |
| 267 | + group.bench_function("existing_file_soft_canonicalize", |b| { |
| 268 | + b.iter(|| soft_canonicalize(&existing).unwrap()) |
| 269 | + }); |
| 270 | + |
| 271 | + group.bench_function("new_file_shallow_soft_canonicalize", |b| { |
| 272 | + b.iter(|| soft_canonicalize(&new_shallow).unwrap()) |
| 273 | + }); |
| 274 | + |
| 275 | + group.bench_function("new_file_deep_soft_canonicalize", |b| { |
| 276 | + b.iter(|| soft_canonicalize(&new_deep).unwrap()) |
| 277 | + }); |
| 278 | + |
| 279 | + group.finish(); |
| 280 | +} |
| 281 | + |
| 282 | +criterion_group!( |
| 283 | + benches, |
| 284 | + bench_resolvers_same_paths, |
| 285 | + bench_multiple_bases, |
| 286 | + bench_canonicalize_vs_soft |
| 287 | +); |
| 288 | + |
| 289 | +criterion_main!(benches); |
0 commit comments