Skip to content

Commit ffce67d

Browse files
authored
Merge pull request #90 from Sewer56/optimize-resolvers
Optimize path resolver performance with early rejection and fast path
2 parents 01ffc7b + 6bb4750 commit ffce67d

6 files changed

Lines changed: 537 additions & 25 deletions

File tree

src/llm-coding-tools-core/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,7 @@ rstest = "0.26"
120120
[[bench]]
121121
name = "model_catalog_builder"
122122
harness = false
123+
124+
[[bench]]
125+
name = "path_resolvers"
126+
harness = false
Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
//! Benchmarks for path resolver implementations.
2+
//!
3+
//! Tests performance on real filesystem paths using the current workspace.
4+
//!
5+
//! # Benchmark Groups
6+
//!
7+
//! - `resolvers`: Compares [`AllowedPathResolver`] and [`AllowedGlobResolver`] on the same paths
8+
//! - `multiple_bases`: Tests [`AllowedPathResolver`] with multiple base directories
9+
//! - `canonicalize`: Isolates `canonicalize` vs `soft_canonicalize` performance
10+
//!
11+
//! # Test Cases
12+
//!
13+
//! ```text
14+
//! | Case | Path | What it tests |
15+
//! |------------------------|----------------------------------------------------|------------------------------------------------|
16+
//! | existing_file | src/lib.rs | Fast path: file exists, canonicalize succeeds |
17+
//! | new_file_existing_dir | src/new_file_test.rs | Fast path: parent exists, canonicalize parent |
18+
//! | new_file_missing_dir | src/new_dir/nested/new_file_test.rs | Slow path: soft-canonicalize for non-existent |
19+
//! | policy_reject | benchmarks/new_file_test.rs | Early rejection via fast policy check |
20+
//! | deep_nested | src/llm-coding-tools-core/src/path/.../policy.rs | Longer path, more components to process |
21+
//! | traversal_reject | ../../../outside.txt | Early rejection via lexical escape check |
22+
//! ```
23+
//!
24+
//! # Reference Results (Linux, optimized build)
25+
//!
26+
//! ```text
27+
//! resolvers/AllowedPathResolver/existing_file ~1.7 µs
28+
//! resolvers/AllowedPathResolver/new_file_existing_dir ~3.5 µs (optimized: parent canonicalize)
29+
//! resolvers/AllowedPathResolver/new_file_missing_dir ~9.7 µs (fallback: soft_canonicalize)
30+
//! resolvers/AllowedPathResolver/policy_reject ~8.0 µs (no policy, resolves normally)
31+
//! resolvers/AllowedPathResolver/deep_nested ~10.9 µs
32+
//! resolvers/AllowedPathResolver/traversal_reject ~20 ns
33+
//!
34+
//! resolvers/AllowedGlobResolver_simple_policy/existing_file ~1.8 µs (overhead: ~100 ns)
35+
//! resolvers/AllowedGlobResolver_simple_policy/new_file_existing_dir ~3.5 µs (overhead: ~40 ns)
36+
//! resolvers/AllowedGlobResolver_simple_policy/new_file_missing_dir ~9.8 µs (overhead: ~90 ns)
37+
//! resolvers/AllowedGlobResolver_simple_policy/policy_reject ~54 ns (150x faster!)
38+
//! resolvers/AllowedGlobResolver_simple_policy/deep_nested ~10.8 µs
39+
//! resolvers/AllowedGlobResolver_simple_policy/traversal_reject ~28 ns
40+
//!
41+
//! canonicalize/existing_file_canonicalize ~1.6 µs
42+
//! canonicalize/existing_file_soft_canonicalize ~4.4 µs (2.7x slower than canonicalize)
43+
//! canonicalize/new_file_shallow_soft_canonicalize ~5.9 µs
44+
//! canonicalize/new_file_deep_soft_canonicalize ~6.9 µs
45+
//! ```
46+
//!
47+
//! # Platform Differences
48+
//!
49+
//! On Unix, new files in existing directories use the fast path (canonicalize parent + join filename).
50+
//! On Windows, the fast path uses `soft_canonicalize` due to complex path semantics.
51+
//!
52+
//! # Running Benchmarks
53+
//!
54+
//! Quick run (1s per benchmark):
55+
//! ```sh
56+
//! cargo bench -p llm-coding-tools-core --bench path_resolvers -- --sample-size 10 --measurement-time 1 --warm-up-time 1
57+
//! ```
58+
//!
59+
//! Full run with baseline comparison:
60+
//! ```sh
61+
//! cargo bench -p llm-coding-tools-core --bench path_resolvers -- --save-baseline main
62+
//! # make changes, then:
63+
//! cargo bench -p llm-coding-tools-core --bench path_resolvers -- --baseline main
64+
//! ```
65+
66+
use core::hint::black_box;
67+
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
68+
use llm_coding_tools_core::path::{
69+
AllowedGlobResolver, AllowedPathResolver, GlobPolicy, GlobPolicyBuilder, PathResolver,
70+
};
71+
use soft_canonicalize::soft_canonicalize;
72+
use std::fs;
73+
use tempfile::TempDir;
74+
75+
const EXISTING_FILE: &str = "src/lib.rs";
76+
const NEW_FILE_EXISTING_DIR: &str = "src/new_file_test.rs";
77+
// Path that matches simple policy (src/**/*.rs) but has missing directories
78+
const NEW_FILE_MISSING_DIR: &str = "src/new_dir/nested/new_file_test.rs";
79+
// Path that does NOT match simple policy - tests early rejection
80+
const POLICY_REJECT: &str = "benchmarks/new_file_test.rs";
81+
const DEEP_NESTED: &str = "src/llm-coding-tools-core/src/path/allowed_glob/policy.rs";
82+
const TRAVERSAL: &str = "../../../outside.txt";
83+
84+
fn build_policy<F>(f: F) -> llm_coding_tools_core::error::ToolResult<GlobPolicy>
85+
where
86+
F: FnOnce(GlobPolicyBuilder) -> llm_coding_tools_core::error::ToolResult<GlobPolicyBuilder>,
87+
{
88+
f(GlobPolicy::builder()).and_then(|b| b.build())
89+
}
90+
91+
/// Benchmarks [`AllowedPathResolver`] and [`AllowedGlobResolver`] on the same paths.
92+
///
93+
/// This group measures the core resolve operation under different conditions.
94+
///
95+
/// # Resolvers Compared
96+
///
97+
/// ```text
98+
/// | Resolver | Description |
99+
/// |---------------------------------|------------------------------------------|
100+
/// | AllowedPathResolver | Baseline: no glob policy |
101+
/// | AllowedGlobResolver_simple | Single rule: src/**/*.rs |
102+
/// | AllowedGlobResolver_complex | 10 rules: realistic project config |
103+
/// ```
104+
///
105+
/// # Expected Performance (Unix)
106+
///
107+
/// ```text
108+
/// | Case | Expected Time | Why |
109+
/// |------------------------|---------------|----------------------------------------|
110+
/// | existing_file | 1-2 µs | canonicalize is fast for existing |
111+
/// | new_file_existing_dir | 3-4 µs | canonicalize parent, join filename |
112+
/// | new_file_missing_dir | 7-8 µs | soft-canonicalize walks filesystem |
113+
/// | deep_nested | 10-11 µs | more path components to process |
114+
/// | traversal_reject | ~20 ns | lexical check only, no filesystem I/O |
115+
/// ```
116+
fn bench_resolvers_same_paths(c: &mut Criterion) {
117+
let mut group = c.benchmark_group("resolvers");
118+
119+
let current_dir = std::env::current_dir().unwrap();
120+
121+
// Baseline: AllowedPathResolver (no glob policy)
122+
let allowed = AllowedPathResolver::new(vec![current_dir.clone()]).unwrap();
123+
124+
// Simple policy: single glob pattern (src/**/*.rs)
125+
// This tests minimal glob matching overhead.
126+
let simple_policy = build_policy(|b| b.allow("src/**/*.rs")).unwrap();
127+
128+
// Complex policy: 10 rules simulating a realistic project configuration.
129+
// Tests last-match-wins semantics and rule iteration overhead.
130+
let complex_policy = build_policy(|b| {
131+
b.allow("src/**/*.rs")?
132+
.deny("target/**")?
133+
.allow("*.toml")?
134+
.deny("*.log")?
135+
.allow("benches/**")?
136+
.deny("**/test_data/**")?
137+
.allow("tests/**/*.rs")?
138+
.deny("node_modules/**")?
139+
.allow("examples/**")
140+
})
141+
.unwrap();
142+
143+
let glob_simple = AllowedGlobResolver::new(vec![current_dir.clone()])
144+
.unwrap()
145+
.with_policy(simple_policy);
146+
let glob_complex = AllowedGlobResolver::new(vec![current_dir.clone()])
147+
.unwrap()
148+
.with_policy(complex_policy);
149+
150+
group.throughput(Throughput::Elements(1));
151+
152+
for (case_name, path_input) in [
153+
("existing_file", EXISTING_FILE),
154+
("new_file_existing_dir", NEW_FILE_EXISTING_DIR),
155+
("new_file_missing_dir", NEW_FILE_MISSING_DIR),
156+
("policy_reject", POLICY_REJECT),
157+
("deep_nested", DEEP_NESTED),
158+
("traversal_reject", TRAVERSAL),
159+
] {
160+
group.bench_with_input(
161+
BenchmarkId::new("AllowedPathResolver", case_name),
162+
&allowed,
163+
|b, resolver| b.iter(|| resolver.resolve(black_box(path_input))),
164+
);
165+
166+
group.bench_with_input(
167+
BenchmarkId::new("AllowedGlobResolver_simple_policy", case_name),
168+
&glob_simple,
169+
|b, resolver| b.iter(|| resolver.resolve(black_box(path_input))),
170+
);
171+
172+
group.bench_with_input(
173+
BenchmarkId::new("AllowedGlobResolver_complex_policy", case_name),
174+
&glob_complex,
175+
|b, resolver| b.iter(|| resolver.resolve(black_box(path_input))),
176+
);
177+
}
178+
179+
group.finish();
180+
}
181+
182+
/// Benchmarks [`AllowedPathResolver`] with multiple base directories.
183+
///
184+
/// Tests how the resolver performs when it must search through multiple
185+
/// allowed directories to find a match.
186+
///
187+
/// # Setup
188+
///
189+
/// ```text
190+
/// | Base | Directory | Contains |
191+
/// |---------|-------------------|--------------|
192+
/// | Base 1 | Current workspace | src/lib.rs |
193+
/// | Base 2 | Temp directory 1 | file1.txt |
194+
/// | Base 3 | Temp directory 2 | file2.txt |
195+
/// ```
196+
///
197+
/// # Test Cases
198+
///
199+
/// ```text
200+
/// | Case | What it tests |
201+
/// |-------------|--------------------------------------------|
202+
/// | first_base | Path found in first base (fastest) |
203+
/// | second_base | Path found in second base (one miss, hit) |
204+
/// | third_base | Path found in third base (two misses, hit) |
205+
/// | not_found | Path not in any base (all bases tried) |
206+
/// ```
207+
fn bench_multiple_bases(c: &mut Criterion) {
208+
let mut group = c.benchmark_group("multiple_bases");
209+
210+
let current_dir = std::env::current_dir().unwrap();
211+
let temp1 = TempDir::new().unwrap();
212+
let temp2 = TempDir::new().unwrap();
213+
214+
fs::write(temp1.path().join("file1.txt"), "content").unwrap();
215+
fs::write(temp2.path().join("file2.txt"), "content").unwrap();
216+
217+
let resolver = AllowedPathResolver::new(vec![
218+
current_dir.clone(),
219+
temp1.path().to_path_buf(),
220+
temp2.path().to_path_buf(),
221+
])
222+
.unwrap();
223+
224+
group.throughput(Throughput::Elements(1));
225+
226+
for (case_name, path_input) in [
227+
("first_base", "src/lib.rs"),
228+
("second_base", "file1.txt"),
229+
("third_base", "file2.txt"),
230+
("not_found", "nonexistent.xyz"),
231+
] {
232+
group.bench_function(case_name, |b| {
233+
b.iter(|| resolver.resolve(black_box(path_input)))
234+
});
235+
}
236+
237+
group.finish();
238+
}
239+
240+
/// Benchmarks `std::fs::canonicalize` vs `soft_canonicalize` directly.
241+
///
242+
/// Isolates the core filesystem operation to understand where time is spent.
243+
///
244+
/// # Test Cases
245+
///
246+
/// ```text
247+
/// | Case | Path | canonicalize | soft_canonicalize |
248+
/// |-------------------|-------------------------------|--------------|-------------------|
249+
/// | existing_file | src/lib.rs (exists) | O(1) FS call | O(1) FS call |
250+
/// | new_file_shallow | new_file.rs (in root) | N/A | O(1) FS call |
251+
/// | new_file_deep | a/b/c/new_file.rs (3 levels) | N/A | O(4) FS calls |
252+
/// ```
253+
fn bench_canonicalize_vs_soft(c: &mut Criterion) {
254+
let mut group = c.benchmark_group("canonicalize");
255+
256+
let current_dir = std::env::current_dir().unwrap();
257+
let existing = current_dir.join("src/lib.rs");
258+
let new_shallow = current_dir.join("new_file.rs");
259+
let new_deep = current_dir.join("a/b/c/new_file.rs");
260+
261+
group.throughput(Throughput::Elements(1));
262+
263+
group.bench_function("existing_file_canonicalize", |b| {
264+
b.iter(|| existing.canonicalize().unwrap())
265+
});
266+
267+
group.bench_function("existing_file_soft_canonicalize", |b| {
268+
b.iter(|| soft_canonicalize(&existing).unwrap())
269+
});
270+
271+
group.bench_function("new_file_shallow_soft_canonicalize", |b| {
272+
b.iter(|| soft_canonicalize(&new_shallow).unwrap())
273+
});
274+
275+
group.bench_function("new_file_deep_soft_canonicalize", |b| {
276+
b.iter(|| soft_canonicalize(&new_deep).unwrap())
277+
});
278+
279+
group.finish();
280+
}
281+
282+
criterion_group!(
283+
benches,
284+
bench_resolvers_same_paths,
285+
bench_multiple_bases,
286+
bench_canonicalize_vs_soft
287+
);
288+
289+
criterion_main!(benches);

src/llm-coding-tools-core/src/path/allowed.rs

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Allowed directory path resolver implementation.
22
3-
use super::PathResolver;
3+
use super::{relative_path_escapes_base, resolve_new_file_fast, PathResolver};
44
use crate::context::PathMode;
55
use crate::error::{ToolError, ToolResult};
66
use soft_canonicalize::soft_canonicalize;
@@ -101,11 +101,18 @@ impl PathResolver for AllowedPathResolver {
101101
const PATH_MODE: PathMode = PathMode::Allowed;
102102

103103
fn resolve(&self, path: &str) -> ToolResult<PathBuf> {
104-
let input_path = PathBuf::from(path);
104+
let input_path = Path::new(path);
105+
106+
if relative_path_escapes_base(input_path) {
107+
return Err(ToolError::InvalidPath(format!(
108+
"path '{}' is not within allowed directories",
109+
path
110+
)));
111+
}
105112

106113
// Try each allowed base directory in order
107114
for base in self.allowed_paths.iter() {
108-
let candidate = base.join(&input_path);
115+
let candidate = base.join(input_path);
109116

110117
// Try to canonicalize for existing paths
111118
if let Ok(canonical) = candidate.canonicalize() {
@@ -117,8 +124,17 @@ impl PathResolver for AllowedPathResolver {
117124
continue;
118125
}
119126

120-
// Non-existent paths still need a resolved absolute target so we can
121-
// validate containment consistently across platforms.
127+
// Fast path for new files in existing directories.
128+
// Canonicalizes parent directory, then joins filename.
129+
// This avoids soft_canonicalize's expensive walk-up logic.
130+
if let Some(resolved) = resolve_new_file_fast(&candidate) {
131+
if resolved.starts_with(base) {
132+
return Ok(resolved);
133+
}
134+
continue;
135+
}
136+
137+
// Fallback for paths where parent doesn't exist.
122138
if let Ok(resolved) = soft_canonicalize(&candidate) {
123139
if resolved.starts_with(base) {
124140
return Ok(resolved);

0 commit comments

Comments
 (0)