From 63497792ea46e6867ea75bd3cd42dd9c7fb46689 Mon Sep 17 00:00:00 2001 From: Bart Waardenburg Date: Tue, 12 May 2026 18:27:24 +0200 Subject: [PATCH 1/2] refactor(core): drop redundant per-call FileId/path indices in detectors find_unused_exports, find_private_type_leaks, and find_duplicate_exports each rebuilt a FxHashMap (reachable_files set, module_by_id, module_idx_by_file_id, file_id_by_path) on every invocation. FileIds are assigned sequentially in discover/walk.rs:264, so graph.modules can be indexed directly via file_id.0 as usize, the same pattern find_unused_files already uses at unused_files.rs:73. Real-world wall-clock is in the noise across next.js (125k files), typescript (44k), vite (24k), vue-core (12k), and svelte (11k). Issue counts on next.js identical (24986). Shipped as the structural-cleanup branch of the issue's acceptance criterion. No behavior change. Fixes #333 --- crates/core/src/analyze/unused_exports.rs | 120 ++++++++-------------- 1 file changed, 41 insertions(+), 79 deletions(-) diff --git a/crates/core/src/analyze/unused_exports.rs b/crates/core/src/analyze/unused_exports.rs index 6cdfb7dd8..83053433f 100644 --- a/crates/core/src/analyze/unused_exports.rs +++ b/crates/core/src/analyze/unused_exports.rs @@ -243,15 +243,6 @@ pub fn find_unused_exports( None }; - // Precompute reachable FileIds so we can distinguish meaningful references - // (from reachable modules) from unreachable-to-unreachable references. - let reachable_files: FxHashSet = graph - .modules - .iter() - .filter(|m| m.is_reachable()) - .map(|m| m.file_id.0) - .collect(); - let module_results: Vec<(Vec, Vec, Vec)> = graph .modules .par_iter() @@ -311,10 +302,12 @@ pub fn find_unused_exports( let has_cross_file_ref = if module.is_reachable() { !export.references.is_empty() } else { - export - .references - .iter() - .any(|r| reachable_files.contains(&r.from_file.0)) + export.references.iter().any(|r| { + graph + .modules + .get(r.from_file.0 as usize) + .is_some_and(ModuleNode::is_reachable) + }) }; // Treat side-effect-registered exports (Lit @customElement, // customElements.define) as referenced even when no other file @@ -595,12 +588,6 @@ pub fn find_private_type_leaks( suppressions: &SuppressionContext<'_>, line_offsets_by_file: &LineOffsetsMap<'_>, ) -> Vec { - let module_by_id: FxHashMap = graph - .modules - .iter() - .map(|module| (module.file_id, module)) - .collect(); - let mut leaks = Vec::new(); for module_info in modules { if module_info.public_signature_type_references.is_empty() @@ -608,7 +595,7 @@ pub fn find_private_type_leaks( { continue; } - let Some(module) = module_by_id.get(&module_info.file_id) else { + let Some(module) = graph.modules.get(module_info.file_id.0 as usize) else { continue; }; if is_storybook_file(&module.path) || is_route_convention_file(&module.path, &config.root) { @@ -665,17 +652,17 @@ pub fn find_private_type_leaks( fn collect_dynamic_reexport_sources( resolved_modules: &[crate::resolve::ResolvedModule], graph: &ModuleGraph, - module_idx_by_file_id: &FxHashMap, re_export_sources: &mut FxHashMap>, ) { use crate::extract::ExportName; use fallow_types::extract::ImportedName; for resolved in resolved_modules { - let Some(&wrapper_idx) = module_idx_by_file_id.get(&resolved.file_id) else { + let wrapper_idx = resolved.file_id.0 as usize; + let Some(wrapper) = graph.modules.get(wrapper_idx) else { continue; }; - let wrapper_exports = &graph.modules[wrapper_idx].exports; + let wrapper_exports = &wrapper.exports; for dynamic_import in &resolved.resolved_dynamic_imports { let crate::resolve::ResolveResult::InternalModule(source_file_id) = @@ -698,9 +685,10 @@ fn collect_dynamic_reexport_sources( continue; } - let Some(&source_idx) = module_idx_by_file_id.get(source_file_id) else { + let source_idx = source_file_id.0 as usize; + if source_idx >= graph.modules.len() { continue; - }; + } re_export_sources .entry(wrapper_idx) .or_default() @@ -726,15 +714,9 @@ pub fn find_duplicate_exports( resolved_modules: &[crate::resolve::ResolvedModule], ) -> Vec { let ignore_matchers = config.compiled_ignore_exports.as_slice(); - // Build a map from FileId to module index for dynamic re-export source lookup. - let module_idx_by_file_id: FxHashMap = graph - .modules - .iter() - .enumerate() - .map(|(idx, m)| (m.file_id, idx)) - .collect(); - // Build a set of re-export relationships: (re-exporting module idx) -> set of (source module idx) + // Build a set of re-export relationships: (re-exporting module idx) -> set of (source module idx). + // FileIds are sequential (discover/walk.rs:264) so module_idx == file_id.0 as usize. let mut re_export_sources: FxHashMap> = FxHashMap::default(); for (idx, module) in graph.modules.iter().enumerate() { for re in &module.re_exports { @@ -753,12 +735,7 @@ pub fn find_duplicate_exports( // import targeting `InternalModule(S)` AND A also exports the name being // imported. The export-side check guards against false negatives where a // module dynamically imports something but does not actually re-export it. - collect_dynamic_reexport_sources( - resolved_modules, - graph, - &module_idx_by_file_id, - &mut re_export_sources, - ); + collect_dynamic_reexport_sources(resolved_modules, graph, &mut re_export_sources); struct ExportEntry { module_idx: usize, @@ -769,11 +746,6 @@ pub fn find_duplicate_exports( } let mut export_locations: FxHashMap> = FxHashMap::default(); - let file_id_by_path: FxHashMap<&std::path::Path, FileId> = graph - .modules - .iter() - .map(|module| (module.path.as_path(), module.file_id)) - .collect(); for (idx, module) in graph.modules.iter().enumerate() { if !module.is_reachable() || module.is_entry_point() { @@ -857,24 +829,28 @@ pub fn find_duplicate_exports( // For each pair (A, B), if A re-exports from B or B re-exports from A, // they are part of the same export chain, not true duplicates. let module_indices: FxHashSet = locations.iter().map(|e| e.module_idx).collect(); - let independent: Vec = locations - .into_iter() - .filter(|e| { - let sources = re_export_sources.get(&e.module_idx); - let has_source_in_set = - sources.is_some_and(|s| s.iter().any(|src| module_indices.contains(src))); - !has_source_in_set - }) - .map(|e| { - let (line, col) = - byte_offset_to_line_col(line_offsets_by_file, e.file_id, e.span_start); - DuplicateLocation { - path: e.path, - line, - col, - } - }) - .collect(); + let (independent_file_ids, independent): (Vec, Vec) = + locations + .into_iter() + .filter(|e| { + let sources = re_export_sources.get(&e.module_idx); + let has_source_in_set = sources + .is_some_and(|s| s.iter().any(|src| module_indices.contains(src))); + !has_source_in_set + }) + .map(|e| { + let (line, col) = + byte_offset_to_line_col(line_offsets_by_file, e.file_id, e.span_start); + ( + e.file_id, + DuplicateLocation { + path: e.path, + line, + col, + }, + ) + }) + .unzip(); if independent.len() <= 1 { return None; @@ -885,7 +861,7 @@ pub fn find_duplicate_exports( // modules in different directories) that happen to export the same name // are not actionable duplicates since they can never be confused at an // import site. - let has_shared_importer = has_common_importer(&independent, graph, &file_id_by_path); + let has_shared_importer = has_common_importer(&independent_file_ids, graph); if has_shared_importer { Some(DuplicateExport { export_name: name, @@ -904,21 +880,7 @@ pub fn find_duplicate_exports( /// from both. This filters out false positives from unrelated leaf modules (e.g., /// SvelteKit route files in different directories) that coincidentally export the /// same name but are never imported together. -fn has_common_importer( - locations: &[DuplicateLocation], - graph: &ModuleGraph, - file_id_by_path: &FxHashMap<&std::path::Path, FileId>, -) -> bool { - if locations.len() <= 1 { - return false; - } - - // Collect FileIds for the duplicate locations through a pre-built path index. - let file_ids: Vec = locations - .iter() - .filter_map(|loc| file_id_by_path.get(loc.path.as_path()).copied()) - .collect(); - +fn has_common_importer(file_ids: &[FileId], graph: &ModuleGraph) -> bool { if file_ids.len() <= 1 { return false; } @@ -926,7 +888,7 @@ fn has_common_importer( let duplicate_files: FxHashSet = file_ids.iter().copied().collect(); let mut importer_owner: FxHashMap = FxHashMap::default(); - for file_id in file_ids { + for &file_id in file_ids { let idx = file_id.0 as usize; if idx >= graph.reverse_deps.len() { continue; From 2c5733d5d3feab1accb87e8b64b6c158f2eb4ada Mon Sep 17 00:00:00 2001 From: Bart Waardenburg Date: Tue, 12 May 2026 20:03:34 +0200 Subject: [PATCH 2/2] refactor(core): pin ModuleGraph::modules FileId-as-index invariant Follow-up to 63497792 addressing panel-review feedback: 1. Document the invariant on ModuleGraph::modules: modules[file_id.0 as usize].file_id == file_id, anchored to discover/walk.rs and build::populate_edges. Single source of truth instead of duplicating the rationale in three detector comments. 2. Add debug_assert_eq! at each of the four index-by-FileId sites introduced in the parent commit (find_unused_exports cross-file ref check, find_private_type_leaks module lookup, find_duplicate_exports enumerate, collect_dynamic_reexport_sources wrapper lookup). Fires under cargo test if a future ModuleGraph::build change ever filters or sparsifies modules silently. Zero cost in release. 3. Confirmed issue-count parity on all five real-world fixtures (was verified on next.js only in the parent commit): next.js 24986, typescript 14622, vite 1582, vue-core 239, svelte 4855, identical BEFORE/AFTER. Refs #333 --- crates/core/src/analyze/unused_exports.rs | 26 ++++++++++++++++++----- crates/graph/src/graph/mod.rs | 8 +++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/crates/core/src/analyze/unused_exports.rs b/crates/core/src/analyze/unused_exports.rs index 83053433f..f1fbfb96a 100644 --- a/crates/core/src/analyze/unused_exports.rs +++ b/crates/core/src/analyze/unused_exports.rs @@ -303,10 +303,13 @@ pub fn find_unused_exports( !export.references.is_empty() } else { export.references.iter().any(|r| { - graph - .modules - .get(r.from_file.0 as usize) - .is_some_and(ModuleNode::is_reachable) + graph.modules.get(r.from_file.0 as usize).is_some_and(|m| { + debug_assert_eq!( + m.file_id, r.from_file, + "ModuleGraph::modules FileId-as-index invariant broken" + ); + m.is_reachable() + }) }) }; // Treat side-effect-registered exports (Lit @customElement, @@ -598,6 +601,10 @@ pub fn find_private_type_leaks( let Some(module) = graph.modules.get(module_info.file_id.0 as usize) else { continue; }; + debug_assert_eq!( + module.file_id, module_info.file_id, + "ModuleGraph::modules FileId-as-index invariant broken" + ); if is_storybook_file(&module.path) || is_route_convention_file(&module.path, &config.root) { continue; } @@ -662,6 +669,10 @@ fn collect_dynamic_reexport_sources( let Some(wrapper) = graph.modules.get(wrapper_idx) else { continue; }; + debug_assert_eq!( + wrapper.file_id, resolved.file_id, + "ModuleGraph::modules FileId-as-index invariant broken" + ); let wrapper_exports = &wrapper.exports; for dynamic_import in &resolved.resolved_dynamic_imports { @@ -716,9 +727,14 @@ pub fn find_duplicate_exports( let ignore_matchers = config.compiled_ignore_exports.as_slice(); // Build a set of re-export relationships: (re-exporting module idx) -> set of (source module idx). - // FileIds are sequential (discover/walk.rs:264) so module_idx == file_id.0 as usize. + // Module idx and `file_id.0 as usize` are interchangeable; see the + // invariant doc on `ModuleGraph::modules`. let mut re_export_sources: FxHashMap> = FxHashMap::default(); for (idx, module) in graph.modules.iter().enumerate() { + debug_assert_eq!( + module.file_id.0 as usize, idx, + "ModuleGraph::modules FileId-as-index invariant broken" + ); for re in &module.re_exports { re_export_sources .entry(idx) diff --git a/crates/graph/src/graph/mod.rs b/crates/graph/src/graph/mod.rs index 41c94ae9a..bcd9fa6d9 100644 --- a/crates/graph/src/graph/mod.rs +++ b/crates/graph/src/graph/mod.rs @@ -28,6 +28,14 @@ pub use types::{ExportSymbol, ModuleNode, ReExportEdge, ReferenceKind, SymbolRef #[derive(Debug)] pub struct ModuleGraph { /// All modules indexed by `FileId`. + /// + /// Invariant: `modules[file_id.0 as usize].file_id == file_id` for every + /// `FileId` in the graph. Holds because `discover/walk.rs` assigns FileIds + /// sequentially via `.enumerate()` after path-sorting, and + /// `build::populate_edges` pushes one `ModuleNode` per file in iteration + /// order. Detectors rely on this for O(1) FileId-to-module lookup + /// (`graph.modules.get(file_id.0 as usize)`) instead of building a + /// per-call `FxHashMap`. pub modules: Vec, /// Flat edge storage for cache-friendly iteration. edges: Vec,