From 358730455a57744f7f54130ee70d4972852078bb Mon Sep 17 00:00:00 2001 From: Jasper Krauter Date: Mon, 22 Sep 2025 13:17:13 +0300 Subject: [PATCH 1/2] feat: Use faster fxhash for a star --- Cargo.lock | 8 +++++++- generic_a_star/Cargo.toml | 2 +- generic_a_star/src/lib.rs | 14 +++++--------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 43486272..9c950658 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -365,9 +365,9 @@ version = "0.18.1" dependencies = [ "binary-heap-plus", "compare", - "deterministic_default_hasher", "extend_map", "num-traits", + "rustc-hash", "serde", ] @@ -866,6 +866,12 @@ version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustybuzz" version = "0.20.1" diff --git a/generic_a_star/Cargo.toml b/generic_a_star/Cargo.toml index 3d88d280..194c4f81 100644 --- a/generic_a_star/Cargo.toml +++ b/generic_a_star/Cargo.toml @@ -12,8 +12,8 @@ serde = ["dep:serde"] [dependencies] binary-heap-plus = "0.5.0" -deterministic_default_hasher = "0.14.2" num-traits.workspace = true serde = { workspace = true, features = ["derive"], optional = true } extend_map = "0.14.4" compare = "0.1.0" +rustc-hash = "2.1.1" diff --git a/generic_a_star/src/lib.rs b/generic_a_star/src/lib.rs index 429ee632..5d6b45ab 100644 --- a/generic_a_star/src/lib.rs +++ b/generic_a_star/src/lib.rs @@ -11,10 +11,10 @@ use binary_heap_plus::BinaryHeap; use comparator::AStarNodeComparator; use compare::Compare; use cost::AStarCost; -use deterministic_default_hasher::DeterministicDefaultHasher; use extend_map::ExtendFilter; use num_traits::{Bounded, Zero}; use reset::Reset; +use rustc_hash::FxSeededState; mod comparator; pub mod cost; @@ -127,18 +127,14 @@ pub enum AStarState { pub struct AStar { state: AStarState<::Identifier, ::Cost>, context: Context, - closed_list: HashMap< - ::Identifier, - Context::Node, - DeterministicDefaultHasher, - >, + closed_list: HashMap<::Identifier, Context::Node, FxSeededState>, open_list: BinaryHeap, performance_counters: AStarPerformanceCounters, } #[derive(Debug)] pub struct AStarBuffers { - closed_list: HashMap, + closed_list: HashMap, open_list: BinaryHeap, } @@ -182,7 +178,7 @@ impl AStar { Self { state: AStarState::Empty, context, - closed_list: Default::default(), + closed_list: HashMap::with_hasher(FxSeededState::with_seed(0)), open_list: BinaryHeap::from_vec(Vec::new()), performance_counters: Default::default(), } @@ -605,7 +601,7 @@ impl Iterator for BacktrackingIteratorWithCost<'_, Contex impl Default for AStarBuffers { fn default() -> Self { Self { - closed_list: Default::default(), + closed_list: HashMap::with_hasher(FxSeededState::with_seed(0)), open_list: BinaryHeap::from_vec(Vec::new()), } } From c8963f1f754eba2fb7c4f2bd7c6d593c2de0104c Mon Sep 17 00:00:00 2001 From: Jasper Krauter Date: Tue, 23 Sep 2025 11:31:36 +0300 Subject: [PATCH 2/2] fix: Also use fxhash in min length strategy --- Cargo.lock | 8 +------- generic_a_star/clippy.toml | 2 ++ generic_a_star/src/lib.rs | 11 +++++------ generic_a_star/src/reset.rs | 4 ++-- lib_tsalign/Cargo.toml | 2 +- lib_tsalign/src/a_star_aligner.rs | 6 +++++- .../strategies/template_switch_min_length.rs | 16 ++++++++++++---- 7 files changed, 28 insertions(+), 21 deletions(-) create mode 100644 generic_a_star/clippy.toml diff --git a/Cargo.lock b/Cargo.lock index 9c950658..3e4541a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -252,12 +252,6 @@ dependencies = [ "powerfmt", ] -[[package]] -name = "deterministic_default_hasher" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae65f7e8e3580cb22a297790831a22567a52ddcf8a5367e70991512ec1d01716" - [[package]] name = "either" version = "1.15.0" @@ -468,7 +462,6 @@ version = "0.18.1" dependencies = [ "binary-heap-plus", "compact-genome", - "deterministic_default_hasher", "extend_map", "generic_a_star", "log", @@ -476,6 +469,7 @@ dependencies = [ "noisy_float", "nom", "num-traits", + "rustc-hash", "seed_chain", "serde", "thiserror", diff --git a/generic_a_star/clippy.toml b/generic_a_star/clippy.toml new file mode 100644 index 00000000..75b1301b --- /dev/null +++ b/generic_a_star/clippy.toml @@ -0,0 +1,2 @@ +disallowed-types = ["std::collections::HashMap", "std::collections::HashSet"] + diff --git a/generic_a_star/src/lib.rs b/generic_a_star/src/lib.rs index 5d6b45ab..7baf8fd0 100644 --- a/generic_a_star/src/lib.rs +++ b/generic_a_star/src/lib.rs @@ -2,7 +2,6 @@ use std::{ cmp::Ordering, - collections::HashMap, fmt::{Debug, Display}, hash::Hash, }; @@ -14,7 +13,7 @@ use cost::AStarCost; use extend_map::ExtendFilter; use num_traits::{Bounded, Zero}; use reset::Reset; -use rustc_hash::FxSeededState; +use rustc_hash::{FxHashMapSeed, FxSeededState}; mod comparator; pub mod cost; @@ -127,14 +126,14 @@ pub enum AStarState { pub struct AStar { state: AStarState<::Identifier, ::Cost>, context: Context, - closed_list: HashMap<::Identifier, Context::Node, FxSeededState>, + closed_list: FxHashMapSeed<::Identifier, Context::Node>, open_list: BinaryHeap, performance_counters: AStarPerformanceCounters, } #[derive(Debug)] pub struct AStarBuffers { - closed_list: HashMap, + closed_list: FxHashMapSeed, open_list: BinaryHeap, } @@ -178,7 +177,7 @@ impl AStar { Self { state: AStarState::Empty, context, - closed_list: HashMap::with_hasher(FxSeededState::with_seed(0)), + closed_list: FxHashMapSeed::with_hasher(FxSeededState::with_seed(0)), open_list: BinaryHeap::from_vec(Vec::new()), performance_counters: Default::default(), } @@ -601,7 +600,7 @@ impl Iterator for BacktrackingIteratorWithCost<'_, Contex impl Default for AStarBuffers { fn default() -> Self { Self { - closed_list: HashMap::with_hasher(FxSeededState::with_seed(0)), + closed_list: FxHashMapSeed::with_hasher(FxSeededState::with_seed(0)), open_list: BinaryHeap::from_vec(Vec::new()), } } diff --git a/generic_a_star/src/reset.rs b/generic_a_star/src/reset.rs index ae4cb35f..89b9e95e 100644 --- a/generic_a_star/src/reset.rs +++ b/generic_a_star/src/reset.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use rustc_hash::FxHashMapSeed; pub trait Reset { fn reset(&mut self); @@ -8,7 +8,7 @@ impl Reset for () { fn reset(&mut self) {} } -impl Reset for HashMap { +impl Reset for FxHashMapSeed { fn reset(&mut self) { self.clear(); } diff --git a/lib_tsalign/Cargo.toml b/lib_tsalign/Cargo.toml index 5b0e9a51..7fd23f5f 100644 --- a/lib_tsalign/Cargo.toml +++ b/lib_tsalign/Cargo.toml @@ -28,6 +28,6 @@ serde = { workspace = true, features = ["derive"], optional = true } noisy_float = { version = "0.2.0" } generic_a_star = { version = "0.18.1", path = "../generic_a_star" } log.workspace = true +rustc-hash = "2.1.1" seed_chain = { version = "0.18.1", path = "../seed_chain" } -deterministic_default_hasher = "0.14.2" extend_map = "0.14.3" diff --git a/lib_tsalign/src/a_star_aligner.rs b/lib_tsalign/src/a_star_aligner.rs index 97a841fc..ac6ffb3f 100644 --- a/lib_tsalign/src/a_star_aligner.rs +++ b/lib_tsalign/src/a_star_aligner.rs @@ -11,6 +11,7 @@ use template_switch_distance::{ AlignmentStrategySelector, chaining::ChainingStrategy, primary_match::AllowPrimaryMatchStrategy, shortcut::NoShortcutStrategy, template_switch_count::TemplateSwitchCountStrategy, + template_switch_min_length::TemplateSwitchMinLengthStrategy, }, }; use traitsequence::interface::Sequence; @@ -167,7 +168,10 @@ where Strategies::Cost: From, { let memory = Memory { - template_switch_min_length: Default::default(), + template_switch_min_length: + >::initialise_memory(), chaining: <::Chaining as ChainingStrategy< ::Cost, >>::initialise_memory(reference, query, &config, 20), diff --git a/lib_tsalign/src/a_star_aligner/template_switch_distance/strategies/template_switch_min_length.rs b/lib_tsalign/src/a_star_aligner/template_switch_distance/strategies/template_switch_min_length.rs index ec0b4334..7d0850ee 100644 --- a/lib_tsalign/src/a_star_aligner/template_switch_distance/strategies/template_switch_min_length.rs +++ b/lib_tsalign/src/a_star_aligner/template_switch_distance/strategies/template_switch_min_length.rs @@ -1,10 +1,10 @@ -use std::{collections::HashMap, marker::PhantomData, mem}; +use std::{marker::PhantomData, mem}; use compact_genome::interface::sequence::GenomeSequence; -use deterministic_default_hasher::DeterministicDefaultHasher; use generic_a_star::cost::AStarCost; use generic_a_star::reset::Reset; use generic_a_star::{AStar, AStarContext, AStarNode, AStarResult}; +use rustc_hash::{FxHashMapSeed, FxSeededState}; use crate::a_star_aligner::template_switch_distance::{AlignmentType, TemplateSwitchDirection}; use crate::a_star_aligner::template_switch_distance::{ @@ -17,7 +17,9 @@ use super::{AlignmentStrategy, AlignmentStrategySelector}; pub trait TemplateSwitchMinLengthStrategy: AlignmentStrategy { /// The type used to memorise lookahead results. - type Memory: Default + Reset; + type Memory: Reset; + + fn initialise_memory() -> Self::Memory; /// Takes the template switch entrance node and provides a lower bound for its costs depending on the minimum length of a template switch. /// The modified entrance node is returned in the iterator along with further nodes that were created while computing the lower bound. @@ -61,6 +63,8 @@ impl TemplateSwitchMinLengthStrategy { type Memory = (); + fn initialise_memory() -> Self::Memory {} + fn template_switch_min_length_lookahead< Strategies: AlignmentStrategySelector, SubsequenceType: compact_genome::interface::sequence::GenomeSequence< @@ -88,7 +92,11 @@ pub struct LookaheadMemoryKey { impl TemplateSwitchMinLengthStrategy for LookaheadTemplateSwitchMinLengthStrategy { - type Memory = HashMap; + type Memory = FxHashMapSeed; + + fn initialise_memory() -> Self::Memory { + FxHashMapSeed::with_hasher(FxSeededState::with_seed(0)) + } fn template_switch_min_length_lookahead< Strategies: AlignmentStrategySelector,