diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 50c689d0..2c34e631 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -73,9 +73,9 @@ jobs: strategy: matrix: platform: - - runner: macos-13 + - runner: macos-latest target: x86_64 - - runner: macos-14 + - runner: macos-latest target: aarch64 steps: - uses: actions/checkout@v4 diff --git a/.vscode/settings.json b/.vscode/settings.json index 8b978f69..ad4029f4 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -20,5 +20,6 @@ "Cargo.toml", "*.fa", "*.toml" - ] + ], + "rust-analyzer.check.command": "clippy" } \ No newline at end of file diff --git a/generic_a_star/src/lib.rs b/generic_a_star/src/lib.rs index 9f0baa1e..58faef6a 100644 --- a/generic_a_star/src/lib.rs +++ b/generic_a_star/src/lib.rs @@ -100,6 +100,12 @@ pub trait AStarContext: Reset { } } +/// The closed list for the A* algorithm. +pub trait AStarClosedList: Reset {} + +/// The open list for the A* algorithm. +pub trait AStarOpenList: Reset {} + #[derive(Debug, Default)] pub struct AStarPerformanceCounters { pub opened_nodes: usize, diff --git a/lib_ts_chainalign/src/alignment/ts_kind.rs b/lib_ts_chainalign/src/alignment/ts_kind.rs index d9e987d5..2cfbb92f 100644 --- a/lib_ts_chainalign/src/alignment/ts_kind.rs +++ b/lib_ts_chainalign/src/alignment/ts_kind.rs @@ -44,6 +44,24 @@ impl TsKind { [Self::TS11, Self::TS12, Self::TS21, Self::TS22].into_iter() } + /// Index of this [`TsKind`] in the iterator produced by [`Self::iter()`]. + /// + /// ```rust + /// # use lib_ts_chainalign::alignment::ts_kind::TsKind; + /// assert_eq!( + /// TsKind::iter().enumerate().collect::>(), + /// TsKind::iter().map(|ts_kind| (ts_kind.index(), ts_kind)).collect::>() + /// ); + /// ``` + pub fn index(&self) -> usize { + match (self.ancestor, self.descendant) { + (TsAncestor::Seq1, TsDescendant::Seq1) => 0, + (TsAncestor::Seq1, TsDescendant::Seq2) => 1, + (TsAncestor::Seq2, TsDescendant::Seq1) => 2, + (TsAncestor::Seq2, TsDescendant::Seq2) => 3, + } + } + pub fn digits(&self) -> &'static str { match (self.ancestor, self.descendant) { (TsAncestor::Seq1, TsDescendant::Seq1) => "11", diff --git a/lib_ts_chainalign/src/anchors.rs b/lib_ts_chainalign/src/anchors.rs index 65078085..a4ea442e 100644 --- a/lib_ts_chainalign/src/anchors.rs +++ b/lib_ts_chainalign/src/anchors.rs @@ -7,14 +7,16 @@ use crate::{ alignment::{ coordinates::AlignmentCoordinates, sequences::AlignmentSequences, - ts_kind::{TsAncestor, TsDescendant, TsKind}, + ts_kind::{TsDescendant, TsKind}, }, anchors::{ + index::AnchorIndex, kmer_matches::find_kmer_matches, kmers::{Kmer, KmerStore}, }, }; +pub mod index; pub mod kmer_matches; pub mod kmers; #[cfg(test)] @@ -22,11 +24,8 @@ mod tests; #[derive(Debug, PartialEq, Eq)] pub struct Anchors { - pub primary: Vec, - pub secondary_11: Vec, - pub secondary_12: Vec, - pub secondary_21: Vec, - pub secondary_22: Vec, + primary: Vec, + secondaries: [Vec; 4], } #[derive(Debug, PartialEq, Eq)] @@ -110,34 +109,35 @@ impl Anchors { .into_iter() .map(|(seq1, seq2)| PrimaryAnchor { seq1, seq2 }) .collect(); - let mut secondary_11: Vec<_> = find_kmer_matches(&s1_rc_kmers, &s1_kmers) + let secondary_11: Vec<_> = find_kmer_matches(&s1_rc_kmers, &s1_kmers) .into_iter() .map(|(ancestor, descendant)| SecondaryAnchor { ancestor: s1.len() - ancestor, descendant, }) .collect(); - let mut secondary_12: Vec<_> = find_kmer_matches(&s1_rc_kmers, &s2_kmers) + let secondary_12: Vec<_> = find_kmer_matches(&s1_rc_kmers, &s2_kmers) .into_iter() .map(|(ancestor, descendant)| SecondaryAnchor { ancestor: s1.len() - ancestor, descendant, }) .collect(); - let mut secondary_21: Vec<_> = find_kmer_matches(&s2_rc_kmers, &s1_kmers) + let secondary_21: Vec<_> = find_kmer_matches(&s2_rc_kmers, &s1_kmers) .into_iter() .map(|(ancestor, descendant)| SecondaryAnchor { ancestor: s2.len() - ancestor, descendant, }) .collect(); - let mut secondary_22: Vec<_> = find_kmer_matches(&s2_rc_kmers, &s2_kmers) + let secondary_22: Vec<_> = find_kmer_matches(&s2_rc_kmers, &s2_kmers) .into_iter() .map(|(ancestor, descendant)| SecondaryAnchor { ancestor: s2.len() - ancestor, descendant, }) .collect(); + let mut secondaries = [secondary_11, secondary_12, secondary_21, secondary_22]; // Sort anchors. primary.sort_unstable_by_key(|primary_anchor| { @@ -147,65 +147,67 @@ impl Anchors { primary_anchor.seq2, ) }); - secondary_11.sort_unstable_by_key(|secondary_anchor| { - ( - secondary_anchor.ancestor.min(secondary_anchor.descendant), - secondary_anchor.ancestor, - secondary_anchor.descendant, - ) - }); - secondary_12.sort_unstable_by_key(|secondary_anchor| { - ( - secondary_anchor.ancestor.min(secondary_anchor.descendant), - secondary_anchor.ancestor, - secondary_anchor.descendant, - ) - }); - secondary_21.sort_unstable_by_key(|secondary_anchor| { - ( - secondary_anchor.ancestor.min(secondary_anchor.descendant), - secondary_anchor.ancestor, - secondary_anchor.descendant, - ) - }); - secondary_22.sort_unstable_by_key(|secondary_anchor| { - ( - secondary_anchor.ancestor.min(secondary_anchor.descendant), - secondary_anchor.ancestor, - secondary_anchor.descendant, - ) - }); + for secondary in &mut secondaries { + secondary.sort_unstable_by_key(|secondary_anchor| { + ( + secondary_anchor.ancestor.min(secondary_anchor.descendant), + secondary_anchor.ancestor, + secondary_anchor.descendant, + ) + }); + } info!( "Found {} anchors ({} + {} + {} + {} + {})", - primary.len() - + secondary_11.len() - + secondary_12.len() - + secondary_21.len() - + secondary_22.len(), + primary.len() + secondaries.iter().map(Vec::len).sum::(), primary.len(), - secondary_11.len(), - secondary_12.len(), - secondary_21.len(), - secondary_22.len(), + secondaries[0].len(), + secondaries[1].len(), + secondaries[2].len(), + secondaries[3].len(), ); Self { primary, - secondary_11, - secondary_12, - secondary_21, - secondary_22, + secondaries, } } - pub fn secondary(&self, ts_kind: TsKind) -> &[SecondaryAnchor] { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => &self.secondary_11, - (TsAncestor::Seq1, TsDescendant::Seq2) => &self.secondary_12, - (TsAncestor::Seq2, TsDescendant::Seq1) => &self.secondary_21, - (TsAncestor::Seq2, TsDescendant::Seq2) => &self.secondary_22, - } + pub fn primary(&self, index: AnchorIndex) -> &PrimaryAnchor { + &self.primary[index.as_usize()] + } + + pub fn primary_len(&self) -> AnchorIndex { + self.primary.len().into() + } + + pub fn enumerate_primaries(&self) -> impl Iterator { + self.primary + .iter() + .enumerate() + .map(|(index, anchor)| (index.into(), anchor)) + } + + fn secondary_anchor_vec(&self, ts_kind: TsKind) -> &Vec { + &self.secondaries[ts_kind.index()] + } + + pub fn secondary(&self, index: AnchorIndex, ts_kind: TsKind) -> &SecondaryAnchor { + &self.secondary_anchor_vec(ts_kind)[index.as_usize()] + } + + pub fn secondary_len(&self, ts_kind: TsKind) -> AnchorIndex { + self.secondary_anchor_vec(ts_kind).len().into() + } + + pub fn enumerate_secondaries( + &self, + ts_kind: TsKind, + ) -> impl Iterator { + self.secondary_anchor_vec(ts_kind) + .iter() + .enumerate() + .map(|(index, anchor)| (index.into(), anchor)) } } @@ -419,53 +421,26 @@ impl Display for Anchors { } writeln!(f, "]")?; - write!(f, "S11: [")?; - let mut once = true; - for secondary_anchor in &self.secondary_11 { - if once { - once = false; - } else { - write!(f, ", ")?; - } - write!(f, "{secondary_anchor}")?; - } - writeln!(f, "]")?; - - write!(f, "S12: [")?; - let mut once = true; - for secondary_anchor in &self.secondary_12 { - if once { - once = false; - } else { - write!(f, ", ")?; - } - write!(f, "{secondary_anchor}")?; - } - writeln!(f, "]")?; - - write!(f, "S21: [")?; - let mut once = true; - for secondary_anchor in &self.secondary_21 { - if once { - once = false; + let mut ts_kind_once = true; + for ts_kind in TsKind::iter() { + if ts_kind_once { + ts_kind_once = false; } else { - write!(f, ", ")?; + writeln!(f)?; } - write!(f, "{secondary_anchor}")?; - } - writeln!(f, "]")?; - write!(f, "S22: [")?; - let mut once = true; - for secondary_anchor in &self.secondary_22 { - if once { - once = false; - } else { - write!(f, ", ")?; + write!(f, "S{}: [", ts_kind.digits())?; + let mut once = true; + for secondary_anchor in self.secondary_anchor_vec(ts_kind) { + if once { + once = false; + } else { + write!(f, ", ")?; + } + write!(f, "{secondary_anchor}")?; } - write!(f, "{secondary_anchor}")?; + write!(f, "]")?; } - write!(f, "]")?; Ok(()) } diff --git a/lib_ts_chainalign/src/anchors/index.rs b/lib_ts_chainalign/src/anchors/index.rs new file mode 100644 index 00000000..dd1286d2 --- /dev/null +++ b/lib_ts_chainalign/src/anchors/index.rs @@ -0,0 +1,77 @@ +use std::{ + fmt::Display, + ops::{Add, Sub}, +}; + +use num_traits::Zero; + +type IndexType = u32; + +#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash)] +pub struct AnchorIndex(IndexType); + +impl AnchorIndex { + pub fn as_usize(self) -> usize { + self.0.try_into().unwrap() + } +} + +impl From for AnchorIndex { + fn from(value: IndexType) -> Self { + Self(value) + } +} + +impl From for AnchorIndex { + fn from(value: usize) -> Self { + Self(value.try_into().unwrap()) + } +} + +impl Add for AnchorIndex { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Self(self.0.checked_add(rhs.0).unwrap()) + } +} + +impl Sub for AnchorIndex { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + Self(self.0.checked_sub(rhs.0).unwrap()) + } +} + +impl Add for AnchorIndex { + type Output = Self; + + fn add(self, rhs: IndexType) -> Self::Output { + self + Self::from(rhs) + } +} + +impl Sub for AnchorIndex { + type Output = Self; + + fn sub(self, rhs: IndexType) -> Self::Output { + self - Self::from(rhs) + } +} + +impl Zero for AnchorIndex { + fn zero() -> Self { + Self(0) + } + + fn is_zero(&self) -> bool { + self.0.is_zero() + } +} + +impl Display for AnchorIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} diff --git a/lib_ts_chainalign/src/anchors/tests.rs b/lib_ts_chainalign/src/anchors/tests.rs index 03a8eabb..1fe7bee6 100644 --- a/lib_ts_chainalign/src/anchors/tests.rs +++ b/lib_ts_chainalign/src/anchors/tests.rs @@ -1,7 +1,7 @@ use lib_tsalign::a_star_aligner::alignment_geometry::AlignmentRange; use crate::{ - alignment::sequences::AlignmentSequences, + alignment::{sequences::AlignmentSequences, ts_kind::TsKind}, anchors::{Anchors, PrimaryAnchor, SecondaryAnchor}, }; @@ -23,18 +23,18 @@ fn test_coordinates() { let anchors = Anchors::new(&sequences, range, k, &rc_fn); assert_eq!(anchors.primary, [(0, 0), (2, 0)].map(PrimaryAnchor::from)); - assert!(anchors.secondary_11.is_empty()); + assert!(anchors.secondary_anchor_vec(TsKind::TS11).is_empty()); assert_eq!( - anchors.secondary_12, - [(2, 2), (4, 2)].map(SecondaryAnchor::from) + anchors.secondary_anchor_vec(TsKind::TS12), + &[(2, 2), (4, 2)].map(SecondaryAnchor::from) ); assert_eq!( - anchors.secondary_21, - [(4, 0), (4, 2)].map(SecondaryAnchor::from) + anchors.secondary_anchor_vec(TsKind::TS21), + &[(4, 0), (4, 2)].map(SecondaryAnchor::from) ); assert_eq!( - anchors.secondary_22, - [(4, 0), (3, 1), (2, 2)].map(SecondaryAnchor::from) + anchors.secondary_anchor_vec(TsKind::TS22), + &[(4, 0), (3, 1), (2, 2)].map(SecondaryAnchor::from) ); } @@ -46,17 +46,17 @@ fn test_coordinates_rev() { let anchors = Anchors::new(&sequences, range, k, &rc_fn); assert_eq!(anchors.primary, [(0, 0), (0, 2)].map(PrimaryAnchor::from)); - assert!(anchors.secondary_22.is_empty()); + assert!(anchors.secondary_anchor_vec(TsKind::TS22).is_empty()); assert_eq!( - anchors.secondary_21, - [(2, 2), (4, 2)].map(SecondaryAnchor::from) + anchors.secondary_anchor_vec(TsKind::TS21), + &[(2, 2), (4, 2)].map(SecondaryAnchor::from) ); assert_eq!( - anchors.secondary_12, - [(4, 0), (4, 2)].map(SecondaryAnchor::from) + anchors.secondary_anchor_vec(TsKind::TS12), + &[(4, 0), (4, 2)].map(SecondaryAnchor::from) ); assert_eq!( - anchors.secondary_11, - [(4, 0), (3, 1), (2, 2)].map(SecondaryAnchor::from) + anchors.secondary_anchor_vec(TsKind::TS11), + &[(4, 0), (3, 1), (2, 2)].map(SecondaryAnchor::from) ); } diff --git a/lib_ts_chainalign/src/chain_align.rs b/lib_ts_chainalign/src/chain_align.rs index 4e6666a9..35af6cde 100644 --- a/lib_ts_chainalign/src/chain_align.rs +++ b/lib_ts_chainalign/src/chain_align.rs @@ -12,6 +12,7 @@ use lib_tsalign::a_star_aligner::{ template_switch_distance::{EqualCostRange, TemplateSwitchDirection}, }; use log::{debug, trace}; +use num_traits::Zero; use std::{ fmt::Write, iter, @@ -34,11 +35,19 @@ use crate::{ mod chainer; +pub struct AlignmentParameters { + /// The step width for generating successors during chaining. + /// + /// At most `max_successors` will be generated at a time, but at least all with minimum chaining cost. + pub max_successors: usize, +} + #[expect(clippy::too_many_arguments)] pub fn align( sequences: &AlignmentSequences, start: AlignmentCoordinates, end: AlignmentCoordinates, + parameters: &AlignmentParameters, alignment_costs: &AlignmentCosts, rc_fn: &dyn Fn(u8) -> u8, max_match_run: u32, @@ -58,6 +67,7 @@ pub fn align( chaining_cost_function, &alignment_costs.ts_limits, k, + parameters.max_successors, ); let mut astar = AStar::new(context); let mut chaining_execution_count = 0; @@ -104,21 +114,55 @@ pub fn align( } match identifier { Identifier::Start => write!(s, "start").unwrap(), - Identifier::Primary { index } => { - write!(s, "P{}", anchors.primary[*index]).unwrap() + Identifier::StartToPrimary { offset } => { + write!(s, "start-to-primary-{offset}").unwrap() + } + Identifier::StartToSecondary { ts_kind, offset } => { + write!(s, "start-to-secondary{}-{offset}", ts_kind.digits()) + .unwrap() } - Identifier::Secondary { + Identifier::PrimaryToPrimary { index, offset } => { + write!(s, "P{}-to-primary-{offset}", anchors.primary(*index)) + .unwrap() + } + Identifier::PrimaryToSecondary { + index, + ts_kind, + offset, + } => write!( + s, + "P{}-to-secondary{}-{offset}", + anchors.primary(*index), + ts_kind.digits() + ) + .unwrap(), + Identifier::SecondaryToSecondary { index, ts_kind, first_secondary_index, + offset, } => write!( s, - "S{}{}->{}", + "S{}{}->{}-to-secondary-{offset}", ts_kind.digits(), - anchors.secondary(*ts_kind)[*first_secondary_index], - anchors.secondary(*ts_kind)[*index], + anchors.secondary(*first_secondary_index, *ts_kind), + anchors.secondary(*index, *ts_kind), ) .unwrap(), + Identifier::SecondaryToPrimary { + index, + ts_kind, + first_secondary_index, + offset, + } => write!( + s, + "S{}{}->{}-to-primary-{offset}", + ts_kind.digits(), + anchors.secondary(*first_secondary_index, *ts_kind), + anchors.secondary(*index, *ts_kind), + ) + .unwrap(), + Identifier::End => write!(s, "end").unwrap(), } } @@ -305,9 +349,29 @@ fn evaluate_chain( let k = usize::try_from(max_match_run + 1).unwrap(); let mut current_upper_bound = Cost::zero(); let mut alignments = Vec::new(); - for window in chain.windows(2) { - let from_anchor = window[0]; - let to_anchor = window[1]; + let mut current_from_index = 0; + + loop { + let from_anchor = chain[current_from_index]; + let Some((to_anchor_index, to_anchor)) = chain + .iter() + .copied() + .enumerate() + .skip(current_from_index + 1) + .find(|(_, identifier)| match identifier { + Identifier::Start => true, + Identifier::StartToPrimary { .. } => false, + Identifier::StartToSecondary { .. } => false, + Identifier::PrimaryToPrimary { offset, .. } + | Identifier::PrimaryToSecondary { offset, .. } + | Identifier::SecondaryToSecondary { offset, .. } + | Identifier::SecondaryToPrimary { offset, .. } => offset.is_zero(), + Identifier::End => true, + }) + else { + break; + }; + current_from_index = to_anchor_index; match (from_anchor, to_anchor) { (Identifier::Start, Identifier::End) => { @@ -328,8 +392,12 @@ fn evaluate_chain( } current_upper_bound += chaining_cost_function.start_to_end(); } - (Identifier::Start, Identifier::Primary { index }) => { - let end = anchors.primary[index].start(); + ( + Identifier::Start, + Identifier::PrimaryToPrimary { index, .. } + | Identifier::PrimaryToSecondary { index, .. }, + ) => { + let end = anchors.primary(index).start(); if final_evaluation || !chaining_cost_function.is_primary_from_start_exact(index) { let alignment = GapAffineAlignment::new( start, @@ -341,7 +409,7 @@ fn evaluate_chain( ); trace!( "Aligning from start to P{index}{} costs {}", - anchors.primary[index], + anchors.primary(index), alignment.cost() ); if !final_evaluation { @@ -355,8 +423,12 @@ fn evaluate_chain( } current_upper_bound += chaining_cost_function.primary_from_start(index); } - (Identifier::Start, Identifier::Secondary { index, ts_kind, .. }) => { - let end = anchors.secondary(ts_kind)[index].start(ts_kind); + ( + Identifier::Start, + Identifier::SecondaryToPrimary { index, ts_kind, .. } + | Identifier::SecondaryToSecondary { index, ts_kind, .. }, + ) => { + let end = anchors.secondary(index, ts_kind).start(ts_kind); if final_evaluation || !chaining_cost_function.is_jump_12_from_start_exact(index, ts_kind) { @@ -371,7 +443,7 @@ fn evaluate_chain( trace!( "Aligning from start to S{}[{index}]{} costs {}", ts_kind.digits(), - anchors.secondary(ts_kind)[index], + anchors.secondary(index, ts_kind), alignment.cost() ); if !final_evaluation { @@ -386,8 +458,8 @@ fn evaluate_chain( } current_upper_bound += chaining_cost_function.jump_12_from_start(index, ts_kind); } - (Identifier::Primary { index }, Identifier::End) => { - let start = anchors.primary[index].end(k); + (Identifier::PrimaryToPrimary { index, .. }, Identifier::End) => { + let start = anchors.primary(index).end(k); if final_evaluation || !chaining_cost_function.is_primary_to_end_exact(index) { let alignment = GapAffineAlignment::new( start, @@ -399,7 +471,7 @@ fn evaluate_chain( ); trace!( "Aligning from P{index}{} to end costs {}", - anchors.primary[index], + anchors.primary(index), alignment.cost() ); if !final_evaluation { @@ -410,8 +482,8 @@ fn evaluate_chain( } current_upper_bound += chaining_cost_function.primary_to_end(index); } - (Identifier::Secondary { index, ts_kind, .. }, Identifier::End) => { - let start = anchors.secondary(ts_kind)[index].end(ts_kind, k); + (Identifier::SecondaryToPrimary { index, ts_kind, .. }, Identifier::End) => { + let start = anchors.secondary(index, ts_kind).end(ts_kind, k); if final_evaluation || !chaining_cost_function.is_jump_34_to_end_exact(index, ts_kind) { @@ -426,7 +498,7 @@ fn evaluate_chain( trace!( "Aligning from S{}[{index}]{} to end costs {}", ts_kind.digits(), - anchors.secondary(ts_kind)[index], + anchors.secondary(index, ts_kind), alignment.cost() ); if !final_evaluation { @@ -443,17 +515,26 @@ fn evaluate_chain( current_upper_bound += chaining_cost_function.jump_34_to_end(index, ts_kind); } ( - Identifier::Primary { index: from_index }, - Identifier::Primary { index: to_index }, + Identifier::PrimaryToPrimary { + index: from_index, .. + }, + Identifier::PrimaryToPrimary { + index: to_index, .. + } + | Identifier::PrimaryToSecondary { + index: to_index, .. + }, ) => { - if anchors.primary[from_index].is_direct_predecessor_of(&anchors.primary[to_index]) + if anchors + .primary(from_index) + .is_direct_predecessor_of(anchors.primary(to_index)) { alignments.push(Alignment::from(vec![AlignmentType::Match])); continue; } - let start = anchors.primary[from_index].end(k); - let end = anchors.primary[to_index].start(); + let start = anchors.primary(from_index).end(k); + let end = anchors.primary(to_index).start(); if final_evaluation || !chaining_cost_function.is_primary_exact(from_index, to_index) { @@ -467,8 +548,8 @@ fn evaluate_chain( ); trace!( "Aligning from P{from_index}{} to P{to_index}{} costs {}", - anchors.primary[from_index], - anchors.primary[to_index], + anchors.primary(from_index), + anchors.primary(to_index), alignment.cost() ); if !final_evaluation { @@ -485,15 +566,22 @@ fn evaluate_chain( current_upper_bound += chaining_cost_function.primary(from_index, to_index); } ( - Identifier::Primary { index: from_index }, - Identifier::Secondary { + Identifier::PrimaryToSecondary { + index: from_index, .. + }, + Identifier::SecondaryToSecondary { + index: to_index, + ts_kind, + .. + } + | Identifier::SecondaryToPrimary { index: to_index, ts_kind, .. }, ) => { - let start = anchors.primary[from_index].end(k); - let end = anchors.secondary(ts_kind)[to_index].start(ts_kind); + let start = anchors.primary(from_index).end(k); + let end = anchors.secondary(to_index, ts_kind).start(ts_kind); if final_evaluation || !chaining_cost_function.is_jump_12_exact(from_index, to_index, ts_kind) { @@ -516,9 +604,9 @@ fn evaluate_chain( } trace!( "Aligning from P{from_index}{} to S{}[{to_index}]{} costs {}", - anchors.primary[from_index], + anchors.primary(from_index), ts_kind.digits(), - anchors.secondary(ts_kind)[to_index], + anchors.secondary(to_index, ts_kind), alignment.cost() ); alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); @@ -528,26 +616,32 @@ fn evaluate_chain( chaining_cost_function.jump_12(from_index, to_index, ts_kind); } ( - Identifier::Secondary { + Identifier::SecondaryToSecondary { index: from_index, ts_kind, .. }, - Identifier::Secondary { + Identifier::SecondaryToSecondary { + index: to_index, + ts_kind: to_ts_kind, + .. + } + | Identifier::SecondaryToPrimary { index: to_index, ts_kind: to_ts_kind, .. }, ) => { assert_eq!(ts_kind, to_ts_kind); - if anchors.secondary(ts_kind)[from_index] - .is_direct_predecessor_of(&anchors.secondary(ts_kind)[to_index]) + if anchors + .secondary(from_index, ts_kind) + .is_direct_predecessor_of(anchors.secondary(to_index, ts_kind)) { alignments.push(Alignment::from(vec![AlignmentType::Match])); continue; } - let start = anchors.secondary(ts_kind)[from_index].end(ts_kind, k); - let end = anchors.secondary(ts_kind)[to_index].start(ts_kind); + let start = anchors.secondary(from_index, ts_kind).end(ts_kind, k); + let end = anchors.secondary(to_index, ts_kind).start(ts_kind); if final_evaluation || !chaining_cost_function.is_secondary_exact(from_index, to_index, ts_kind) { @@ -562,9 +656,9 @@ fn evaluate_chain( trace!( "Aligning from S{}[{from_index}]{} to S{}[{to_index}]{} costs {}", ts_kind.digits(), - anchors.secondary(ts_kind)[from_index], + anchors.secondary(from_index, ts_kind), ts_kind.digits(), - anchors.secondary(ts_kind)[to_index], + anchors.secondary(to_index, ts_kind), alignment.cost() ); if !final_evaluation { @@ -583,15 +677,20 @@ fn evaluate_chain( chaining_cost_function.secondary(from_index, to_index, ts_kind); } ( - Identifier::Secondary { + Identifier::SecondaryToPrimary { index: from_index, ts_kind, .. }, - Identifier::Primary { index: to_index }, + Identifier::PrimaryToPrimary { + index: to_index, .. + } + | Identifier::PrimaryToSecondary { + index: to_index, .. + }, ) => { - let start = anchors.secondary(ts_kind)[from_index].end(ts_kind, k); - let end = anchors.primary[to_index].start(); + let start = anchors.secondary(from_index, ts_kind).end(ts_kind, k); + let end = anchors.primary(to_index).start(); if final_evaluation || !chaining_cost_function.is_jump_34_exact(from_index, to_index, ts_kind) { @@ -615,8 +714,8 @@ fn evaluate_chain( trace!( "Aligning from S{}[{from_index}]{} to P{to_index}{} (S{} to P{}) costs {}", ts_kind.digits(), - anchors.secondary(ts_kind)[from_index], - anchors.primary[to_index], + anchors.secondary(from_index, ts_kind), + anchors.primary(to_index), start, end, alignment.cost() @@ -627,7 +726,22 @@ fn evaluate_chain( current_upper_bound += chaining_cost_function.jump_34(from_index, to_index, ts_kind); } - (Identifier::End, _) | (_, Identifier::Start) => unreachable!(), + (Identifier::End, _) + | (_, Identifier::Start) + | ( + Identifier::SecondaryToPrimary { .. } | Identifier::PrimaryToPrimary { .. }, + Identifier::SecondaryToPrimary { .. } | Identifier::SecondaryToSecondary { .. }, + ) + | ( + Identifier::PrimaryToSecondary { .. } | Identifier::SecondaryToSecondary { .. }, + Identifier::PrimaryToPrimary { .. } + | Identifier::PrimaryToSecondary { .. } + | Identifier::End, + ) + | (Identifier::StartToPrimary { .. } | Identifier::StartToSecondary { .. }, _) + | (_, Identifier::StartToPrimary { .. } | Identifier::StartToSecondary { .. }) => { + unreachable!() + } } } diff --git a/lib_ts_chainalign/src/chain_align/chainer.rs b/lib_ts_chainalign/src/chain_align/chainer.rs index 2aec206f..1b744228 100644 --- a/lib_ts_chainalign/src/chain_align/chainer.rs +++ b/lib_ts_chainalign/src/chain_align/chainer.rs @@ -1,12 +1,18 @@ use std::{fmt::Display, iter}; use generic_a_star::{AStarContext, AStarNode, cost::AStarCost, reset::Reset}; +use num_traits::Zero; use crate::{ - alignment::ts_kind::TsKind, anchors::Anchors, chaining_cost_function::ChainingCostFunction, + alignment::ts_kind::TsKind, + anchors::{Anchors, index::AnchorIndex}, + chain_align::chainer::max_successors_iterator::MaxSuccessorsIterator, + chaining_cost_function::ChainingCostFunction, costs::TsLimits, }; +mod max_successors_iterator; + const DEBUG_CHAINER: bool = false; pub struct Context<'anchors, 'chaining_cost_function, 'ts_limits, Cost> { @@ -14,6 +20,7 @@ pub struct Context<'anchors, 'chaining_cost_function, 'ts_limits, Cost> { pub chaining_cost_function: &'chaining_cost_function mut ChainingCostFunction, pub ts_limits: &'ts_limits TsLimits, pub k: usize, + pub max_successors: usize, } #[derive(Debug, Clone, Copy, Eq, PartialEq)] @@ -26,16 +33,39 @@ pub struct Node { #[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash)] pub enum Identifier { Start, - Primary { - index: usize, + StartToPrimary { + offset: AnchorIndex, + }, + StartToSecondary { + ts_kind: TsKind, + offset: AnchorIndex, + }, + PrimaryToPrimary { + index: AnchorIndex, + offset: AnchorIndex, + }, + PrimaryToSecondary { + index: AnchorIndex, + ts_kind: TsKind, + offset: AnchorIndex, + }, + SecondaryToSecondary { + index: AnchorIndex, + ts_kind: TsKind, + /// The first secondary anchor that is part of the current template switch. + /// + /// Used to estimate the length of the resulting template switch. + first_secondary_index: AnchorIndex, + offset: AnchorIndex, }, - Secondary { - index: usize, + SecondaryToPrimary { + index: AnchorIndex, ts_kind: TsKind, /// The first secondary anchor that is part of the current template switch. /// /// Used to estimate the length of the resulting template switch. - first_secondary_index: usize, + first_secondary_index: AnchorIndex, + offset: AnchorIndex, }, End, } @@ -48,12 +78,14 @@ impl<'anchors, 'chaining_cost_function, 'ts_limits, Cost> chaining_cost_function: &'chaining_cost_function mut ChainingCostFunction, ts_limits: &'ts_limits TsLimits, k: usize, + max_successors: usize, ) -> Self { Self { anchors, chaining_cost_function, ts_limits, k, + max_successors, } } } @@ -72,274 +104,336 @@ impl AStarContext for Context<'_, '_, '_, Cost> { fn generate_successors(&mut self, node: &Self::Node, output: &mut impl Extend) { let predecessor = Some(node.identifier); let predecessor_cost = node.cost; + let primary_end_anchor_index = self.anchors.primary_len(); if DEBUG_CHAINER { println!("Generating successors of {node}"); } match node.identifier { - Identifier::Start => { + Identifier::Start => output.extend( + iter::once(Identifier::StartToPrimary { + offset: AnchorIndex::zero(), + }) + .chain(TsKind::iter().map(|ts_kind| Identifier::StartToSecondary { + ts_kind, + offset: AnchorIndex::zero(), + })) + .map(|identifier| Node { + identifier, + predecessor, + cost: predecessor_cost, + }), + ), + + Identifier::StartToPrimary { offset } => { + let mut iter = MaxSuccessorsIterator::new( + self.chaining_cost_function + .iter_primary_from_start_in_cost_order(offset), + self.max_successors, + ); + output.extend( - (0..self.anchors.primary.len()) - .flat_map(|successor_index| { + iter.by_ref() + .map_while(|(successor_index, chaining_cost)| { if DEBUG_CHAINER { - println!( - "Checking anchor P-{successor_index}: {}", - self.anchors.primary[successor_index] - ); + if successor_index == primary_end_anchor_index { + println!("Checking anchor end"); + } else { + println!( + "Checking anchor P-{successor_index}: {}", + self.anchors.primary(successor_index), + ); + } } - let cost = predecessor_cost - .checked_add( - &self - .chaining_cost_function - .primary_from_start(successor_index), - ) - .unwrap(); + let cost = predecessor_cost.checked_add(&chaining_cost)?; if DEBUG_CHAINER { println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); } - debug_assert_ne!(cost, Cost::max_value()); - Some(Node { - identifier: Identifier::Primary { - index: successor_index, - }, + Some(generate_primary_successors( + successor_index, predecessor, cost, - }) + primary_end_anchor_index, + )) }) - .chain(TsKind::iter().flat_map(|ts_kind| { - (0..self.anchors.secondary(ts_kind).len()) - .zip(iter::repeat(&self)) - .flat_map(move |(successor_index, context)| { - if DEBUG_CHAINER { - println!( - "Checking anchor S{}-{successor_index}: {}", - ts_kind.digits(), - context.anchors.secondary(ts_kind)[successor_index] - ); - } - - let cost = predecessor_cost.checked_add( - &context - .chaining_cost_function - .jump_12_from_start(successor_index, ts_kind), - )?; - if DEBUG_CHAINER { - println!( - "Cost: {}+{}", - predecessor_cost, - cost - predecessor_cost - ); - } - - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Secondary { - index: successor_index, - ts_kind, - first_secondary_index: successor_index, - }, - predecessor, - cost, - }) - }) - })) - .chain(iter::once({ - let cost = predecessor_cost - .checked_add(&self.chaining_cost_function.start_to_end()) - .unwrap(); + .flatten(), + ); + + if let Some(next_cost) = iter.next_cost() { + output.extend(iter::once(Node { + identifier: Identifier::StartToPrimary { + offset: offset + AnchorIndex::from(iter.successor_count()), + }, + predecessor, + cost: next_cost, + })); + } + } + + Identifier::StartToSecondary { ts_kind, offset } => { + let mut iter = MaxSuccessorsIterator::new( + self.chaining_cost_function + .iter_jump_12_from_start_in_cost_order(ts_kind, offset), + self.max_successors, + ); + + output.extend( + iter.by_ref() + .map_while(|(successor_index, chaining_cost)| { + if DEBUG_CHAINER { + println!( + "Checking anchor S{}-{successor_index}: {}", + ts_kind.digits(), + self.anchors.secondary(successor_index, ts_kind), + ); + } + + let cost = predecessor_cost.checked_add(&chaining_cost)?; if DEBUG_CHAINER { - println!("Checking anchor end"); println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); } - debug_assert_ne!(cost, Cost::max_value()); - Node { - identifier: Identifier::End, + + Some(generate_secondary_successors( + successor_index, + ts_kind, + successor_index, predecessor, cost, - } - })), + self.ts_limits.length_23.contains(&self.k), + )) + }) + .flatten(), ); + + if let Some(next_cost) = iter.next_cost() { + output.extend(iter::once(Node { + identifier: Identifier::StartToSecondary { + ts_kind, + offset: offset + AnchorIndex::from(iter.successor_count()), + }, + predecessor, + cost: next_cost, + })); + } } - Identifier::Primary { index } => output.extend( - (0..self.anchors.primary.len()) - .flat_map(|successor_index| { - if DEBUG_CHAINER { - println!( - "Checking anchor P-{successor_index}: {}", - self.anchors.primary[successor_index] - ); - } - - let cost = predecessor_cost.checked_add( - &self.chaining_cost_function.primary(index, successor_index), - )?; - if DEBUG_CHAINER { - println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); - } - - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Primary { - index: successor_index, - }, - predecessor, - cost, - }) - }) - .chain(TsKind::iter().flat_map(|ts_kind| { - (0..self.anchors.secondary(ts_kind).len()) - .zip(iter::repeat(&self)) - .flat_map(move |(successor_index, context)| { - if DEBUG_CHAINER { - println!( - "Checking anchor S{}-{successor_index}: {}", - ts_kind.digits(), - context.anchors.secondary(ts_kind)[successor_index] - ); - } - let cost = predecessor_cost.checked_add( - &context.chaining_cost_function.jump_12( - index, - successor_index, - ts_kind, - ), - )?; - if DEBUG_CHAINER { + Identifier::PrimaryToPrimary { index, offset } => { + let mut iter = MaxSuccessorsIterator::new( + self.chaining_cost_function + .iter_primary_in_cost_order(index, offset), + self.max_successors, + ); + + output.extend( + iter.by_ref() + .map_while(|(successor_index, chaining_cost)| { + if DEBUG_CHAINER { + if successor_index == primary_end_anchor_index { + println!("Checking anchor end"); + } else { println!( - "Cost: {}+{}", - predecessor_cost, - cost - predecessor_cost + "Checking anchor P-{successor_index}: {}", + self.anchors.primary(successor_index), ); } + } - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Secondary { - index: successor_index, - ts_kind, - first_secondary_index: successor_index, - }, - predecessor, - cost, - }) - }) - })) - .chain(iter::once({ - let cost = predecessor_cost - .checked_add(&self.chaining_cost_function.primary_to_end(index)) - .unwrap(); - if DEBUG_CHAINER { - println!("Checking anchor end"); - println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); - } - debug_assert_ne!(cost, Cost::max_value()); - Node { - identifier: Identifier::End, - predecessor, - cost, - } - })), - ), - Identifier::Secondary { + let cost = predecessor_cost.checked_add(&chaining_cost)?; + if DEBUG_CHAINER { + println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); + } + + Some(generate_primary_successors( + successor_index, + predecessor, + cost, + primary_end_anchor_index, + )) + }) + .flatten(), + ); + + if let Some(next_cost) = iter.next_cost() { + output.extend(iter::once(Node { + identifier: Identifier::PrimaryToPrimary { + index, + offset: offset + AnchorIndex::from(iter.successor_count()), + }, + predecessor, + cost: next_cost, + })); + } + } + + Identifier::PrimaryToSecondary { + index, + ts_kind, + offset, + } => { + let mut iter = MaxSuccessorsIterator::new( + self.chaining_cost_function + .iter_jump_12_in_cost_order(index, ts_kind, offset), + self.max_successors, + ); + + output.extend( + iter.by_ref() + .map_while(|(successor_index, chaining_cost)| { + if DEBUG_CHAINER { + println!( + "Checking anchor S{}-{successor_index}: {}", + ts_kind.digits(), + self.anchors.secondary(successor_index, ts_kind), + ); + } + + let cost = predecessor_cost.checked_add(&chaining_cost)?; + if DEBUG_CHAINER { + println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); + } + + Some(generate_secondary_successors( + successor_index, + ts_kind, + successor_index, + predecessor, + cost, + self.ts_limits.length_23.contains(&self.k), + )) + }) + .flatten(), + ); + + if let Some(next_cost) = iter.next_cost() { + output.extend(iter::once(Node { + identifier: Identifier::PrimaryToSecondary { + index, + ts_kind, + offset: offset + AnchorIndex::from(iter.successor_count()), + }, + predecessor, + cost: next_cost, + })); + } + } + + Identifier::SecondaryToSecondary { index, ts_kind, first_secondary_index, + offset, } => { - output.extend((0..self.anchors.secondary(ts_kind).len()).flat_map( - |successor_index| { - if DEBUG_CHAINER { - println!( - "Checking anchor S{}-{successor_index}: {}", - ts_kind.digits(), - self.anchors.secondary(ts_kind)[successor_index] + let mut iter = MaxSuccessorsIterator::new( + self.chaining_cost_function + .iter_secondary_in_cost_order(index, ts_kind, offset), + self.max_successors, + ); + + output.extend( + iter.by_ref() + .map_while(|(successor_index, chaining_cost)| { + if DEBUG_CHAINER { + println!( + "Checking anchor S{}-{successor_index}: {}", + ts_kind.digits(), + self.anchors.secondary(successor_index, ts_kind), + ); + } + + let cost = predecessor_cost.checked_add(&chaining_cost)?; + if DEBUG_CHAINER { + println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); + } + + let first_anchor = + &self.anchors.secondary(first_secondary_index, ts_kind); + let ts_length = first_anchor.ts_length_until( + self.anchors.secondary(successor_index, ts_kind), + ts_kind, + self.k, ); - } - - let cost = predecessor_cost.checked_add( - &self - .chaining_cost_function - .secondary(index, successor_index, ts_kind), - )?; - if DEBUG_CHAINER { - println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); - } - - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Secondary { - index: successor_index, + + Some(generate_secondary_successors( + successor_index, ts_kind, first_secondary_index, - }, - predecessor, - cost, + predecessor, + cost, + self.ts_limits.length_23.contains(&ts_length), + )) }) - }, - )); + .flatten(), + ); - let first_anchor = &self.anchors.secondary(ts_kind)[first_secondary_index]; - let ts_length = first_anchor.ts_length_until( - &self.anchors.secondary(ts_kind)[index], - ts_kind, - self.k, + if let Some(next_cost) = iter.next_cost() { + output.extend(iter::once(Node { + identifier: Identifier::SecondaryToSecondary { + index, + ts_kind, + first_secondary_index, + offset: offset + AnchorIndex::from(iter.successor_count()), + }, + predecessor, + cost: next_cost, + })); + } + } + + Identifier::SecondaryToPrimary { + index, + ts_kind, + first_secondary_index, + offset, + } => { + let mut iter = MaxSuccessorsIterator::new( + self.chaining_cost_function + .iter_jump_34_in_cost_order(index, ts_kind, offset), + self.max_successors, ); - if self.ts_limits.length_23.contains(&ts_length) { - output.extend( - (0..self.anchors.primary.len()) - .flat_map(|successor_index| { - if DEBUG_CHAINER { + output.extend( + iter.by_ref() + .map_while(|(successor_index, chaining_cost)| { + if DEBUG_CHAINER { + if successor_index == primary_end_anchor_index { + println!("Checking anchor end"); + } else { println!( "Checking anchor P-{successor_index}: {}", - self.anchors.primary[successor_index] + self.anchors.primary(successor_index), ); } + } - let cost = predecessor_cost.checked_add( - &self.chaining_cost_function.jump_34( - index, - successor_index, - ts_kind, - ), - )?; - if DEBUG_CHAINER { - println!( - "Cost: {}+{}", - predecessor_cost, - cost - predecessor_cost - ); - } + let cost = predecessor_cost.checked_add(&chaining_cost)?; + if DEBUG_CHAINER { + println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); + } - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Primary { - index: successor_index, - }, - predecessor, - cost, - }) - }) - .chain(iter::once({ - let cost = predecessor_cost - .checked_add( - &self.chaining_cost_function.jump_34_to_end(index, ts_kind), - ) - .unwrap(); - if DEBUG_CHAINER { - println!("Checking anchor end"); - println!( - "Cost: {}+{}", - predecessor_cost, - cost - predecessor_cost - ); - } - debug_assert_ne!(cost, Cost::max_value()); - Node { - identifier: Identifier::End, - predecessor, - cost, - } - })), - ); + Some(generate_primary_successors( + successor_index, + predecessor, + cost, + primary_end_anchor_index, + )) + }) + .flatten(), + ); + + if let Some(next_cost) = iter.next_cost() { + output.extend(iter::once(Node { + identifier: Identifier::SecondaryToPrimary { + index, + ts_kind, + first_secondary_index, + offset: offset + AnchorIndex::from(iter.successor_count()), + }, + predecessor, + cost: next_cost, + })); } } @@ -360,6 +454,72 @@ impl AStarContext for Context<'_, '_, '_, Cost> { } } +fn generate_primary_successors( + index: AnchorIndex, + predecessor: Option, + cost: Cost, + primary_end_anchor_index: AnchorIndex, +) -> impl Iterator> { + (index == primary_end_anchor_index) + .then_some(Identifier::End) + .into_iter() + .chain( + (index != primary_end_anchor_index) + .then(move || { + [Identifier::PrimaryToPrimary { + index, + offset: AnchorIndex::zero(), + }] + .into_iter() + .chain( + TsKind::iter().map(move |ts_kind| Identifier::PrimaryToSecondary { + index, + ts_kind, + offset: AnchorIndex::zero(), + }), + ) + }) + .into_iter() + .flatten(), + ) + .map(move |identifier| Node { + identifier, + predecessor, + cost, + }) +} + +fn generate_secondary_successors( + index: AnchorIndex, + ts_kind: TsKind, + first_secondary_index: AnchorIndex, + predecessor: Option, + cost: Cost, + can_34_jump: bool, +) -> impl Iterator> { + [ + can_34_jump.then(|| Identifier::SecondaryToPrimary { + index, + ts_kind, + first_secondary_index, + offset: AnchorIndex::zero(), + }), + Some(Identifier::SecondaryToSecondary { + index, + ts_kind, + first_secondary_index, + offset: AnchorIndex::zero(), + }), + ] + .into_iter() + .flatten() + .map(move |identifier| Node { + identifier, + predecessor, + cost, + }) +} + impl Reset for Context<'_, '_, '_, Cost> { fn reset(&mut self) { // Nothing to do. @@ -420,12 +580,43 @@ impl Display for Identifier { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Identifier::Start => write!(f, "start"), - Identifier::Primary { index } => write!(f, "P-{index}"), - Identifier::Secondary { + Identifier::StartToPrimary { offset } => write!(f, "start-to-primary-{offset}"), + Identifier::StartToSecondary { ts_kind, offset } => { + write!(f, "start-to-secondary{}-{offset}", ts_kind.digits()) + } + Identifier::PrimaryToPrimary { index, offset } => { + write!(f, "primary-to-primary-{index}-{offset}") + } + Identifier::PrimaryToSecondary { + index, + ts_kind, + offset, + } => write!( + f, + "primary-to-secondary{}-{index}-{offset}", + ts_kind.digits() + ), + Identifier::SecondaryToSecondary { index, ts_kind, first_secondary_index, - } => write!(f, "S{}-{first_secondary_index}-{index}", ts_kind.digits()), + offset, + } => write!( + f, + "secondary{}-to-secondary{}-{first_secondary_index}-{index}-{offset}", + ts_kind.digits(), + ts_kind.digits() + ), + Identifier::SecondaryToPrimary { + index, + ts_kind, + first_secondary_index, + offset, + } => write!( + f, + "secondary{}-to-primary-{first_secondary_index}-{index}-{offset}", + ts_kind.digits() + ), Identifier::End => write!(f, "end"), } } diff --git a/lib_ts_chainalign/src/chain_align/chainer/max_successors_iterator.rs b/lib_ts_chainalign/src/chain_align/chainer/max_successors_iterator.rs new file mode 100644 index 00000000..0c0c5112 --- /dev/null +++ b/lib_ts_chainalign/src/chain_align/chainer/max_successors_iterator.rs @@ -0,0 +1,59 @@ +use generic_a_star::cost::AStarCost; + +use crate::anchors::index::AnchorIndex; + +pub struct MaxSuccessorsIterator, Cost> { + iter: Iter, + count: usize, + max_count: usize, + initial_cost: Cost, + next_cost: Option, +} + +impl, Cost: AStarCost> + MaxSuccessorsIterator +{ + pub fn new(iter: Iter, max_count: usize) -> Self { + Self { + iter, + count: 0, + max_count, + initial_cost: Cost::zero(), + next_cost: None, + } + } + + pub fn successor_count(&self) -> usize { + self.count + } + + pub fn next_cost(&self) -> Option { + self.next_cost + } +} + +impl, Cost: AStarCost> Iterator + for MaxSuccessorsIterator +{ + type Item = (AnchorIndex, Cost); + + fn next(&mut self) -> Option { + let (anchor_index, cost) = self.iter.next()?; + + if cost == Cost::max_value() { + return None; + } + + if self.count == 0 { + self.count += 1; + self.initial_cost = cost; + Some((anchor_index, cost)) + } else if self.count < self.max_count || cost == self.initial_cost { + self.count += 1; + Some((anchor_index, cost)) + } else { + self.next_cost = Some(cost); + None + } + } +} diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index a27f45e3..9635f7e2 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -1,31 +1,21 @@ use generic_a_star::cost::AStarCost; +use itertools::Itertools; +use num_traits::Zero; use crate::{ - alignment::{ - coordinates::AlignmentCoordinates, - ts_kind::{TsAncestor, TsDescendant, TsKind}, - }, - anchors::Anchors, - chaining_cost_function::cost_array::CostArray2D, + alignment::{coordinates::AlignmentCoordinates, ts_kind::TsKind}, + anchors::{Anchors, index::AnchorIndex}, + chaining_cost_function::cost_array::ChainingCostArray, chaining_lower_bounds::ChainingLowerBounds, }; mod cost_array; pub struct ChainingCostFunction { - primary: CostArray2D, - secondary_11: CostArray2D, - secondary_12: CostArray2D, - secondary_21: CostArray2D, - secondary_22: CostArray2D, - jump_12_to_11: CostArray2D, - jump_12_to_12: CostArray2D, - jump_12_to_21: CostArray2D, - jump_12_to_22: CostArray2D, - jump_34_from_11: CostArray2D, - jump_34_from_12: CostArray2D, - jump_34_from_21: CostArray2D, - jump_34_from_22: CostArray2D, + primary: ChainingCostArray, + secondaries: [ChainingCostArray; 4], + jump_12s: [ChainingCostArray; 4], + jump_34s: [ChainingCostArray; 4], } impl ChainingCostFunction { @@ -36,24 +26,28 @@ impl ChainingCostFunction { end: AlignmentCoordinates, ) -> Self { let k = usize::try_from(chaining_lower_bounds.max_match_run() + 1).unwrap(); + let primary_anchor_amount = anchors.primary_len() + 2; + let primary_start_anchor_index = AnchorIndex::zero(); + let primary_end_anchor_index = primary_anchor_amount - 1; - let mut primary = CostArray2D::new_from_cost( - [anchors.primary.len() + 2, anchors.primary.len() + 2], + let mut primary = ChainingCostArray::new_from_cost( + [primary_anchor_amount, primary_anchor_amount], Cost::max_value(), ); let gap1 = end.primary_ordinate_a().unwrap() - start.primary_ordinate_a().unwrap(); let gap2 = end.primary_ordinate_b().unwrap() - start.primary_ordinate_b().unwrap(); - primary[[0, anchors.primary.len() + 1]] = + primary[[primary_start_anchor_index, primary_end_anchor_index]] = chaining_lower_bounds.primary_lower_bound(gap1, gap2); - for (from_index, from_anchor) in anchors.primary.iter().enumerate() { + for (from_index, from_anchor) in anchors.enumerate_primaries() { let from_index = from_index + 1; let (gap1, gap2) = from_anchor.chaining_gaps_from_start(start); - primary[[0, from_index]] = chaining_lower_bounds.primary_lower_bound(gap1, gap2); + primary[[primary_start_anchor_index, from_index]] = + chaining_lower_bounds.primary_lower_bound(gap1, gap2); let (gap1, gap2) = from_anchor.chaining_gaps_to_end(end, k); - primary[[from_index, anchors.primary.len() + 1]] = + primary[[from_index, primary_end_anchor_index]] = chaining_lower_bounds.primary_lower_bound(gap1, gap2); - for (to_index, to_anchor) in anchors.primary.iter().enumerate() { + for (to_index, to_anchor) in anchors.enumerate_primaries() { let to_index = to_index + 1; if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, k) { primary[[from_index, to_index]] = @@ -65,553 +59,259 @@ impl ChainingCostFunction { } } - let mut secondary_11 = CostArray2D::new_from_cost( - [anchors.secondary_11.len(), anchors.secondary_11.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_11.iter().enumerate() { - for (to_index, to_anchor) in anchors.secondary_11.iter().enumerate() { - if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, TsKind::TS11, k) { - secondary_11[[from_index, to_index]] = - chaining_lower_bounds.secondary_lower_bound(gap1, gap2); - } - if from_anchor.is_direct_predecessor_of(to_anchor) { - secondary_11[[from_index, to_index]] = Cost::zero(); - } - } - } - - let mut secondary_12 = CostArray2D::new_from_cost( - [anchors.secondary_12.len(), anchors.secondary_12.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_12.iter().enumerate() { - for (to_index, to_anchor) in anchors.secondary_12.iter().enumerate() { - if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, TsKind::TS12, k) { - secondary_12[[from_index, to_index]] = - chaining_lower_bounds.secondary_lower_bound(gap1, gap2); - } - if from_anchor.is_direct_predecessor_of(to_anchor) { - secondary_12[[from_index, to_index]] = Cost::zero(); - } - } - } - - let mut secondary_21 = CostArray2D::new_from_cost( - [anchors.secondary_21.len(), anchors.secondary_21.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_21.iter().enumerate() { - for (to_index, to_anchor) in anchors.secondary_21.iter().enumerate() { - if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, TsKind::TS21, k) { - secondary_21[[from_index, to_index]] = - chaining_lower_bounds.secondary_lower_bound(gap1, gap2); - } - if from_anchor.is_direct_predecessor_of(to_anchor) { - secondary_21[[from_index, to_index]] = Cost::zero(); - } - } - } - - let mut secondary_22 = CostArray2D::new_from_cost( - [anchors.secondary_22.len(), anchors.secondary_22.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_22.iter().enumerate() { - for (to_index, to_anchor) in anchors.secondary_22.iter().enumerate() { - if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, TsKind::TS22, k) { - secondary_22[[from_index, to_index]] = - chaining_lower_bounds.secondary_lower_bound(gap1, gap2); - } - if from_anchor.is_direct_predecessor_of(to_anchor) { - secondary_22[[from_index, to_index]] = Cost::zero(); - } - } - } - - let mut jump_12_to_11 = CostArray2D::new_from_cost( - [anchors.primary.len() + 2, anchors.secondary_11.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.primary.iter().enumerate() { - let from_index = from_index + 1; - for (to_index, to_anchor) in anchors.secondary_11.iter().enumerate() { - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS11, k) { - jump_12_to_11[[from_index, to_index]] = - chaining_lower_bounds.jump_12_lower_bound(gap); - } - } - } - - let mut jump_12_to_12 = CostArray2D::new_from_cost( - [anchors.primary.len() + 2, anchors.secondary_12.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.primary.iter().enumerate() { - let from_index = from_index + 1; - for (to_index, to_anchor) in anchors.secondary_12.iter().enumerate() { - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS12, k) { - jump_12_to_12[[from_index, to_index]] = - chaining_lower_bounds.jump_12_lower_bound(gap); - } - } - } - - let mut jump_12_to_21 = CostArray2D::new_from_cost( - [anchors.primary.len() + 2, anchors.secondary_21.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.primary.iter().enumerate() { - let from_index = from_index + 1; - for (to_index, to_anchor) in anchors.secondary_21.iter().enumerate() { - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS21, k) { - jump_12_to_21[[from_index, to_index]] = - chaining_lower_bounds.jump_12_lower_bound(gap); - } - } - } - - let mut jump_12_to_22 = CostArray2D::new_from_cost( - [anchors.primary.len() + 2, anchors.secondary_22.len()], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.primary.iter().enumerate() { - let from_index = from_index + 1; - for (to_index, to_anchor) in anchors.secondary_22.iter().enumerate() { - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS22, k) { - jump_12_to_22[[from_index, to_index]] = - chaining_lower_bounds.jump_12_lower_bound(gap); - } - } - } - - let mut jump_34_from_11 = CostArray2D::new_from_cost( - [anchors.secondary_11.len(), anchors.primary.len() + 2], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_11.iter().enumerate() { - for (to_index, to_anchor) in anchors.primary.iter().enumerate() { - let to_index = to_index + 1; - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS11, k) { - jump_34_from_11[[from_index, to_index]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } - } - } - - let mut jump_34_from_12 = CostArray2D::new_from_cost( - [anchors.secondary_12.len(), anchors.primary.len() + 2], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_12.iter().enumerate() { - for (to_index, to_anchor) in anchors.primary.iter().enumerate() { - let to_index = to_index + 1; - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS12, k) { - jump_34_from_12[[from_index, to_index]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } - } - } - - let mut jump_34_from_21 = CostArray2D::new_from_cost( - [anchors.secondary_21.len(), anchors.primary.len() + 2], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_21.iter().enumerate() { - for (to_index, to_anchor) in anchors.primary.iter().enumerate() { - let to_index = to_index + 1; - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS21, k) { - jump_34_from_21[[from_index, to_index]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } - } - } - - let mut jump_34_from_22 = CostArray2D::new_from_cost( - [anchors.secondary_22.len(), anchors.primary.len() + 2], - Cost::max_value(), - ); - for (from_index, from_anchor) in anchors.secondary_22.iter().enumerate() { - for (to_index, to_anchor) in anchors.primary.iter().enumerate() { - let to_index = to_index + 1; - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, TsKind::TS22, k) { - jump_34_from_22[[from_index, to_index]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } + let mut secondaries = TsKind::iter() + .map(|ts_kind| { + ChainingCostArray::new_from_cost( + [ + anchors.secondary_len(ts_kind), + anchors.secondary_len(ts_kind), + ], + Cost::max_value(), + ) + }) + .collect_array() + .unwrap(); + for (ts_kind, secondary) in TsKind::iter().zip(&mut secondaries) { + for (from_index, from_anchor) in anchors.enumerate_secondaries(ts_kind) { + for (to_index, to_anchor) in anchors.enumerate_secondaries(ts_kind) { + if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, ts_kind, k) { + secondary[[from_index, to_index]] = + chaining_lower_bounds.secondary_lower_bound(gap1, gap2); + } + if from_anchor.is_direct_predecessor_of(to_anchor) { + secondary[[from_index, to_index]] = Cost::zero(); + } + } + } + } + + let mut jump_12s = TsKind::iter() + .map(|ts_kind| { + ChainingCostArray::new_from_cost( + [primary_anchor_amount, anchors.secondary_len(ts_kind)], + Cost::max_value(), + ) + }) + .collect_array() + .unwrap(); + for (ts_kind, jump_12) in TsKind::iter().zip(&mut jump_12s) { + for (from_index, from_anchor) in anchors.enumerate_primaries() { + let from_index = from_index + 1; + for (to_index, to_anchor) in anchors.enumerate_secondaries(ts_kind) { + if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, ts_kind, k) { + jump_12[[from_index, to_index]] = + chaining_lower_bounds.jump_12_lower_bound(gap); + } + } + } + } + + let mut jump_34s = TsKind::iter() + .map(|ts_kind| { + ChainingCostArray::new_from_cost( + [anchors.secondary_len(ts_kind), primary_anchor_amount], + Cost::max_value(), + ) + }) + .collect_array() + .unwrap(); + for (ts_kind, jump_34) in TsKind::iter().zip(&mut jump_34s) { + for (from_index, from_anchor) in anchors.enumerate_secondaries(ts_kind) { + for (to_index, to_anchor) in anchors.enumerate_primaries() { + let to_index = to_index + 1; + if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, ts_kind, k) { + jump_34[[from_index, to_index]] = + chaining_lower_bounds.jump_34_lower_bound(gap); + } + } + } + } + + for (ts_kind, (jump_12, jump_34)) in + TsKind::iter().zip(jump_12s.iter_mut().zip(&mut jump_34s)) + { + for (index, anchor) in anchors.enumerate_secondaries(ts_kind) { + let gap = anchor.chaining_jump_gap_from_start(start, ts_kind); + jump_12[[primary_start_anchor_index, index]] = + chaining_lower_bounds.jump_12_lower_bound(gap); + let gap = anchor.chaining_jump_gap_to_end(end, ts_kind, k); + jump_34[[index, primary_end_anchor_index]] = + chaining_lower_bounds.jump_34_lower_bound(gap); } } - for (index, anchor) in anchors.secondary_11.iter().enumerate() { - let gap = anchor.chaining_jump_gap_from_start(start, TsKind::TS11); - jump_12_to_11[[0, index]] = chaining_lower_bounds.jump_12_lower_bound(gap); - let gap = anchor.chaining_jump_gap_to_end(end, TsKind::TS11, k); - jump_34_from_11[[index, anchors.primary.len() + 1]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } - - for (index, anchor) in anchors.secondary_12.iter().enumerate() { - let gap = anchor.chaining_jump_gap_from_start(start, TsKind::TS12); - jump_12_to_12[[0, index]] = chaining_lower_bounds.jump_12_lower_bound(gap); - let gap = anchor.chaining_jump_gap_to_end(end, TsKind::TS12, k); - jump_34_from_12[[index, anchors.primary.len() + 1]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } - - for (index, anchor) in anchors.secondary_21.iter().enumerate() { - let gap = anchor.chaining_jump_gap_from_start(start, TsKind::TS21); - jump_12_to_21[[0, index]] = chaining_lower_bounds.jump_12_lower_bound(gap); - let gap = anchor.chaining_jump_gap_to_end(end, TsKind::TS21, k); - jump_34_from_21[[index, anchors.primary.len() + 1]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } - - for (index, anchor) in anchors.secondary_22.iter().enumerate() { - let gap = anchor.chaining_jump_gap_from_start(start, TsKind::TS22); - jump_12_to_22[[0, index]] = chaining_lower_bounds.jump_12_lower_bound(gap); - let gap = anchor.chaining_jump_gap_to_end(end, TsKind::TS22, k); - jump_34_from_22[[index, anchors.primary.len() + 1]] = - chaining_lower_bounds.jump_34_lower_bound(gap); - } - Self { primary, - secondary_11, - secondary_12, - secondary_21, - secondary_22, - jump_12_to_11, - jump_12_to_12, - jump_12_to_21, - jump_12_to_22, - jump_34_from_11, - jump_34_from_12, - jump_34_from_21, - jump_34_from_22, + secondaries, + jump_12s, + jump_34s, } } - pub fn primary(&self, from_primary_index: usize, to_primary_index: usize) -> Cost { - self.primary[[from_primary_index + 1, to_primary_index + 1]] + fn primary_start_anchor_index() -> AnchorIndex { + AnchorIndex::zero() } - pub fn primary_from_start(&self, primary_index: usize) -> Cost { - self.primary[[0, primary_index + 1]] + fn primary_end_anchor_index(&self) -> AnchorIndex { + self.primary.dim().0 - 1 } - pub fn primary_to_end(&self, primary_index: usize) -> Cost { - self.primary[[primary_index + 1, self.primary.dim().1 - 1]] + fn secondary_array(&self, ts_kind: TsKind) -> &ChainingCostArray { + &self.secondaries[ts_kind.index()] } - pub fn start_to_end(&self) -> Cost { - self.primary[[0, self.primary.dim().1 - 1]] + fn secondary_array_mut(&mut self, ts_kind: TsKind) -> &mut ChainingCostArray { + &mut self.secondaries[ts_kind.index()] } - pub fn jump_12_to_11(&self, from_primary_index: usize, to_secondary_11_index: usize) -> Cost { - self.jump_12_to_11[[from_primary_index + 1, to_secondary_11_index]] + fn jump_12_array(&self, ts_kind: TsKind) -> &ChainingCostArray { + &self.jump_12s[ts_kind.index()] } - pub fn jump_12_to_12(&self, from_primary_index: usize, to_secondary_12_index: usize) -> Cost { - self.jump_12_to_12[[from_primary_index + 1, to_secondary_12_index]] + fn jump_12_array_mut(&mut self, ts_kind: TsKind) -> &mut ChainingCostArray { + &mut self.jump_12s[ts_kind.index()] } - pub fn jump_12_to_21(&self, from_primary_index: usize, to_secondary_21_index: usize) -> Cost { - self.jump_12_to_21[[from_primary_index + 1, to_secondary_21_index]] + fn jump_34_array(&self, ts_kind: TsKind) -> &ChainingCostArray { + &self.jump_34s[ts_kind.index()] } - pub fn jump_12_to_22(&self, from_primary_index: usize, to_secondary_22_index: usize) -> Cost { - self.jump_12_to_22[[from_primary_index + 1, to_secondary_22_index]] + fn jump_34_array_mut(&mut self, ts_kind: TsKind) -> &mut ChainingCostArray { + &mut self.jump_34s[ts_kind.index()] } - pub fn jump_12( - &self, - from_primary_index: usize, - to_secondary_index: usize, - ts_kind: TsKind, - ) -> Cost { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.jump_12_to_11(from_primary_index, to_secondary_index) - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.jump_12_to_12(from_primary_index, to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.jump_12_to_21(from_primary_index, to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.jump_12_to_22(from_primary_index, to_secondary_index) - } - } + pub fn primary(&self, from_primary_index: AnchorIndex, to_primary_index: AnchorIndex) -> Cost { + self.primary[[from_primary_index + 1, to_primary_index + 1]] } - pub fn jump_12_to_11_from_start(&self, to_secondary_11_index: usize) -> Cost { - self.jump_12_to_11[[0, to_secondary_11_index]] + pub fn primary_from_start(&self, primary_index: AnchorIndex) -> Cost { + self.primary[[Self::primary_start_anchor_index(), primary_index + 1]] } - pub fn jump_12_to_12_from_start(&self, to_secondary_12_index: usize) -> Cost { - self.jump_12_to_12[[0, to_secondary_12_index]] + pub fn primary_to_end(&self, primary_index: AnchorIndex) -> Cost { + self.primary[[primary_index + 1, self.primary_end_anchor_index()]] } - pub fn jump_12_to_21_from_start(&self, to_secondary_21_index: usize) -> Cost { - self.jump_12_to_21[[0, to_secondary_21_index]] + pub fn start_to_end(&self) -> Cost { + self.primary[[ + Self::primary_start_anchor_index(), + self.primary_end_anchor_index(), + ]] } - pub fn jump_12_to_22_from_start(&self, to_secondary_22_index: usize) -> Cost { - self.jump_12_to_22[[0, to_secondary_22_index]] + pub fn jump_12( + &self, + from_primary_index: AnchorIndex, + to_secondary_index: AnchorIndex, + ts_kind: TsKind, + ) -> Cost { + self.jump_12_array(ts_kind)[[from_primary_index + 1, to_secondary_index]] } - pub fn jump_12_from_start(&self, to_secondary_index: usize, ts_kind: TsKind) -> Cost { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.jump_12_to_11_from_start(to_secondary_index) - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.jump_12_to_12_from_start(to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.jump_12_to_21_from_start(to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.jump_12_to_22_from_start(to_secondary_index) - } - } + pub fn jump_12_from_start(&self, to_secondary_index: AnchorIndex, ts_kind: TsKind) -> Cost { + self.jump_12_array(ts_kind)[[Self::primary_start_anchor_index(), to_secondary_index]] } pub fn secondary( &self, - from_secondary_index: usize, - to_secondary_index: usize, + from_secondary_index: AnchorIndex, + to_secondary_index: AnchorIndex, ts_kind: TsKind, ) -> Cost { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.secondary_11[[from_secondary_index, to_secondary_index]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.secondary_12[[from_secondary_index, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.secondary_21[[from_secondary_index, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.secondary_22[[from_secondary_index, to_secondary_index]] - } - } + self.secondary_array(ts_kind)[[from_secondary_index, to_secondary_index]] } pub fn jump_34( &self, - from_secondary_index: usize, - to_primary_index: usize, + from_secondary_index: AnchorIndex, + to_primary_index: AnchorIndex, ts_kind: TsKind, ) -> Cost { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.jump_34_from_11[[from_secondary_index, to_primary_index + 1]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.jump_34_from_12[[from_secondary_index, to_primary_index + 1]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.jump_34_from_21[[from_secondary_index, to_primary_index + 1]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.jump_34_from_22[[from_secondary_index, to_primary_index + 1]] - } - } + self.jump_34_array(ts_kind)[[from_secondary_index, to_primary_index + 1]] } - pub fn jump_34_to_end(&self, from_secondary_index: usize, ts_kind: TsKind) -> Cost { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.jump_34_from_11[[from_secondary_index, self.jump_34_from_11.dim().1 - 1]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.jump_34_from_12[[from_secondary_index, self.jump_34_from_12.dim().1 - 1]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.jump_34_from_21[[from_secondary_index, self.jump_34_from_21.dim().1 - 1]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.jump_34_from_22[[from_secondary_index, self.jump_34_from_22.dim().1 - 1]] - } - } + pub fn jump_34_to_end(&self, from_secondary_index: AnchorIndex, ts_kind: TsKind) -> Cost { + self.jump_34_array(ts_kind)[[from_secondary_index, self.primary_end_anchor_index()]] } - pub fn is_primary_exact(&self, from_primary_index: usize, to_primary_index: usize) -> bool { + pub fn is_primary_exact( + &self, + from_primary_index: AnchorIndex, + to_primary_index: AnchorIndex, + ) -> bool { self.primary .is_exact(from_primary_index + 1, to_primary_index + 1) } - pub fn is_primary_from_start_exact(&self, primary_index: usize) -> bool { - self.primary.is_exact(0, primary_index + 1) + pub fn is_primary_from_start_exact(&self, primary_index: AnchorIndex) -> bool { + self.primary + .is_exact(Self::primary_start_anchor_index(), primary_index + 1) } - pub fn is_primary_to_end_exact(&self, primary_index: usize) -> bool { + pub fn is_primary_to_end_exact(&self, primary_index: AnchorIndex) -> bool { self.primary - .is_exact(primary_index + 1, self.primary.dim().1 - 1) + .is_exact(primary_index + 1, self.primary_end_anchor_index()) } pub fn is_start_to_end_exact(&self) -> bool { - self.primary.is_exact(0, self.primary.dim().1 - 1) - } - - pub fn is_jump_12_to_11_exact( - &self, - from_primary_index: usize, - to_secondary_11_index: usize, - ) -> bool { - self.jump_12_to_11 - .is_exact(from_primary_index + 1, to_secondary_11_index) - } - - pub fn is_jump_12_to_12_exact( - &self, - from_primary_index: usize, - to_secondary_12_index: usize, - ) -> bool { - self.jump_12_to_12 - .is_exact(from_primary_index + 1, to_secondary_12_index) + self.primary.is_exact( + Self::primary_start_anchor_index(), + self.primary_end_anchor_index(), + ) } - pub fn is_jump_12_to_21_exact( - &self, - from_primary_index: usize, - to_secondary_21_index: usize, - ) -> bool { - self.jump_12_to_21 - .is_exact(from_primary_index + 1, to_secondary_21_index) - } - - pub fn is_jump_12_to_22_exact( + pub fn is_jump_12_exact( &self, - from_primary_index: usize, - to_secondary_22_index: usize, + from_primary_index: AnchorIndex, + to_secondary_index: AnchorIndex, + ts_kind: TsKind, ) -> bool { - self.jump_12_to_22 - .is_exact(from_primary_index + 1, to_secondary_22_index) + self.jump_12_array(ts_kind) + .is_exact(from_primary_index + 1, to_secondary_index) } - pub fn is_jump_12_exact( + pub fn is_jump_12_from_start_exact( &self, - from_primary_index: usize, - to_secondary_index: usize, + to_secondary_index: AnchorIndex, ts_kind: TsKind, ) -> bool { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.is_jump_12_to_11_exact(from_primary_index, to_secondary_index) - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.is_jump_12_to_12_exact(from_primary_index, to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.is_jump_12_to_21_exact(from_primary_index, to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.is_jump_12_to_22_exact(from_primary_index, to_secondary_index) - } - } - } - - pub fn is_jump_12_to_11_from_start_exact(&self, to_secondary_11_index: usize) -> bool { - self.jump_12_to_11.is_exact(0, to_secondary_11_index) - } - - pub fn is_jump_12_to_12_from_start_exact(&self, to_secondary_12_index: usize) -> bool { - self.jump_12_to_12.is_exact(0, to_secondary_12_index) - } - - pub fn is_jump_12_to_21_from_start_exact(&self, to_secondary_21_index: usize) -> bool { - self.jump_12_to_21.is_exact(0, to_secondary_21_index) - } - - pub fn is_jump_12_to_22_from_start_exact(&self, to_secondary_22_index: usize) -> bool { - self.jump_12_to_22.is_exact(0, to_secondary_22_index) - } - - pub fn is_jump_12_from_start_exact(&self, to_secondary_index: usize, ts_kind: TsKind) -> bool { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - self.is_jump_12_to_11_from_start_exact(to_secondary_index) - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - self.is_jump_12_to_12_from_start_exact(to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - self.is_jump_12_to_21_from_start_exact(to_secondary_index) - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - self.is_jump_12_to_22_from_start_exact(to_secondary_index) - } - } + self.jump_12_array(ts_kind) + .is_exact(Self::primary_start_anchor_index(), to_secondary_index) } pub fn is_secondary_exact( &self, - from_secondary_index: usize, - to_secondary_index: usize, + from_secondary_index: AnchorIndex, + to_secondary_index: AnchorIndex, ts_kind: TsKind, ) -> bool { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => self - .secondary_11 - .is_exact(from_secondary_index, to_secondary_index), - (TsAncestor::Seq1, TsDescendant::Seq2) => self - .secondary_12 - .is_exact(from_secondary_index, to_secondary_index), - (TsAncestor::Seq2, TsDescendant::Seq1) => self - .secondary_21 - .is_exact(from_secondary_index, to_secondary_index), - (TsAncestor::Seq2, TsDescendant::Seq2) => self - .secondary_22 - .is_exact(from_secondary_index, to_secondary_index), - } + self.secondary_array(ts_kind) + .is_exact(from_secondary_index, to_secondary_index) } pub fn is_jump_34_exact( &self, - from_secondary_index: usize, - to_primary_index: usize, + from_secondary_index: AnchorIndex, + to_primary_index: AnchorIndex, ts_kind: TsKind, ) -> bool { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => self - .jump_34_from_11 - .is_exact(from_secondary_index, to_primary_index + 1), - (TsAncestor::Seq1, TsDescendant::Seq2) => self - .jump_34_from_12 - .is_exact(from_secondary_index, to_primary_index + 1), - (TsAncestor::Seq2, TsDescendant::Seq1) => self - .jump_34_from_21 - .is_exact(from_secondary_index, to_primary_index + 1), - (TsAncestor::Seq2, TsDescendant::Seq2) => self - .jump_34_from_22 - .is_exact(from_secondary_index, to_primary_index + 1), - } + self.jump_34_array(ts_kind) + .is_exact(from_secondary_index, to_primary_index + 1) } - pub fn is_jump_34_to_end_exact(&self, from_secondary_index: usize, ts_kind: TsKind) -> bool { - match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => self - .jump_34_from_11 - .is_exact(from_secondary_index, self.jump_34_from_11.dim().1 - 1), - (TsAncestor::Seq1, TsDescendant::Seq2) => self - .jump_34_from_12 - .is_exact(from_secondary_index, self.jump_34_from_12.dim().1 - 1), - (TsAncestor::Seq2, TsDescendant::Seq1) => self - .jump_34_from_21 - .is_exact(from_secondary_index, self.jump_34_from_21.dim().1 - 1), - (TsAncestor::Seq2, TsDescendant::Seq2) => self - .jump_34_from_22 - .is_exact(from_secondary_index, self.jump_34_from_22.dim().1 - 1), - } + pub fn is_jump_34_to_end_exact( + &self, + from_secondary_index: AnchorIndex, + ts_kind: TsKind, + ) -> bool { + self.jump_34_array(ts_kind) + .is_exact(from_secondary_index, self.primary_end_anchor_index()) } pub fn update_primary( &mut self, - from_primary_index: usize, - to_primary_index: usize, + from_primary_index: AnchorIndex, + to_primary_index: AnchorIndex, cost: Cost, is_exact: bool, ) -> bool { @@ -628,14 +328,15 @@ impl ChainingCostFunction { pub fn update_primary_from_start( &mut self, - primary_index: usize, + primary_index: AnchorIndex, cost: Cost, is_exact: bool, ) -> bool { if is_exact { - self.primary.set_exact(0, primary_index + 1); + self.primary + .set_exact(Self::primary_start_anchor_index(), primary_index + 1); } - let target = &mut self.primary[[0, primary_index + 1]]; + let target = &mut self.primary[[Self::primary_start_anchor_index(), primary_index + 1]]; assert!(*target <= cost); let result = *target < cost; *target = cost; @@ -644,11 +345,11 @@ impl ChainingCostFunction { pub fn update_primary_to_end( &mut self, - primary_index: usize, + primary_index: AnchorIndex, cost: Cost, is_exact: bool, ) -> bool { - let end_index = self.primary.dim().1 - 1; + let end_index = self.primary_end_anchor_index(); if is_exact { self.primary.set_exact(primary_index + 1, end_index); } @@ -662,9 +363,10 @@ impl ChainingCostFunction { pub fn update_start_to_end(&mut self, cost: Cost, is_exact: bool) -> bool { let end_index = self.primary.dim().1 - 1; if is_exact { - self.primary.set_exact(0, end_index); + self.primary + .set_exact(Self::primary_start_anchor_index(), end_index); } - let target = &mut self.primary[[0, end_index]]; + let target = &mut self.primary[[Self::primary_start_anchor_index(), end_index]]; assert!(*target <= cost); let result = *target < cost; *target = cost; @@ -673,42 +375,18 @@ impl ChainingCostFunction { pub fn update_jump_12( &mut self, - from_primary_index: usize, - to_secondary_index: usize, + from_primary_index: AnchorIndex, + to_secondary_index: AnchorIndex, ts_kind: TsKind, cost: Cost, is_exact: bool, ) -> bool { - let target = match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - if is_exact { - self.jump_12_to_11 - .set_exact(from_primary_index + 1, to_secondary_index); - } - &mut self.jump_12_to_11[[from_primary_index + 1, to_secondary_index]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - if is_exact { - self.jump_12_to_12 - .set_exact(from_primary_index + 1, to_secondary_index); - } - &mut self.jump_12_to_12[[from_primary_index + 1, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - if is_exact { - self.jump_12_to_21 - .set_exact(from_primary_index + 1, to_secondary_index); - } - &mut self.jump_12_to_21[[from_primary_index + 1, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - if is_exact { - self.jump_12_to_22 - .set_exact(from_primary_index + 1, to_secondary_index); - } - &mut self.jump_12_to_22[[from_primary_index + 1, to_secondary_index]] - } - }; + if is_exact { + self.jump_12_array_mut(ts_kind) + .set_exact(from_primary_index + 1, to_secondary_index); + } + let target = + &mut self.jump_12_array_mut(ts_kind)[[from_primary_index + 1, to_secondary_index]]; assert!(*target <= cost); let result = *target < cost; *target = cost; @@ -717,37 +395,17 @@ impl ChainingCostFunction { pub fn update_jump_12_from_start( &mut self, - to_secondary_index: usize, + to_secondary_index: AnchorIndex, ts_kind: TsKind, cost: Cost, is_exact: bool, ) -> bool { - let target = match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - if is_exact { - self.jump_12_to_11.set_exact(0, to_secondary_index); - } - &mut self.jump_12_to_11[[0, to_secondary_index]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - if is_exact { - self.jump_12_to_12.set_exact(0, to_secondary_index); - } - &mut self.jump_12_to_12[[0, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - if is_exact { - self.jump_12_to_21.set_exact(0, to_secondary_index); - } - &mut self.jump_12_to_21[[0, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - if is_exact { - self.jump_12_to_22.set_exact(0, to_secondary_index); - } - &mut self.jump_12_to_22[[0, to_secondary_index]] - } - }; + if is_exact { + self.jump_12_array_mut(ts_kind) + .set_exact(Self::primary_start_anchor_index(), to_secondary_index); + } + let target = &mut self.jump_12_array_mut(ts_kind) + [[Self::primary_start_anchor_index(), to_secondary_index]]; assert!(*target <= cost); let result = *target < cost; *target = cost; @@ -756,42 +414,18 @@ impl ChainingCostFunction { pub fn update_secondary( &mut self, - from_secondary_index: usize, - to_secondary_index: usize, + from_secondary_index: AnchorIndex, + to_secondary_index: AnchorIndex, ts_kind: TsKind, cost: Cost, is_exact: bool, ) -> bool { - let target = match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - if is_exact { - self.secondary_11 - .set_exact(from_secondary_index, to_secondary_index); - } - &mut self.secondary_11[[from_secondary_index, to_secondary_index]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - if is_exact { - self.secondary_12 - .set_exact(from_secondary_index, to_secondary_index); - } - &mut self.secondary_12[[from_secondary_index, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - if is_exact { - self.secondary_21 - .set_exact(from_secondary_index, to_secondary_index); - } - &mut self.secondary_21[[from_secondary_index, to_secondary_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - if is_exact { - self.secondary_22 - .set_exact(from_secondary_index, to_secondary_index); - } - &mut self.secondary_22[[from_secondary_index, to_secondary_index]] - } - }; + if is_exact { + self.secondary_array_mut(ts_kind) + .set_exact(from_secondary_index, to_secondary_index); + } + let target = + &mut self.secondary_array_mut(ts_kind)[[from_secondary_index, to_secondary_index]]; assert!(*target <= cost); let result = *target < cost; *target = cost; @@ -800,42 +434,18 @@ impl ChainingCostFunction { pub fn update_jump_34( &mut self, - from_secondary_index: usize, - to_primary_index: usize, + from_secondary_index: AnchorIndex, + to_primary_index: AnchorIndex, ts_kind: TsKind, cost: Cost, is_exact: bool, ) -> bool { - let target = match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - if is_exact { - self.jump_34_from_11 - .set_exact(from_secondary_index, to_primary_index + 1); - } - &mut self.jump_34_from_11[[from_secondary_index, to_primary_index + 1]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - if is_exact { - self.jump_34_from_12 - .set_exact(from_secondary_index, to_primary_index + 1); - } - &mut self.jump_34_from_12[[from_secondary_index, to_primary_index + 1]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - if is_exact { - self.jump_34_from_21 - .set_exact(from_secondary_index, to_primary_index + 1); - } - &mut self.jump_34_from_21[[from_secondary_index, to_primary_index + 1]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - if is_exact { - self.jump_34_from_22 - .set_exact(from_secondary_index, to_primary_index + 1); - } - &mut self.jump_34_from_22[[from_secondary_index, to_primary_index + 1]] - } - }; + if is_exact { + self.jump_34_array_mut(ts_kind) + .set_exact(from_secondary_index, to_primary_index + 1); + } + let target = + &mut self.jump_34_array_mut(ts_kind)[[from_secondary_index, to_primary_index + 1]]; assert!(*target <= cost); let result = *target < cost; *target = cost; @@ -844,48 +454,100 @@ impl ChainingCostFunction { pub fn update_jump_34_to_end( &mut self, - from_secondary_index: usize, + from_secondary_index: AnchorIndex, ts_kind: TsKind, cost: Cost, is_exact: bool, ) -> bool { - let target = match (ts_kind.ancestor, ts_kind.descendant) { - (TsAncestor::Seq1, TsDescendant::Seq1) => { - let end_index = self.jump_34_from_11.dim().1 - 1; - if is_exact { - self.jump_34_from_11 - .set_exact(from_secondary_index, end_index); - } - &mut self.jump_34_from_11[[from_secondary_index, end_index]] - } - (TsAncestor::Seq1, TsDescendant::Seq2) => { - let end_index = self.jump_34_from_12.dim().1 - 1; - if is_exact { - self.jump_34_from_12 - .set_exact(from_secondary_index, end_index); - } - &mut self.jump_34_from_12[[from_secondary_index, end_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq1) => { - let end_index = self.jump_34_from_21.dim().1 - 1; - if is_exact { - self.jump_34_from_21 - .set_exact(from_secondary_index, end_index); - } - &mut self.jump_34_from_21[[from_secondary_index, end_index]] - } - (TsAncestor::Seq2, TsDescendant::Seq2) => { - let end_index = self.jump_34_from_22.dim().1 - 1; - if is_exact { - self.jump_34_from_22 - .set_exact(from_secondary_index, end_index); - } - &mut self.jump_34_from_22[[from_secondary_index, end_index]] - } - }; + let end_index = self.primary_end_anchor_index(); + if is_exact { + self.jump_34_array_mut(ts_kind) + .set_exact(from_secondary_index, end_index); + } + let target = &mut self.jump_34_array_mut(ts_kind)[[from_secondary_index, end_index]]; assert!(*target <= cost); let result = *target < cost; *target = cost; result } + + pub fn iter_primary_in_cost_order( + &mut self, + from_primary_index: AnchorIndex, + offset: AnchorIndex, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + self.primary + .iter_in_cost_order_from(from_primary_index + 1, offset) + .filter(|(to_primary_index, _)| *to_primary_index != AnchorIndex::zero()) + .map(|(to_primary_index, chaining_cost)| (to_primary_index - 1, chaining_cost)) + } + + pub fn iter_primary_from_start_in_cost_order( + &mut self, + offset: AnchorIndex, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + self.primary + .iter_in_cost_order_from(AnchorIndex::zero(), offset) + .filter(|(to_primary_index, _)| *to_primary_index != AnchorIndex::zero()) + .map(|(to_primary_index, chaining_cost)| (to_primary_index - 1, chaining_cost)) + } + + pub fn iter_jump_12_in_cost_order( + &mut self, + from_primary_index: AnchorIndex, + ts_kind: TsKind, + offset: AnchorIndex, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + self.jump_12_array_mut(ts_kind) + .iter_in_cost_order_from(from_primary_index + 1, offset) + } + + pub fn iter_jump_12_from_start_in_cost_order( + &mut self, + ts_kind: TsKind, + offset: AnchorIndex, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + self.jump_12_array_mut(ts_kind) + .iter_in_cost_order_from(Self::primary_start_anchor_index(), offset) + } + + pub fn iter_secondary_in_cost_order( + &mut self, + from_secondary_index: AnchorIndex, + ts_kind: TsKind, + offset: AnchorIndex, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + self.secondary_array_mut(ts_kind) + .iter_in_cost_order_from(from_secondary_index, offset) + } + + pub fn iter_jump_34_in_cost_order( + &mut self, + from_secondary_index: AnchorIndex, + ts_kind: TsKind, + offset: AnchorIndex, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + self.jump_34_array_mut(ts_kind) + .iter_in_cost_order_from(from_secondary_index, offset) + .filter(|(to_primary_index, _)| *to_primary_index != AnchorIndex::zero()) + .map(|(to_primary_index, chaining_cost)| (to_primary_index - 1, chaining_cost)) + } } diff --git a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs index 8189c0a7..6f75e327 100644 --- a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs +++ b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs @@ -1,55 +1,109 @@ use std::ops::{Index, IndexMut}; use bitvec::{bitvec, order::LocalBits, vec::BitVec}; +use num_traits::Zero; -pub struct CostArray2D { - len: [usize; 2], +use crate::anchors::index::AnchorIndex; + +pub struct ChainingCostArray { + len: [AnchorIndex; 2], data: Vec, + /// For each ordinate 0, store the indices of the corresponding ordinates 1, ordered by their cost. + cost_order_permutation: Vec>, is_exact: BitVec, } -impl CostArray2D { - pub fn new_from_cost(len: [usize; 2], cost: Cost) -> Self +impl ChainingCostArray { + pub fn new_from_cost(dim: [impl Into; 2], cost: Cost) -> Self where Cost: Clone, { + let dim = dim.map(Into::into); + Self { - len, - data: vec![cost; len[0] * len[1]], - is_exact: bitvec![usize, LocalBits; 0; len[0] * len[1]], + len: dim, + data: vec![cost; dim[0].as_usize() * dim[1].as_usize()], + cost_order_permutation: vec![Vec::new(); dim[0].as_usize()], + is_exact: bitvec![usize, LocalBits; 0; dim[0].as_usize() * dim[1].as_usize()], } } - pub fn is_exact(&self, c1: usize, c2: usize) -> bool { + pub fn is_exact(&self, c1: AnchorIndex, c2: AnchorIndex) -> bool { self.is_exact[coordinates_to_index(c1, c2, self.len)] } - pub fn set_exact(&mut self, c1: usize, c2: usize) { + pub fn set_exact(&mut self, c1: AnchorIndex, c2: AnchorIndex) { debug_assert!(!self.is_exact(c1, c2)); self.is_exact .set(coordinates_to_index(c1, c2, self.len), true); } - pub fn dim(&self) -> (usize, usize) { + pub fn dim(&self) -> (AnchorIndex, AnchorIndex) { (self.len[0], self.len[1]) } + + fn compute_cost_order_permutation_if_missing(&mut self, c1: AnchorIndex) + where + Cost: Copy + Ord, + { + if self.cost_order_permutation[c1.as_usize()].is_empty() { + self.cost_order_permutation[c1.as_usize()].extend( + ((0..c1.as_usize()).chain(c1.as_usize() + 1..)) + .take(self.len[1].as_usize() - 1) + .map(AnchorIndex::from), + ); + self.cost_order_permutation[c1.as_usize()] + .sort_unstable_by_key(|c2| self.data[coordinates_to_index(c1, *c2, self.len)]); + } + } + + /// Iterate over all ordinates `c2` that belong to this `c1` in order of their cost. + #[expect(dead_code)] + pub fn iter_in_cost_order( + &mut self, + c1: AnchorIndex, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + self.iter_in_cost_order_from(c1, AnchorIndex::zero()) + } + + pub fn iter_in_cost_order_from( + &mut self, + c1: AnchorIndex, + offset: AnchorIndex, + ) -> impl Iterator + where + Cost: Copy + Ord, + { + self.compute_cost_order_permutation_if_missing(c1); + let data = &self.data; + let len = self.len; + self.cost_order_permutation[c1.as_usize()] + .iter() + .copied() + .skip(offset.as_usize()) + .map(move |c2| (c2, data[coordinates_to_index(c1, c2, len)])) + } } -impl Index<[usize; 2]> for CostArray2D { +impl Index<[AnchorIndex; 2]> for ChainingCostArray { type Output = Cost; - fn index(&self, index: [usize; 2]) -> &Self::Output { + fn index(&self, index: [AnchorIndex; 2]) -> &Self::Output { &self.data[coordinates_to_index(index[0], index[1], self.len)] } } -impl IndexMut<[usize; 2]> for CostArray2D { - fn index_mut(&mut self, index: [usize; 2]) -> &mut Self::Output { +impl IndexMut<[AnchorIndex; 2]> for ChainingCostArray { + fn index_mut(&mut self, index: [AnchorIndex; 2]) -> &mut Self::Output { + self.cost_order_permutation[index[0].as_usize()].clear(); &mut self.data[coordinates_to_index(index[0], index[1], self.len)] } } #[inline] -fn coordinates_to_index(c1: usize, c2: usize, len: [usize; 2]) -> usize { - c1 * len[1] + c2 +fn coordinates_to_index(c1: AnchorIndex, c2: AnchorIndex, len: [AnchorIndex; 2]) -> usize { + c1.as_usize() * len[1].as_usize() + c2.as_usize() } diff --git a/lib_ts_chainalign/src/lib.rs b/lib_ts_chainalign/src/lib.rs index cbfe68e1..d0086366 100644 --- a/lib_ts_chainalign/src/lib.rs +++ b/lib_ts_chainalign/src/lib.rs @@ -9,6 +9,7 @@ use log::{debug, info, trace}; use crate::{ alignment::{coordinates::AlignmentCoordinates, sequences::AlignmentSequences}, anchors::Anchors, + chain_align::AlignmentParameters, chaining_cost_function::ChainingCostFunction, chaining_lower_bounds::ChainingLowerBounds, costs::AlignmentCosts, @@ -36,10 +37,12 @@ pub fn preprocess( ChainingLowerBounds::new(max_n, max_match_run, alignment_costs) } +#[expect(clippy::too_many_arguments)] pub fn align( reference: Vec, query: Vec, range: AlignmentRange, + parameters: &AlignmentParameters, rc_fn: &dyn Fn(u8) -> u8, reference_name: &str, query_name: &str, @@ -71,6 +74,7 @@ pub fn align( &sequences, start, end, + parameters, chaining_lower_bounds.alignment_costs(), rc_fn, chaining_lower_bounds.max_match_run(), diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 60175142..2289e565 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "1.85.1" +channel = "1.92.0" components = ["rust-analyzer"] diff --git a/tsalign/src/align.rs b/tsalign/src/align.rs index 1bda7dca..1793a9e6 100644 --- a/tsalign/src/align.rs +++ b/tsalign/src/align.rs @@ -109,6 +109,13 @@ pub struct Cli { #[clap(long, default_value = "maximise")] ts_total_length_strategy: TemplateSwitchTotalLengthStrategySelector, + /// The maximum amount of successors to generate while processing a node during chaining. + /// Can be tuned to optimise performance. + /// + /// This applies only to tschainalign. + #[clap(long, default_value = "5")] + max_chaining_successors: usize, + /// If set, template switches are not allowed. /// /// Use this to compare a template switch alignment against an alignment with out template switches. diff --git a/tsalign/src/align/a_star_chain_ts.rs b/tsalign/src/align/a_star_chain_ts.rs index 9620611c..1d0a9caa 100644 --- a/tsalign/src/align/a_star_chain_ts.rs +++ b/tsalign/src/align/a_star_chain_ts.rs @@ -2,7 +2,10 @@ use compact_genome::interface::{ alphabet::{Alphabet, AlphabetCharacter}, sequence::GenomeSequence, }; -use lib_ts_chainalign::{chaining_lower_bounds::ChainingLowerBounds, costs::AlignmentCosts}; +use lib_ts_chainalign::{ + chain_align::AlignmentParameters, chaining_lower_bounds::ChainingLowerBounds, + costs::AlignmentCosts, +}; use lib_tsalign::{ a_star_aligner::alignment_geometry::AlignmentRange, config::TemplateSwitchConfig, }; @@ -90,10 +93,15 @@ pub fn align_a_star_chain_ts< let reference = reference.clone_as_vec(); let query = query.clone_as_vec(); + let parameters = AlignmentParameters { + max_successors: cli.max_chaining_successors, + }; + let alignment = lib_ts_chainalign::align::( reference, query, range, + ¶meters, &|c| { AlphabetType::character_to_ascii( AlphabetType::ascii_to_character(c).unwrap().complement(),