From 1fe99a2bdf1bd96af14df1c999615178753c3e2a Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 17 Dec 2025 09:21:08 +0200 Subject: [PATCH 01/13] Compute statistics about gap fillings per chain. --- lib_ts_chainalign/src/chain_align.rs | 41 ++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/lib_ts_chainalign/src/chain_align.rs b/lib_ts_chainalign/src/chain_align.rs index b7d5acb..f5d40fe 100644 --- a/lib_ts_chainalign/src/chain_align.rs +++ b/lib_ts_chainalign/src/chain_align.rs @@ -195,6 +195,8 @@ fn actually_align< ); let mut astar = AStar::<_, ClosedList, OpenList>::new(context); let mut chaining_execution_count = 0; + let mut total_chain_gap_fillings = 0; + let mut total_chain_gaps = 0; let mut current_lower_bound = Cost::zero(); let mut current_upper_bound = Cost::max_value(); let mut total_chaining_opened_nodes = 0; @@ -314,6 +316,8 @@ fn actually_align< rc_fn, max_match_run, astar.context_mut().chaining_cost_function, + &mut total_chain_gap_fillings, + &mut total_chain_gaps, false, ); let cost_increased = evaluated_cost > current_lower_bound; @@ -338,6 +342,11 @@ fn actually_align< total_chaining_suboptimal_opened_nodes as f64 / total_chaining_opened_nodes as f64 * 100.0, ); debug!("Chaining closed nodes: {total_chaining_closed_nodes}"); + debug!("Total chain gaps: {total_chain_gaps}"); + debug!( + "Total chain gap fillings: {total_chain_gap_fillings} ({:.0}%)", + total_chain_gap_fillings as f64 / total_chain_gaps as f64 * 100.0 + ); let mut tsalign_alignment = lib_tsalign::a_star_aligner::alignment_result::alignment::Alignment::new(); @@ -355,6 +364,8 @@ fn actually_align< rc_fn, max_match_run, astar.context_mut().chaining_cost_function, + &mut 0, + &mut 0, true, ); assert_eq!(evaluated_cost, result.cost()); @@ -468,6 +479,8 @@ fn evaluate_chain( rc_fn: &dyn Fn(u8) -> u8, max_match_run: u32, chaining_cost_function: &mut ChainingCostFunction, + total_chain_gap_fillings: &mut u64, + total_chain_gaps: &mut u64, final_evaluation: bool, ) -> (Cost, Vec) { let k = usize::try_from(max_match_run + 1).unwrap(); @@ -496,6 +509,7 @@ fn evaluate_chain( break; }; current_from_index = to_anchor_index; + *total_chain_gaps += 1; match (from_anchor, to_anchor) { (Identifier::Start, Identifier::End) => { @@ -508,6 +522,7 @@ fn evaluate_chain( rc_fn, max_match_run, ); + *total_chain_gap_fillings += 1; trace!("Aligning from start to end costs {}", alignment.cost()); if !final_evaluation { chaining_cost_function.update_start_to_end(alignment.cost(), true); @@ -531,6 +546,7 @@ fn evaluate_chain( rc_fn, max_match_run, ); + *total_chain_gap_fillings += 1; trace!( "Aligning from start to P{index}{} costs {}", anchors.primary(index), @@ -564,6 +580,7 @@ fn evaluate_chain( rc_fn, max_match_run, ); + *total_chain_gap_fillings += 1; trace!( "Aligning from start to S{}[{index}]{} costs {}", ts_kind.digits(), @@ -593,6 +610,7 @@ fn evaluate_chain( rc_fn, max_match_run, ); + *total_chain_gap_fillings += 1; trace!( "Aligning from P{index}{} to end costs {}", anchors.primary(index), @@ -619,6 +637,7 @@ fn evaluate_chain( rc_fn, max_match_run, ); + *total_chain_gap_fillings += 1; trace!( "Aligning from S{}[{index}]{} to end costs {}", ts_kind.digits(), @@ -670,6 +689,7 @@ fn evaluate_chain( rc_fn, max_match_run, ); + *total_chain_gap_fillings += 1; trace!( "Aligning from P{from_index}{} to P{to_index}{} (from {start} to {end}) costs {}", anchors.primary(from_index), @@ -728,6 +748,7 @@ fn evaluate_chain( rc_fn, max_match_run, ); + *total_chain_gap_fillings += 1; if !final_evaluation { chaining_cost_function.update_jump_12( from_index, @@ -788,6 +809,7 @@ fn evaluate_chain( rc_fn, max_match_run, ); + *total_chain_gap_fillings += 1; trace!( "Aligning from S{}[{from_index}]{} to S{}[{to_index}]{} costs {}", ts_kind.digits(), @@ -837,15 +859,7 @@ fn evaluate_chain( rc_fn, max_match_run, ); - if !final_evaluation { - chaining_cost_function.update_jump_34( - from_index, - to_index, - ts_kind, - alignment.cost(), - true, - ); - } + *total_chain_gap_fillings += 1; trace!( "Aligning from S{}[{from_index}]{} to P{to_index}{} (S{} to P{}) costs {}", ts_kind.digits(), @@ -855,6 +869,15 @@ fn evaluate_chain( end, alignment.cost() ); + if !final_evaluation { + chaining_cost_function.update_jump_34( + from_index, + to_index, + ts_kind, + alignment.cost(), + true, + ); + } alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); alignments.push(alignment.alignment().clone()); } From ac405fbe096960b9391aee67a0621c710292f22e Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 17 Dec 2025 10:15:05 +0200 Subject: [PATCH 02/13] Refactor and make exact chaining caching. --- lib_ts_chainalign/src/alignment.rs | 12 +- lib_ts_chainalign/src/chain_align.rs | 467 +----------------- .../src/chain_align/evaluation.rs | 407 +++++++++++++++ .../src/exact_chaining/gap_affine.rs | 94 ++-- .../src/exact_chaining/gap_affine/tests.rs | 115 ++--- .../src/exact_chaining/ts_12_jump.rs | 91 ++-- .../src/exact_chaining/ts_12_jump/tests.rs | 83 ++-- .../src/exact_chaining/ts_34_jump.rs | 96 ++-- .../src/exact_chaining/ts_34_jump/tests.rs | 83 ++-- 9 files changed, 690 insertions(+), 758 deletions(-) create mode 100644 lib_ts_chainalign/src/chain_align/evaluation.rs diff --git a/lib_ts_chainalign/src/alignment.rs b/lib_ts_chainalign/src/alignment.rs index 001f72d..c98b2b8 100644 --- a/lib_ts_chainalign/src/alignment.rs +++ b/lib_ts_chainalign/src/alignment.rs @@ -1,6 +1,6 @@ //! Representation of an alignment. -use std::fmt::Display; +use std::{fmt::Display, iter}; use crate::alignment::ts_kind::TsKind; @@ -32,6 +32,16 @@ pub struct Alignment { pub alignment: Vec<(usize, AlignmentType)>, } +impl Alignment { + pub fn iter_unpacked(&self) -> impl Iterator { + self.alignment + .iter() + .flat_map(|(multiplicity, alignment_type)| { + iter::repeat_n(*alignment_type, *multiplicity) + }) + } +} + impl Display for GapType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { diff --git a/lib_ts_chainalign/src/chain_align.rs b/lib_ts_chainalign/src/chain_align.rs index f5d40fe..6ba0acc 100644 --- a/lib_ts_chainalign/src/chain_align.rs +++ b/lib_ts_chainalign/src/chain_align.rs @@ -20,29 +20,25 @@ use lib_tsalign::a_star_aligner::{ template_switch_distance::{EqualCostRange, TemplateSwitchDirection}, }; use log::{debug, trace}; -use num_traits::Zero; use rustc_hash::FxHashMapSeed; use std::{ fmt::Write, - iter, time::{Duration, Instant}, }; use crate::{ - alignment::{ - Alignment, AlignmentType, coordinates::AlignmentCoordinates, sequences::AlignmentSequences, - }, + alignment::{AlignmentType, coordinates::AlignmentCoordinates, sequences::AlignmentSequences}, anchors::Anchors, - chain_align::chainer::{Context, Identifier, Node, closed_list::ChainerClosedList}, + chain_align::{ + chainer::{Context, Identifier, Node, closed_list::ChainerClosedList}, + evaluation::ChainEvaluator, + }, chaining_cost_function::ChainingCostFunction, costs::AlignmentCosts, - exact_chaining::{ - gap_affine::GapAffineAlignment, ts_12_jump::Ts12JumpAlignment, - ts_34_jump::Ts34JumpAlignment, - }, }; mod chainer; +mod evaluation; pub struct AlignmentParameters { /// The step width for generating successors during chaining. @@ -194,6 +190,9 @@ fn actually_align< parameters.max_successors, ); let mut astar = AStar::<_, ClosedList, OpenList>::new(context); + + let mut chain_evaluator = ChainEvaluator::new(sequences, alignment_costs, rc_fn, max_match_run); + let mut chaining_execution_count = 0; let mut total_chain_gap_fillings = 0; let mut total_chain_gaps = 0; @@ -306,14 +305,11 @@ fn actually_align< let evaluation_start_time = Instant::now(); - let (evaluated_cost, _) = evaluate_chain( + let (evaluated_cost, _) = chain_evaluator.evaluate_chain( anchors, &chain, - sequences, start, end, - alignment_costs, - rc_fn, max_match_run, astar.context_mut().chaining_cost_function, &mut total_chain_gap_fillings, @@ -354,14 +350,11 @@ fn actually_align< let mut anti_primary_gap = 0isize; debug!("Evaluating final chain"); - let (evaluated_cost, alignments) = evaluate_chain( + let (evaluated_cost, alignments) = chain_evaluator.evaluate_chain( anchors, &chain, - sequences, start, end, - alignment_costs, - rc_fn, max_match_run, astar.context_mut().chaining_cost_function, &mut 0, @@ -467,441 +460,3 @@ fn actually_align< sequences.seq2().len(), ) } - -#[expect(clippy::too_many_arguments)] -fn evaluate_chain( - anchors: &Anchors, - chain: &[Identifier], - sequences: &AlignmentSequences, - start: AlignmentCoordinates, - end: AlignmentCoordinates, - alignment_costs: &AlignmentCosts, - rc_fn: &dyn Fn(u8) -> u8, - max_match_run: u32, - chaining_cost_function: &mut ChainingCostFunction, - total_chain_gap_fillings: &mut u64, - total_chain_gaps: &mut u64, - final_evaluation: bool, -) -> (Cost, Vec) { - let k = usize::try_from(max_match_run + 1).unwrap(); - let mut current_upper_bound = Cost::zero(); - let mut alignments = Vec::new(); - let mut current_from_index = 0; - - loop { - let from_anchor = chain[current_from_index]; - let Some((to_anchor_index, to_anchor)) = chain - .iter() - .copied() - .enumerate() - .skip(current_from_index + 1) - .find(|(_, identifier)| match identifier { - Identifier::Start => true, - Identifier::StartToPrimary { .. } => false, - Identifier::StartToSecondary { .. } => false, - Identifier::PrimaryToPrimary { offset, .. } - | Identifier::PrimaryToSecondary { offset, .. } - | Identifier::SecondaryToSecondary { offset, .. } - | Identifier::SecondaryToPrimary { offset, .. } => offset.is_zero(), - Identifier::End => true, - }) - else { - break; - }; - current_from_index = to_anchor_index; - *total_chain_gaps += 1; - - match (from_anchor, to_anchor) { - (Identifier::Start, Identifier::End) => { - if final_evaluation || !chaining_cost_function.is_start_to_end_exact() { - let alignment = GapAffineAlignment::new( - start, - end, - sequences, - &alignment_costs.primary_costs, - rc_fn, - max_match_run, - ); - *total_chain_gap_fillings += 1; - trace!("Aligning from start to end costs {}", alignment.cost()); - if !final_evaluation { - chaining_cost_function.update_start_to_end(alignment.cost(), true); - } - alignments.push(alignment.alignment().clone()); - } - current_upper_bound += chaining_cost_function.start_to_end(); - } - ( - Identifier::Start, - Identifier::PrimaryToPrimary { index, .. } - | Identifier::PrimaryToSecondary { index, .. }, - ) => { - let end = anchors.primary(index).start(); - if final_evaluation || !chaining_cost_function.is_primary_from_start_exact(index) { - let alignment = GapAffineAlignment::new( - start, - end, - sequences, - &alignment_costs.primary_costs, - rc_fn, - max_match_run, - ); - *total_chain_gap_fillings += 1; - trace!( - "Aligning from start to P{index}{} costs {}", - anchors.primary(index), - alignment.cost() - ); - if !final_evaluation { - chaining_cost_function.update_primary_from_start( - index, - alignment.cost(), - true, - ); - } - alignments.push(alignment.alignment().clone()); - } - current_upper_bound += chaining_cost_function.primary_from_start(index); - } - ( - Identifier::Start, - Identifier::SecondaryToPrimary { index, ts_kind, .. } - | Identifier::SecondaryToSecondary { index, ts_kind, .. }, - ) => { - let end = anchors.secondary(index, ts_kind).start(ts_kind); - if final_evaluation - || !chaining_cost_function.is_jump_12_from_start_exact(index, ts_kind) - { - let alignment = Ts12JumpAlignment::new( - start, - end, - sequences, - alignment_costs, - rc_fn, - max_match_run, - ); - *total_chain_gap_fillings += 1; - trace!( - "Aligning from start to S{}[{index}]{} costs {}", - ts_kind.digits(), - anchors.secondary(index, ts_kind), - alignment.cost() - ); - if !final_evaluation { - chaining_cost_function.update_jump_12_from_start( - index, - ts_kind, - alignment.cost(), - true, - ); - } - alignments.push(alignment.alignment().clone()); - } - current_upper_bound += chaining_cost_function.jump_12_from_start(index, ts_kind); - } - (Identifier::PrimaryToPrimary { index, .. }, Identifier::End) => { - let start = anchors.primary(index).end(k); - if final_evaluation || !chaining_cost_function.is_primary_to_end_exact(index) { - let alignment = GapAffineAlignment::new( - start, - end, - sequences, - &alignment_costs.primary_costs, - rc_fn, - max_match_run, - ); - *total_chain_gap_fillings += 1; - trace!( - "Aligning from P{index}{} to end costs {}", - anchors.primary(index), - alignment.cost() - ); - if !final_evaluation { - chaining_cost_function.update_primary_to_end(index, alignment.cost(), true); - } - alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); - alignments.push(alignment.alignment().clone()); - } - current_upper_bound += chaining_cost_function.primary_to_end(index); - } - (Identifier::SecondaryToPrimary { index, ts_kind, .. }, Identifier::End) => { - let start = anchors.secondary(index, ts_kind).end(ts_kind, k); - if final_evaluation - || !chaining_cost_function.is_jump_34_to_end_exact(index, ts_kind) - { - let alignment = Ts34JumpAlignment::new( - start, - end, - sequences, - alignment_costs, - rc_fn, - max_match_run, - ); - *total_chain_gap_fillings += 1; - trace!( - "Aligning from S{}[{index}]{} to end costs {}", - ts_kind.digits(), - anchors.secondary(index, ts_kind), - alignment.cost() - ); - if !final_evaluation { - chaining_cost_function.update_jump_34_to_end( - index, - ts_kind, - alignment.cost(), - true, - ); - } - alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); - alignments.push(alignment.alignment().clone()); - } - current_upper_bound += chaining_cost_function.jump_34_to_end(index, ts_kind); - } - ( - Identifier::PrimaryToPrimary { - index: from_index, .. - }, - Identifier::PrimaryToPrimary { - index: to_index, .. - } - | Identifier::PrimaryToSecondary { - index: to_index, .. - }, - ) => { - if anchors - .primary(from_index) - .is_direct_predecessor_of(anchors.primary(to_index)) - { - alignments.push(Alignment::from(vec![AlignmentType::Match])); - continue; - } - - let start = anchors.primary(from_index).end(k); - let end = anchors.primary(to_index).start(); - if final_evaluation - || !chaining_cost_function.is_primary_exact(from_index, to_index) - { - let alignment = GapAffineAlignment::new( - start, - end, - sequences, - &alignment_costs.primary_costs, - rc_fn, - max_match_run, - ); - *total_chain_gap_fillings += 1; - trace!( - "Aligning from P{from_index}{} to P{to_index}{} (from {start} to {end}) costs {}", - anchors.primary(from_index), - anchors.primary(to_index), - alignment.cost() - ); - - if end.primary_ordinate_a().unwrap() - start.primary_ordinate_a().unwrap() - > usize::try_from(max_match_run).unwrap() - { - assert!( - !alignment.cost().is_zero(), - "Alignment is longer than max_match_run, but has zero cost: {}", - alignment.alignment() - ); - } - - if !final_evaluation { - chaining_cost_function.update_primary( - from_index, - to_index, - alignment.cost(), - true, - ); - } - alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); - alignments.push(alignment.alignment().clone()); - } - current_upper_bound += chaining_cost_function.primary(from_index, to_index); - } - ( - Identifier::PrimaryToSecondary { - index: from_index, .. - }, - Identifier::SecondaryToSecondary { - index: to_index, - ts_kind, - .. - } - | Identifier::SecondaryToPrimary { - index: to_index, - ts_kind, - .. - }, - ) => { - let start = anchors.primary(from_index).end(k); - let end = anchors.secondary(to_index, ts_kind).start(ts_kind); - if final_evaluation - || !chaining_cost_function.is_jump_12_exact(from_index, to_index, ts_kind) - { - let alignment = Ts12JumpAlignment::new( - start, - end, - sequences, - alignment_costs, - rc_fn, - max_match_run, - ); - *total_chain_gap_fillings += 1; - if !final_evaluation { - chaining_cost_function.update_jump_12( - from_index, - to_index, - ts_kind, - alignment.cost(), - true, - ); - } - trace!( - "Aligning from P{from_index}{} to S{}[{to_index}]{} costs {}", - anchors.primary(from_index), - ts_kind.digits(), - anchors.secondary(to_index, ts_kind), - alignment.cost() - ); - alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); - alignments.push(alignment.alignment().clone()); - } - current_upper_bound += - chaining_cost_function.jump_12(from_index, to_index, ts_kind); - } - ( - Identifier::SecondaryToSecondary { - index: from_index, - ts_kind, - .. - }, - Identifier::SecondaryToSecondary { - index: to_index, - ts_kind: to_ts_kind, - .. - } - | Identifier::SecondaryToPrimary { - index: to_index, - ts_kind: to_ts_kind, - .. - }, - ) => { - assert_eq!(ts_kind, to_ts_kind); - if anchors - .secondary(from_index, ts_kind) - .is_direct_predecessor_of(anchors.secondary(to_index, ts_kind)) - { - alignments.push(Alignment::from(vec![AlignmentType::Match])); - continue; - } - let start = anchors.secondary(from_index, ts_kind).end(ts_kind, k); - let end = anchors.secondary(to_index, ts_kind).start(ts_kind); - if final_evaluation - || !chaining_cost_function.is_secondary_exact(from_index, to_index, ts_kind) - { - let alignment = GapAffineAlignment::new( - start, - end, - sequences, - &alignment_costs.secondary_costs, - rc_fn, - max_match_run, - ); - *total_chain_gap_fillings += 1; - trace!( - "Aligning from S{}[{from_index}]{} to S{}[{to_index}]{} costs {}", - ts_kind.digits(), - anchors.secondary(from_index, ts_kind), - ts_kind.digits(), - anchors.secondary(to_index, ts_kind), - alignment.cost() - ); - if !final_evaluation { - chaining_cost_function.update_secondary( - from_index, - to_index, - ts_kind, - alignment.cost(), - true, - ); - } - alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); - alignments.push(alignment.alignment().clone()); - } - current_upper_bound += - chaining_cost_function.secondary(from_index, to_index, ts_kind); - } - ( - Identifier::SecondaryToPrimary { - index: from_index, - ts_kind, - .. - }, - Identifier::PrimaryToPrimary { - index: to_index, .. - } - | Identifier::PrimaryToSecondary { - index: to_index, .. - }, - ) => { - let start = anchors.secondary(from_index, ts_kind).end(ts_kind, k); - let end = anchors.primary(to_index).start(); - if final_evaluation - || !chaining_cost_function.is_jump_34_exact(from_index, to_index, ts_kind) - { - let alignment = Ts34JumpAlignment::new( - start, - end, - sequences, - alignment_costs, - rc_fn, - max_match_run, - ); - *total_chain_gap_fillings += 1; - trace!( - "Aligning from S{}[{from_index}]{} to P{to_index}{} (S{} to P{}) costs {}", - ts_kind.digits(), - anchors.secondary(from_index, ts_kind), - anchors.primary(to_index), - start, - end, - alignment.cost() - ); - if !final_evaluation { - chaining_cost_function.update_jump_34( - from_index, - to_index, - ts_kind, - alignment.cost(), - true, - ); - } - alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); - alignments.push(alignment.alignment().clone()); - } - current_upper_bound += - chaining_cost_function.jump_34(from_index, to_index, ts_kind); - } - (Identifier::End, _) - | (_, Identifier::Start) - | ( - Identifier::SecondaryToPrimary { .. } | Identifier::PrimaryToPrimary { .. }, - Identifier::SecondaryToPrimary { .. } | Identifier::SecondaryToSecondary { .. }, - ) - | ( - Identifier::PrimaryToSecondary { .. } | Identifier::SecondaryToSecondary { .. }, - Identifier::PrimaryToPrimary { .. } - | Identifier::PrimaryToSecondary { .. } - | Identifier::End, - ) - | (Identifier::StartToPrimary { .. } | Identifier::StartToSecondary { .. }, _) - | (_, Identifier::StartToPrimary { .. } | Identifier::StartToSecondary { .. }) => { - unreachable!() - } - } - } - - (current_upper_bound, alignments) -} diff --git a/lib_ts_chainalign/src/chain_align/evaluation.rs b/lib_ts_chainalign/src/chain_align/evaluation.rs new file mode 100644 index 0000000..274b833 --- /dev/null +++ b/lib_ts_chainalign/src/chain_align/evaluation.rs @@ -0,0 +1,407 @@ +use std::iter; + +use generic_a_star::cost::AStarCost; +use log::trace; +use num_traits::Zero; + +use crate::{ + alignment::{ + Alignment, AlignmentType, coordinates::AlignmentCoordinates, sequences::AlignmentSequences, + }, + anchors::Anchors, + chain_align::chainer::Identifier, + chaining_cost_function::ChainingCostFunction, + costs::AlignmentCosts, + exact_chaining::{ + gap_affine::GapAffineAligner, ts_12_jump::Ts12JumpAligner, ts_34_jump::Ts34JumpAligner, + }, +}; + +pub struct ChainEvaluator<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> { + primary_aligner: GapAffineAligner<'sequences, 'alignment_costs, 'rc_fn, Cost>, + secondary_aligner: GapAffineAligner<'sequences, 'alignment_costs, 'rc_fn, Cost>, + ts_12_jump_aligner: Ts12JumpAligner<'sequences, 'alignment_costs, 'rc_fn, Cost>, + ts_34_jump_aligner: Ts34JumpAligner<'sequences, 'alignment_costs, 'rc_fn, Cost>, +} + +impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> + ChainEvaluator<'sequences, 'alignment_costs, 'rc_fn, Cost> +{ + pub fn new( + sequences: &'sequences AlignmentSequences, + alignment_costs: &'alignment_costs AlignmentCosts, + rc_fn: &'rc_fn dyn Fn(u8) -> u8, + max_match_run: u32, + ) -> Self { + Self { + primary_aligner: GapAffineAligner::new( + sequences, + &alignment_costs.primary_costs, + rc_fn, + max_match_run, + ), + secondary_aligner: GapAffineAligner::new( + sequences, + &alignment_costs.secondary_costs, + rc_fn, + max_match_run, + ), + ts_12_jump_aligner: Ts12JumpAligner::new( + sequences, + alignment_costs, + rc_fn, + max_match_run, + ), + ts_34_jump_aligner: Ts34JumpAligner::new( + sequences, + alignment_costs, + rc_fn, + max_match_run, + ), + } + } + + #[expect(clippy::too_many_arguments)] + pub fn evaluate_chain( + &mut self, + anchors: &Anchors, + chain: &[Identifier], + start: AlignmentCoordinates, + end: AlignmentCoordinates, + max_match_run: u32, + chaining_cost_function: &mut ChainingCostFunction, + total_chain_gap_fillings: &mut u64, + total_chain_gaps: &mut u64, + final_evaluation: bool, + ) -> (Cost, Vec) { + let k = usize::try_from(max_match_run + 1).unwrap(); + let mut current_upper_bound = Cost::zero(); + let mut alignments = Vec::new(); + let mut current_from_index = 0; + + loop { + let from_anchor = chain[current_from_index]; + let Some((to_anchor_index, to_anchor)) = chain + .iter() + .copied() + .enumerate() + .skip(current_from_index + 1) + .find(|(_, identifier)| match identifier { + Identifier::Start => true, + Identifier::StartToPrimary { .. } => false, + Identifier::StartToSecondary { .. } => false, + Identifier::PrimaryToPrimary { offset, .. } + | Identifier::PrimaryToSecondary { offset, .. } + | Identifier::SecondaryToSecondary { offset, .. } + | Identifier::SecondaryToPrimary { offset, .. } => offset.is_zero(), + Identifier::End => true, + }) + else { + break; + }; + current_from_index = to_anchor_index; + *total_chain_gaps += 1; + + match (from_anchor, to_anchor) { + (Identifier::Start, Identifier::End) => { + if final_evaluation || !chaining_cost_function.is_start_to_end_exact() { + let (cost, alignment) = self.primary_aligner.align(start, end); + *total_chain_gap_fillings += 1; + trace!("Aligning from start to end costs {}", cost); + if !final_evaluation { + chaining_cost_function.update_start_to_end(cost, true); + } + alignments.push(alignment); + } + current_upper_bound += chaining_cost_function.start_to_end(); + } + ( + Identifier::Start, + Identifier::PrimaryToPrimary { index, .. } + | Identifier::PrimaryToSecondary { index, .. }, + ) => { + let end = anchors.primary(index).start(); + if final_evaluation + || !chaining_cost_function.is_primary_from_start_exact(index) + { + let (cost, alignment) = self.primary_aligner.align(start, end); + *total_chain_gap_fillings += 1; + trace!( + "Aligning from start to P{index}{} costs {}", + anchors.primary(index), + cost + ); + if !final_evaluation { + chaining_cost_function.update_primary_from_start(index, cost, true); + } + alignments.push(alignment); + } + current_upper_bound += chaining_cost_function.primary_from_start(index); + } + ( + Identifier::Start, + Identifier::SecondaryToPrimary { index, ts_kind, .. } + | Identifier::SecondaryToSecondary { index, ts_kind, .. }, + ) => { + let end = anchors.secondary(index, ts_kind).start(ts_kind); + if final_evaluation + || !chaining_cost_function.is_jump_12_from_start_exact(index, ts_kind) + { + let (cost, alignment) = self.ts_12_jump_aligner.align(start, end); + *total_chain_gap_fillings += 1; + trace!( + "Aligning from start to S{}[{index}]{} costs {}", + ts_kind.digits(), + anchors.secondary(index, ts_kind), + cost + ); + if !final_evaluation { + chaining_cost_function + .update_jump_12_from_start(index, ts_kind, cost, true); + } + alignments.push(alignment); + } + current_upper_bound += + chaining_cost_function.jump_12_from_start(index, ts_kind); + } + (Identifier::PrimaryToPrimary { index, .. }, Identifier::End) => { + let start = anchors.primary(index).end(k); + if final_evaluation || !chaining_cost_function.is_primary_to_end_exact(index) { + let (cost, alignment) = self.primary_aligner.align(start, end); + *total_chain_gap_fillings += 1; + trace!( + "Aligning from P{index}{} to end costs {}", + anchors.primary(index), + cost + ); + if !final_evaluation { + chaining_cost_function.update_primary_to_end(index, cost, true); + } + alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); + alignments.push(alignment); + } + current_upper_bound += chaining_cost_function.primary_to_end(index); + } + (Identifier::SecondaryToPrimary { index, ts_kind, .. }, Identifier::End) => { + let start = anchors.secondary(index, ts_kind).end(ts_kind, k); + if final_evaluation + || !chaining_cost_function.is_jump_34_to_end_exact(index, ts_kind) + { + let (cost, alignment) = self.ts_34_jump_aligner.align(start, end); + *total_chain_gap_fillings += 1; + trace!( + "Aligning from S{}[{index}]{} to end costs {}", + ts_kind.digits(), + anchors.secondary(index, ts_kind), + cost + ); + if !final_evaluation { + chaining_cost_function + .update_jump_34_to_end(index, ts_kind, cost, true); + } + alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); + alignments.push(alignment); + } + current_upper_bound += chaining_cost_function.jump_34_to_end(index, ts_kind); + } + ( + Identifier::PrimaryToPrimary { + index: from_index, .. + }, + Identifier::PrimaryToPrimary { + index: to_index, .. + } + | Identifier::PrimaryToSecondary { + index: to_index, .. + }, + ) => { + if anchors + .primary(from_index) + .is_direct_predecessor_of(anchors.primary(to_index)) + { + alignments.push(Alignment::from(vec![AlignmentType::Match])); + continue; + } + + let start = anchors.primary(from_index).end(k); + let end = anchors.primary(to_index).start(); + if final_evaluation + || !chaining_cost_function.is_primary_exact(from_index, to_index) + { + let (cost, alignment) = self.primary_aligner.align(start, end); + *total_chain_gap_fillings += 1; + trace!( + "Aligning from P{from_index}{} to P{to_index}{} (from {start} to {end}) costs {}", + anchors.primary(from_index), + anchors.primary(to_index), + cost + ); + + if end.primary_ordinate_a().unwrap() - start.primary_ordinate_a().unwrap() + > usize::try_from(max_match_run).unwrap() + { + assert!( + !cost.is_zero(), + "Alignment is longer than max_match_run, but has zero cost: {}", + alignment + ); + } + + if !final_evaluation { + chaining_cost_function.update_primary(from_index, to_index, cost, true); + } + alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); + alignments.push(alignment); + } + current_upper_bound += chaining_cost_function.primary(from_index, to_index); + } + ( + Identifier::PrimaryToSecondary { + index: from_index, .. + }, + Identifier::SecondaryToSecondary { + index: to_index, + ts_kind, + .. + } + | Identifier::SecondaryToPrimary { + index: to_index, + ts_kind, + .. + }, + ) => { + let start = anchors.primary(from_index).end(k); + let end = anchors.secondary(to_index, ts_kind).start(ts_kind); + if final_evaluation + || !chaining_cost_function.is_jump_12_exact(from_index, to_index, ts_kind) + { + let (cost, alignment) = self.ts_12_jump_aligner.align(start, end); + *total_chain_gap_fillings += 1; + if !final_evaluation { + chaining_cost_function + .update_jump_12(from_index, to_index, ts_kind, cost, true); + } + trace!( + "Aligning from P{from_index}{} to S{}[{to_index}]{} costs {}", + anchors.primary(from_index), + ts_kind.digits(), + anchors.secondary(to_index, ts_kind), + cost + ); + alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); + alignments.push(alignment); + } + current_upper_bound += + chaining_cost_function.jump_12(from_index, to_index, ts_kind); + } + ( + Identifier::SecondaryToSecondary { + index: from_index, + ts_kind, + .. + }, + Identifier::SecondaryToSecondary { + index: to_index, + ts_kind: to_ts_kind, + .. + } + | Identifier::SecondaryToPrimary { + index: to_index, + ts_kind: to_ts_kind, + .. + }, + ) => { + assert_eq!(ts_kind, to_ts_kind); + if anchors + .secondary(from_index, ts_kind) + .is_direct_predecessor_of(anchors.secondary(to_index, ts_kind)) + { + alignments.push(Alignment::from(vec![AlignmentType::Match])); + continue; + } + let start = anchors.secondary(from_index, ts_kind).end(ts_kind, k); + let end = anchors.secondary(to_index, ts_kind).start(ts_kind); + if final_evaluation + || !chaining_cost_function.is_secondary_exact(from_index, to_index, ts_kind) + { + let (cost, alignment) = self.secondary_aligner.align(start, end); + *total_chain_gap_fillings += 1; + trace!( + "Aligning from S{}[{from_index}]{} to S{}[{to_index}]{} costs {}", + ts_kind.digits(), + anchors.secondary(from_index, ts_kind), + ts_kind.digits(), + anchors.secondary(to_index, ts_kind), + cost + ); + if !final_evaluation { + chaining_cost_function + .update_secondary(from_index, to_index, ts_kind, cost, true); + } + alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); + alignments.push(alignment); + } + current_upper_bound += + chaining_cost_function.secondary(from_index, to_index, ts_kind); + } + ( + Identifier::SecondaryToPrimary { + index: from_index, + ts_kind, + .. + }, + Identifier::PrimaryToPrimary { + index: to_index, .. + } + | Identifier::PrimaryToSecondary { + index: to_index, .. + }, + ) => { + let start = anchors.secondary(from_index, ts_kind).end(ts_kind, k); + let end = anchors.primary(to_index).start(); + if final_evaluation + || !chaining_cost_function.is_jump_34_exact(from_index, to_index, ts_kind) + { + let (cost, alignment) = self.ts_34_jump_aligner.align(start, end); + *total_chain_gap_fillings += 1; + trace!( + "Aligning from S{}[{from_index}]{} to P{to_index}{} (S{} to P{}) costs {}", + ts_kind.digits(), + anchors.secondary(from_index, ts_kind), + anchors.primary(to_index), + start, + end, + cost, + ); + if !final_evaluation { + chaining_cost_function + .update_jump_34(from_index, to_index, ts_kind, cost, true); + } + alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); + alignments.push(alignment); + } + current_upper_bound += + chaining_cost_function.jump_34(from_index, to_index, ts_kind); + } + (Identifier::End, _) + | (_, Identifier::Start) + | ( + Identifier::SecondaryToPrimary { .. } | Identifier::PrimaryToPrimary { .. }, + Identifier::SecondaryToPrimary { .. } | Identifier::SecondaryToSecondary { .. }, + ) + | ( + Identifier::PrimaryToSecondary { .. } | Identifier::SecondaryToSecondary { .. }, + Identifier::PrimaryToPrimary { .. } + | Identifier::PrimaryToSecondary { .. } + | Identifier::End, + ) + | (Identifier::StartToPrimary { .. } | Identifier::StartToSecondary { .. }, _) + | (_, Identifier::StartToPrimary { .. } | Identifier::StartToSecondary { .. }) => { + unreachable!() + } + } + } + + (current_upper_bound, alignments) + } +} diff --git a/lib_ts_chainalign/src/exact_chaining/gap_affine.rs b/lib_ts_chainalign/src/exact_chaining/gap_affine.rs index 1f19336..fa151ec 100644 --- a/lib_ts_chainalign/src/exact_chaining/gap_affine.rs +++ b/lib_ts_chainalign/src/exact_chaining/gap_affine.rs @@ -1,73 +1,73 @@ -use generic_a_star::{AStar, AStarResult, cost::AStarCost}; +use generic_a_star::{AStar, AStarBuffers, AStarResult, cost::AStarCost}; use crate::{ alignment::{Alignment, coordinates::AlignmentCoordinates, sequences::AlignmentSequences}, costs::GapAffineCosts, - exact_chaining::gap_affine::algo::Context, + exact_chaining::gap_affine::algo::{Context, Node}, }; mod algo; #[cfg(test)] mod tests; -pub struct GapAffineAlignment { - start: AlignmentCoordinates, - end: AlignmentCoordinates, - alignment: Alignment, - cost: Cost, +pub struct GapAffineAligner<'sequences, 'cost_table, 'rc_fn, Cost: AStarCost> { + a_star_buffers: Option>>, + sequences: &'sequences AlignmentSequences, + cost_table: &'cost_table GapAffineCosts, + rc_fn: &'rc_fn dyn Fn(u8) -> u8, + max_match_run: u32, } -impl GapAffineAlignment { +impl<'sequences, 'cost_table, 'rc_fn, Cost: AStarCost> + GapAffineAligner<'sequences, 'cost_table, 'rc_fn, Cost> +{ pub fn new( - start: AlignmentCoordinates, - end: AlignmentCoordinates, - sequences: &AlignmentSequences, - cost_table: &GapAffineCosts, - rc_fn: &dyn Fn(u8) -> u8, + sequences: &'sequences AlignmentSequences, + cost_table: &'cost_table GapAffineCosts, + rc_fn: &'rc_fn dyn Fn(u8) -> u8, max_match_run: u32, ) -> Self { + Self { + a_star_buffers: Some(Default::default()), + sequences, + cost_table, + rc_fn, + max_match_run, + } + } + + pub fn align( + &mut self, + start: AlignmentCoordinates, + end: AlignmentCoordinates, + ) -> (Cost, Alignment) { assert!( start.is_primary() && end.is_primary() || start.is_secondary() && end.is_secondary() ); - let context = Context::new(cost_table, sequences, rc_fn, start, end, max_match_run); - let mut a_star = AStar::<_>::new(context); + let context = Context::new( + self.cost_table, + self.sequences, + self.rc_fn, + start, + end, + self.max_match_run, + ); + let mut a_star = AStar::new_with_buffers(context, self.a_star_buffers.take().unwrap()); + a_star.initialise(); - match a_star.search() { - AStarResult::FoundTarget { cost, .. } => Self { - start, - end, - alignment: a_star.reconstruct_path().into(), - cost: cost.0, - }, + let result = match a_star.search() { + AStarResult::FoundTarget { cost, .. } => { + let alignment = a_star.reconstruct_path().into(); + (cost.0, alignment) + } AStarResult::ExceededCostLimit { .. } => unreachable!("Cost limit is None"), AStarResult::ExceededMemoryLimit { .. } => unreachable!("Cost limit is None"), - AStarResult::NoTarget => Self { - start, - end, - alignment: Vec::new().into(), - cost: Cost::max_value(), - }, - } - } -} - -impl GapAffineAlignment { - pub fn start(&self) -> AlignmentCoordinates { - self.start - } + AStarResult::NoTarget => (Cost::max_value(), Vec::new().into()), + }; - pub fn end(&self) -> AlignmentCoordinates { - self.end - } - - pub fn alignment(&self) -> &Alignment { - &self.alignment - } -} + self.a_star_buffers = Some(a_star.into_buffers()); -impl GapAffineAlignment { - pub fn cost(&self) -> Cost { - self.cost + result } } diff --git a/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs b/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs index 41a7978..59da36c 100644 --- a/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs +++ b/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs @@ -2,7 +2,7 @@ use generic_a_star::cost::U32Cost; use crate::alignment::AlignmentType; use crate::alignment::ts_kind::TsKind; -use crate::exact_chaining::gap_affine::{AlignmentCoordinates, GapAffineAlignment}; +use crate::exact_chaining::gap_affine::{AlignmentCoordinates, GapAffineAligner}; use crate::{alignment::sequences::AlignmentSequences, costs::GapAffineCosts}; fn rc_fn(c: u8) -> u8 { @@ -25,15 +25,14 @@ fn test_start_end() { let start = AlignmentCoordinates::new_primary(0, 0); let end = AlignmentCoordinates::new_primary(4, 5); - let alignment = GapAffineAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![(4, AlignmentType::Match), (1, AlignmentType::GapA)] ); - assert_eq!(alignment.cost(), U32Cost::from(3u8)); + assert_eq!(cost, U32Cost::from(3u8)); } #[test] @@ -46,19 +45,18 @@ fn test_partial_alignment() { let start = AlignmentCoordinates::new_primary(1, 1); let end = AlignmentCoordinates::new_primary(4, 4); - let alignment = GapAffineAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (1, AlignmentType::Match), (1, AlignmentType::Substitution), (1, AlignmentType::Match) ] ); - assert_eq!(alignment.cost(), U32Cost::from(2u8)); + assert_eq!(cost, U32Cost::from(2u8)); } #[test] @@ -71,12 +69,11 @@ fn test_gap_directions() { let start = AlignmentCoordinates::new_primary(1, 1); let end = AlignmentCoordinates::new_primary(11, 11); - let alignment = GapAffineAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (2, AlignmentType::Match), (2, AlignmentType::GapB), @@ -85,7 +82,7 @@ fn test_gap_directions() { (1, AlignmentType::Match) ] ); - assert_eq!(alignment.cost(), U32Cost::from(8u8)); + assert_eq!(cost, U32Cost::from(8u8)); } #[test] @@ -98,19 +95,18 @@ fn test_extremity_gaps() { let start = AlignmentCoordinates::new_primary(3, 3); let end = AlignmentCoordinates::new_primary(10, 10); - let alignment = GapAffineAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (2, AlignmentType::GapB), (5, AlignmentType::Match), (2, AlignmentType::GapA), ] ); - assert_eq!(alignment.cost(), U32Cost::from(8u8)); + assert_eq!(cost, U32Cost::from(8u8)); } #[test] @@ -123,19 +119,18 @@ fn test_extremity_substitutions() { let start = AlignmentCoordinates::new_primary(0, 0); let end = AlignmentCoordinates::new_primary(5, 5); - let alignment = GapAffineAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (1, AlignmentType::Substitution), (3, AlignmentType::Match), (1, AlignmentType::Substitution), ] ); - assert_eq!(alignment.cost(), U32Cost::from(4u8)); + assert_eq!(cost, U32Cost::from(4u8)); } #[test] @@ -148,17 +143,14 @@ fn test_substitutions_as_gaps() { let start = AlignmentCoordinates::new_primary(0, 0); let end = AlignmentCoordinates::new_primary(20, 20); - let alignment = GapAffineAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert!( - alignment.alignment().alignment - == vec![(20, AlignmentType::GapA), (20, AlignmentType::GapB),] - || alignment.alignment().alignment - == vec![(20, AlignmentType::GapB), (20, AlignmentType::GapA),] + alignment.alignment == vec![(20, AlignmentType::GapA), (20, AlignmentType::GapB),] + || alignment.alignment == vec![(20, AlignmentType::GapB), (20, AlignmentType::GapA),] ); - assert_eq!(alignment.cost(), U32Cost::from(44u8)); + assert_eq!(cost, U32Cost::from(44u8)); } #[test] @@ -171,17 +163,14 @@ fn test_max_match_run_0() { let start = AlignmentCoordinates::new_primary(1, 1); let end = AlignmentCoordinates::new_primary(9, 9); - let alignment = GapAffineAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 0); + let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, 0); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert!( - alignment.alignment().alignment - == vec![(8, AlignmentType::GapA), (8, AlignmentType::GapB),] - || alignment.alignment().alignment - == vec![(8, AlignmentType::GapB), (8, AlignmentType::GapA),] + alignment.alignment == vec![(8, AlignmentType::GapA), (8, AlignmentType::GapB),] + || alignment.alignment == vec![(8, AlignmentType::GapB), (8, AlignmentType::GapA),] ); - assert_eq!(alignment.cost(), U32Cost::from(20u8)); + assert_eq!(cost, U32Cost::from(20u8)); } #[test] @@ -194,12 +183,11 @@ fn test_max_match_run_1() { let start = AlignmentCoordinates::new_primary(1, 1); let end = AlignmentCoordinates::new_primary(9, 9); - let alignment = GapAffineAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 1); + let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, 1); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (1, AlignmentType::Match), (1, AlignmentType::Substitution), @@ -211,7 +199,7 @@ fn test_max_match_run_1() { (1, AlignmentType::GapA), ] ); - assert_eq!(alignment.cost(), U32Cost::from(12u8)); + assert_eq!(cost, U32Cost::from(12u8)); } #[test] @@ -224,12 +212,11 @@ fn test_max_match_run_2() { let start = AlignmentCoordinates::new_primary(1, 1); let end = AlignmentCoordinates::new_primary(9, 9); - let alignment = GapAffineAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 2); + let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, 2); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (1, AlignmentType::Match), (1, AlignmentType::Substitution), @@ -238,7 +225,7 @@ fn test_max_match_run_2() { (2, AlignmentType::Match), ] ); - assert_eq!(alignment.cost(), U32Cost::from(6u8)); + assert_eq!(cost, U32Cost::from(6u8)); } #[test] @@ -251,15 +238,11 @@ fn test_secondary_12() { let start = AlignmentCoordinates::new_secondary(9, 1, TsKind::TS12); let end = AlignmentCoordinates::new_secondary(1, 9, TsKind::TS12); - let alignment = GapAffineAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); - assert_eq!( - alignment.alignment().alignment, - vec![(8, AlignmentType::Match),] - ); - assert_eq!(alignment.cost(), U32Cost::from(0u8)); + assert_eq!(alignment.alignment, vec![(8, AlignmentType::Match),]); + assert_eq!(cost, U32Cost::from(0u8)); } #[test] @@ -272,13 +255,9 @@ fn test_secondary_21() { let start = AlignmentCoordinates::new_secondary(9, 1, TsKind::TS21); let end = AlignmentCoordinates::new_secondary(1, 9, TsKind::TS21); - let alignment = GapAffineAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); - assert_eq!( - alignment.alignment().alignment, - vec![(8, AlignmentType::Match),] - ); - assert_eq!(alignment.cost(), U32Cost::from(0u8)); + assert_eq!(alignment.alignment, vec![(8, AlignmentType::Match),]); + assert_eq!(cost, U32Cost::from(0u8)); } diff --git a/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs b/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs index 2c7765c..ac19bff 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs @@ -1,72 +1,69 @@ -use generic_a_star::{AStar, AStarResult, cost::AStarCost}; +use generic_a_star::{AStar, AStarBuffers, AStarResult, cost::AStarCost}; use crate::{ alignment::{Alignment, coordinates::AlignmentCoordinates, sequences::AlignmentSequences}, costs::AlignmentCosts, - exact_chaining::ts_12_jump::algo::Context, + exact_chaining::ts_12_jump::algo::{Context, Node}, }; mod algo; #[cfg(test)] mod tests; -pub struct Ts12JumpAlignment { - start: AlignmentCoordinates, - end: AlignmentCoordinates, - alignment: Alignment, - cost: Cost, +pub struct Ts12JumpAligner<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> { + a_star_buffers: Option>>, + sequences: &'sequences AlignmentSequences, + alignment_costs: &'alignment_costs AlignmentCosts, + rc_fn: &'rc_fn dyn Fn(u8) -> u8, + max_match_run: u32, } -impl Ts12JumpAlignment { +impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> + Ts12JumpAligner<'sequences, 'alignment_costs, 'rc_fn, Cost> +{ pub fn new( - start: AlignmentCoordinates, - end: AlignmentCoordinates, - sequences: &AlignmentSequences, - alignment_costs: &AlignmentCosts, - rc_fn: &dyn Fn(u8) -> u8, + sequences: &'sequences AlignmentSequences, + alignment_costs: &'alignment_costs AlignmentCosts, + rc_fn: &'rc_fn dyn Fn(u8) -> u8, max_match_run: u32, ) -> Self { + Self { + a_star_buffers: Some(Default::default()), + sequences, + alignment_costs, + rc_fn, + max_match_run, + } + } + + pub fn align( + &mut self, + start: AlignmentCoordinates, + end: AlignmentCoordinates, + ) -> (Cost, Alignment) { assert!(start.is_primary()); assert!(end.is_secondary()); - let context = Context::new(alignment_costs, sequences, rc_fn, start, end, max_match_run); - let mut a_star = AStar::<_>::new(context); + let context = Context::new( + self.alignment_costs, + self.sequences, + self.rc_fn, + start, + end, + self.max_match_run, + ); + let mut a_star = AStar::<_>::new_with_buffers(context, self.a_star_buffers.take().unwrap()); + a_star.initialise(); - match a_star.search() { - AStarResult::FoundTarget { cost, .. } => Self { - start, - end, - alignment: a_star.reconstruct_path().into(), - cost: cost.0, - }, + let result = match a_star.search() { + AStarResult::FoundTarget { cost, .. } => (cost.0, a_star.reconstruct_path().into()), AStarResult::ExceededCostLimit { .. } => unreachable!("Cost limit is None"), AStarResult::ExceededMemoryLimit { .. } => unreachable!("Cost limit is None"), - AStarResult::NoTarget => Self { - start, - end, - alignment: Vec::new().into(), - cost: Cost::max_value(), - }, - } - } -} - -impl Ts12JumpAlignment { - pub fn start(&self) -> AlignmentCoordinates { - self.start - } + AStarResult::NoTarget => (Cost::max_value(), Vec::new().into()), + }; - pub fn end(&self) -> AlignmentCoordinates { - self.end - } - - pub fn alignment(&self) -> &Alignment { - &self.alignment - } -} + self.a_star_buffers = Some(a_star.into_buffers()); -impl Ts12JumpAlignment { - pub fn cost(&self) -> Cost { - self.cost + result } } diff --git a/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs b/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs index 9c1fd30..98ecb00 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs @@ -6,7 +6,7 @@ use crate::{ ts_kind::TsKind, }, costs::{AlignmentCosts, GapAffineCosts, TsLimits}, - exact_chaining::ts_12_jump::Ts12JumpAlignment, + exact_chaining::ts_12_jump::Ts12JumpAligner, }; fn rc_fn(c: u8) -> u8 { @@ -46,12 +46,11 @@ fn test_start_end() { let start = AlignmentCoordinates::new_primary(0, 0); let end = AlignmentCoordinates::new_secondary(0, 5, TsKind::TS12); - let alignment = Ts12JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (1, AlignmentType::Match), (1, AlignmentType::Substitution), @@ -66,7 +65,7 @@ fn test_start_end() { (2, AlignmentType::Match), ] ); - assert_eq!(alignment.cost(), U32Cost::from(4u8)); + assert_eq!(cost, U32Cost::from(4u8)); } #[test] @@ -96,12 +95,11 @@ fn test_partial_alignment() { let start = AlignmentCoordinates::new_primary(1, 1); let end = AlignmentCoordinates::new_secondary(1, 4, TsKind::TS12); - let alignment = Ts12JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (1, AlignmentType::Substitution), (1, AlignmentType::Match), @@ -115,7 +113,7 @@ fn test_partial_alignment() { (1, AlignmentType::Match), ] ); - assert_eq!(alignment.cost(), U32Cost::from(4u8)); + assert_eq!(cost, U32Cost::from(4u8)); } #[test] @@ -145,12 +143,11 @@ fn test_gap_directions() { let start = AlignmentCoordinates::new_primary(9, 0); let end = AlignmentCoordinates::new_secondary(0, 18, TsKind::TS12); - let alignment = Ts12JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (2, AlignmentType::Match), (1, AlignmentType::GapB), @@ -171,7 +168,7 @@ fn test_gap_directions() { (2, AlignmentType::Match), ] ); - assert_eq!(alignment.cost(), U32Cost::from(6u8)); + assert_eq!(cost, U32Cost::from(6u8)); } #[test] @@ -201,12 +198,11 @@ fn test_max_match_run_0() { let start = AlignmentCoordinates::new_primary(8, 0); let end = AlignmentCoordinates::new_secondary(0, 16, TsKind::TS12); - let alignment = Ts12JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 0); + let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, 0); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (16, AlignmentType::GapA), ( @@ -218,7 +214,7 @@ fn test_max_match_run_0() { ), ] ); - assert_eq!(alignment.cost(), U32Cost::from(20u8)); + assert_eq!(cost, U32Cost::from(20u8)); } #[test] @@ -248,12 +244,11 @@ fn test_max_match_run_1() { let start = AlignmentCoordinates::new_primary(8, 0); let end = AlignmentCoordinates::new_secondary(0, 16, TsKind::TS12); - let alignment = Ts12JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 1); + let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, 1); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (1, AlignmentType::Match), (12, AlignmentType::GapA), @@ -269,7 +264,7 @@ fn test_max_match_run_1() { (1, AlignmentType::Match), ] ); - assert_eq!(alignment.cost(), U32Cost::from(18u8)); + assert_eq!(cost, U32Cost::from(18u8)); } #[test] @@ -299,12 +294,11 @@ fn test_max_match_run_2() { let start = AlignmentCoordinates::new_primary(8, 0); let end = AlignmentCoordinates::new_secondary(0, 16, TsKind::TS12); - let alignment = Ts12JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 2); + let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (2, AlignmentType::Match), (1, AlignmentType::Substitution), @@ -325,7 +319,7 @@ fn test_max_match_run_2() { (2, AlignmentType::Match), ] ); - assert_eq!(alignment.cost(), U32Cost::from(14u8)); + assert_eq!(cost, U32Cost::from(14u8)); } #[test] @@ -355,12 +349,11 @@ fn test_only_jump() { let start = AlignmentCoordinates::new_primary(5, 6); let end = AlignmentCoordinates::new_secondary(3, 6, TsKind::TS12); - let alignment = Ts12JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 2); + let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![( 1, AlignmentType::TsStart { @@ -369,7 +362,7 @@ fn test_only_jump() { } ),] ); - assert_eq!(alignment.cost(), U32Cost::from(2u8)); + assert_eq!(cost, U32Cost::from(2u8)); } #[test] @@ -399,12 +392,11 @@ fn test_only_jump_start() { let start = AlignmentCoordinates::new_primary(0, 0); let end = AlignmentCoordinates::new_secondary(0, 0, TsKind::TS12); - let alignment = Ts12JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 2); + let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![( 1, AlignmentType::TsStart { @@ -413,7 +405,7 @@ fn test_only_jump_start() { } ),] ); - assert_eq!(alignment.cost(), U32Cost::from(2u8)); + assert_eq!(cost, U32Cost::from(2u8)); } #[test] @@ -443,12 +435,11 @@ fn test_only_jump_end() { let start = AlignmentCoordinates::new_primary(10, 10); let end = AlignmentCoordinates::new_secondary(10, 10, TsKind::TS12); - let alignment = Ts12JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 2); + let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![( 1, AlignmentType::TsStart { @@ -457,5 +448,5 @@ fn test_only_jump_end() { } ),] ); - assert_eq!(alignment.cost(), U32Cost::from(2u8)); + assert_eq!(cost, U32Cost::from(2u8)); } diff --git a/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs b/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs index 16706cb..d81adbc 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs @@ -1,74 +1,76 @@ -use generic_a_star::{AStar, AStarResult, cost::AStarCost}; +use generic_a_star::{AStar, AStarBuffers, AStarResult, cost::AStarCost}; use crate::{ alignment::{Alignment, coordinates::AlignmentCoordinates, sequences::AlignmentSequences}, costs::AlignmentCosts, - exact_chaining::ts_34_jump::algo::Context, + exact_chaining::ts_34_jump::algo::{Context, Node}, }; mod algo; #[cfg(test)] mod tests; -pub struct Ts34JumpAlignment { - start: AlignmentCoordinates, - end: AlignmentCoordinates, - alignment: Alignment, - cost: Cost, +pub struct Ts34JumpAligner<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> { + a_star_buffers: Option>>, + sequences: &'sequences AlignmentSequences, + alignment_costs: &'alignment_costs AlignmentCosts, + rc_fn: &'rc_fn dyn Fn(u8) -> u8, + max_match_run: u32, } -impl Ts34JumpAlignment { +impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> + Ts34JumpAligner<'sequences, 'alignment_costs, 'rc_fn, Cost> +{ pub fn new( - start: AlignmentCoordinates, - end: AlignmentCoordinates, - sequences: &AlignmentSequences, - alignment_costs: &AlignmentCosts, - rc_fn: &dyn Fn(u8) -> u8, + sequences: &'sequences AlignmentSequences, + alignment_costs: &'alignment_costs AlignmentCosts, + rc_fn: &'rc_fn dyn Fn(u8) -> u8, max_match_run: u32, ) -> Self { + Self { + a_star_buffers: Some(Default::default()), + sequences, + alignment_costs, + rc_fn, + max_match_run, + } + } + + pub fn align( + &mut self, + start: AlignmentCoordinates, + end: AlignmentCoordinates, + ) -> (Cost, Alignment) { assert!(start.is_secondary()); assert!(end.is_primary()); - let context = Context::new(alignment_costs, sequences, rc_fn, start, end, max_match_run); - let mut a_star = AStar::<_>::new(context); + let context = Context::new( + self.alignment_costs, + self.sequences, + self.rc_fn, + start, + end, + self.max_match_run, + ); + let mut a_star = AStar::<_>::new_with_buffers(context, self.a_star_buffers.take().unwrap()); + a_star.initialise(); - match a_star.search() { - AStarResult::FoundTarget { cost, .. } => Self { - start, - end, - alignment: a_star.reconstruct_path().into(), + let result = match a_star.search() { + AStarResult::FoundTarget { cost, .. } => { // The TS base cost is applied at the 12-jump, but we anyways apply it in this algorithm to make it label-setting if the base cost is non-zero. // But since the 34-jump has zero cost, we subtract it again. - cost: cost.0 - alignment_costs.ts_base_cost, - }, + let cost = cost.0 - self.alignment_costs.ts_base_cost; + let alignment = a_star.reconstruct_path().into(); + + (cost, alignment) + } AStarResult::ExceededCostLimit { .. } => unreachable!("Cost limit is None"), AStarResult::ExceededMemoryLimit { .. } => unreachable!("Cost limit is None"), - AStarResult::NoTarget => Self { - start, - end, - alignment: Vec::new().into(), - cost: Cost::max_value(), - }, - } - } -} - -impl Ts34JumpAlignment { - pub fn start(&self) -> AlignmentCoordinates { - self.start - } + AStarResult::NoTarget => (Cost::max_value(), Vec::new().into()), + }; - pub fn end(&self) -> AlignmentCoordinates { - self.end - } - - pub fn alignment(&self) -> &Alignment { - &self.alignment - } -} + self.a_star_buffers = Some(a_star.into_buffers()); -impl Ts34JumpAlignment { - pub fn cost(&self) -> Cost { - self.cost + result } } diff --git a/lib_ts_chainalign/src/exact_chaining/ts_34_jump/tests.rs b/lib_ts_chainalign/src/exact_chaining/ts_34_jump/tests.rs index 193c5ab..9189327 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_34_jump/tests.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_34_jump/tests.rs @@ -6,7 +6,7 @@ use crate::{ ts_kind::TsKind, }, costs::{AlignmentCosts, GapAffineCosts, TsLimits}, - exact_chaining::ts_34_jump::Ts34JumpAlignment, + exact_chaining::ts_34_jump::Ts34JumpAligner, }; fn rc_fn(c: u8) -> u8 { @@ -46,12 +46,11 @@ fn test_start_end() { let start: AlignmentCoordinates = AlignmentCoordinates::new_secondary(2, 0, TsKind::TS12); let end = AlignmentCoordinates::new_primary(4, 5); - let alignment = Ts34JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (2, AlignmentType::Match), (1, AlignmentType::TsEnd { jump: 1 }), @@ -60,7 +59,7 @@ fn test_start_end() { (1, AlignmentType::Match), ] ); - assert_eq!(alignment.cost(), U32Cost::from(2u8)); + assert_eq!(cost, U32Cost::from(2u8)); } #[test] @@ -90,12 +89,11 @@ fn test_partial_alignment() { let start: AlignmentCoordinates = AlignmentCoordinates::new_secondary(1, 1, TsKind::TS12); let end = AlignmentCoordinates::new_primary(3, 4); - let alignment = Ts34JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (1, AlignmentType::Match), (1, AlignmentType::TsEnd { jump: 1 }), @@ -103,7 +101,7 @@ fn test_partial_alignment() { (1, AlignmentType::Substitution), ] ); - assert_eq!(alignment.cost(), U32Cost::from(2u8)); + assert_eq!(cost, U32Cost::from(2u8)); } #[test] @@ -133,12 +131,11 @@ fn test_gap_directions() { let start = AlignmentCoordinates::new_secondary(18, 0, TsKind::TS21); let end = AlignmentCoordinates::new_primary(18, 9); - let alignment = Ts34JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, u32::MAX); + let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (2, AlignmentType::Match), (1, AlignmentType::GapB), @@ -153,7 +150,7 @@ fn test_gap_directions() { (2, AlignmentType::Match), ] ); - assert_eq!(alignment.cost(), U32Cost::from(4u8)); + assert_eq!(cost, U32Cost::from(4u8)); } #[test] @@ -183,18 +180,17 @@ fn test_max_match_run_0() { let start = AlignmentCoordinates::new_secondary(8, 0, TsKind::TS12); let end = AlignmentCoordinates::new_primary(16, 16); - let alignment = Ts34JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 0); + let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, 0); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (1, AlignmentType::TsEnd { jump: 8 }), (16, AlignmentType::GapA), ] ); - assert_eq!(alignment.cost(), U32Cost::from(18u8)); + assert_eq!(cost, U32Cost::from(18u8)); } #[test] @@ -224,12 +220,11 @@ fn test_max_match_run_1() { let start = AlignmentCoordinates::new_secondary(8, 0, TsKind::TS12); let end = AlignmentCoordinates::new_primary(16, 16); - let alignment = Ts34JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 1); + let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, 1); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (1, AlignmentType::Match), (1, AlignmentType::TsEnd { jump: -2 }), @@ -243,7 +238,7 @@ fn test_max_match_run_1() { (1, AlignmentType::Match), ] ); - assert_eq!(alignment.cost(), U32Cost::from(13u8)); + assert_eq!(cost, U32Cost::from(13u8)); } #[test] @@ -273,12 +268,11 @@ fn test_max_match_run_2() { let start = AlignmentCoordinates::new_secondary(8, 0, TsKind::TS12); let end = AlignmentCoordinates::new_primary(16, 16); - let alignment = Ts34JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 2); + let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![ (2, AlignmentType::Match), (1, AlignmentType::Substitution), @@ -293,7 +287,7 @@ fn test_max_match_run_2() { (2, AlignmentType::Match), ] ); - assert_eq!(alignment.cost(), U32Cost::from(4u8)); + assert_eq!(cost, U32Cost::from(4u8)); } #[test] @@ -323,15 +317,14 @@ fn test_only_jump() { let start = AlignmentCoordinates::new_secondary(2, 8, TsKind::TS21); let end = AlignmentCoordinates::new_primary(8, 8); - let alignment = Ts34JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 2); + let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![(1, AlignmentType::TsEnd { jump: 6 })] ); - assert_eq!(alignment.cost(), U32Cost::from(0u8)); + assert_eq!(cost, U32Cost::from(0u8)); } #[test] @@ -361,15 +354,14 @@ fn test_only_jump_start() { let start = AlignmentCoordinates::new_secondary(0, 0, TsKind::TS21); let end = AlignmentCoordinates::new_primary(0, 0); - let alignment = Ts34JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 2); + let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![(1, AlignmentType::TsEnd { jump: 0 })] ); - assert_eq!(alignment.cost(), U32Cost::from(0u8)); + assert_eq!(cost, U32Cost::from(0u8)); } #[test] @@ -399,13 +391,12 @@ fn test_only_jump_end() { let start = AlignmentCoordinates::new_secondary(10, 10, TsKind::TS21); let end = AlignmentCoordinates::new_primary(10, 10); - let alignment = Ts34JumpAlignment::new(start, end, &sequences, &cost_table, &rc_fn, 2); + let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); + let (cost, alignment) = aligner.align(start, end); - assert_eq!(alignment.start(), start); - assert_eq!(alignment.end(), end); assert_eq!( - alignment.alignment().alignment, + alignment.alignment, vec![(1, AlignmentType::TsEnd { jump: 0 })] ); - assert_eq!(alignment.cost(), U32Cost::from(0u8)); + assert_eq!(cost, U32Cost::from(0u8)); } From 3f8eb7c342ca9801993a4231654218f85a122416 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 17 Dec 2025 10:17:37 +0200 Subject: [PATCH 03/13] Measure anchor computation time. --- lib_ts_chainalign/src/anchors.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib_ts_chainalign/src/anchors.rs b/lib_ts_chainalign/src/anchors.rs index a4ea442..057b7f4 100644 --- a/lib_ts_chainalign/src/anchors.rs +++ b/lib_ts_chainalign/src/anchors.rs @@ -1,4 +1,4 @@ -use std::fmt::Display; +use std::{fmt::Display, time::Instant}; use lib_tsalign::a_star_aligner::alignment_geometry::AlignmentRange; use log::{info, trace}; @@ -68,6 +68,8 @@ impl Anchors { k: u32, rc_fn: &dyn Fn(u8) -> u8, ) -> Self { + let start_time = Instant::now(); + let k = usize::try_from(k).unwrap(); let s1 = sequences.seq1(); let s2 = sequences.seq2(); @@ -157,14 +159,18 @@ impl Anchors { }); } + let end_time = Instant::now(); + let duration = end_time - start_time; + info!( - "Found {} anchors ({} + {} + {} + {} + {})", + "Found {} anchors ({} + {} + {} + {} + {}) in {:.0}ms", primary.len() + secondaries.iter().map(Vec::len).sum::(), primary.len(), secondaries[0].len(), secondaries[1].len(), secondaries[2].len(), secondaries[3].len(), + duration.as_secs_f64() * 1e3, ); Self { From 9c054e8aabdc47a393956439e11152494e8c16d5 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 17 Dec 2025 10:18:50 +0200 Subject: [PATCH 04/13] Report chain gap filling ratio without percent. --- lib_ts_chainalign/src/chain_align.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_ts_chainalign/src/chain_align.rs b/lib_ts_chainalign/src/chain_align.rs index 6ba0acc..8c11e0d 100644 --- a/lib_ts_chainalign/src/chain_align.rs +++ b/lib_ts_chainalign/src/chain_align.rs @@ -340,8 +340,8 @@ fn actually_align< debug!("Chaining closed nodes: {total_chaining_closed_nodes}"); debug!("Total chain gaps: {total_chain_gaps}"); debug!( - "Total chain gap fillings: {total_chain_gap_fillings} ({:.0}%)", - total_chain_gap_fillings as f64 / total_chain_gaps as f64 * 100.0 + "Total chain gap fillings: {total_chain_gap_fillings} ({:.2}x)", + total_chain_gap_fillings as f64 / total_chain_gaps as f64 ); let mut tsalign_alignment = From 47b6581ca27f9ec579286a42a48426bf9becd91f Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 17 Dec 2025 10:24:56 +0200 Subject: [PATCH 05/13] Refactor chain gap filling statistics. --- lib_ts_chainalign/src/chain_align.rs | 20 ++++----- .../src/chain_align/evaluation.rs | 42 +++++++++++++------ 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/lib_ts_chainalign/src/chain_align.rs b/lib_ts_chainalign/src/chain_align.rs index 8c11e0d..eff27dd 100644 --- a/lib_ts_chainalign/src/chain_align.rs +++ b/lib_ts_chainalign/src/chain_align.rs @@ -194,8 +194,6 @@ fn actually_align< let mut chain_evaluator = ChainEvaluator::new(sequences, alignment_costs, rc_fn, max_match_run); let mut chaining_execution_count = 0; - let mut total_chain_gap_fillings = 0; - let mut total_chain_gaps = 0; let mut current_lower_bound = Cost::zero(); let mut current_upper_bound = Cost::max_value(); let mut total_chaining_opened_nodes = 0; @@ -312,8 +310,6 @@ fn actually_align< end, max_match_run, astar.context_mut().chaining_cost_function, - &mut total_chain_gap_fillings, - &mut total_chain_gaps, false, ); let cost_increased = evaluated_cost > current_lower_bound; @@ -333,15 +329,19 @@ fn actually_align< debug!("Evaluation took {:.1}s", evaluation_duration.as_secs_f64()); debug!("Chaining opened nodes: {total_chaining_opened_nodes}"); debug!( - "Chaining suboptimal openend nodes: {} ({:.0}%)", + "Chaining suboptimal openend nodes: {} ({:.0}% of opened nodes)", total_chaining_suboptimal_opened_nodes, - total_chaining_suboptimal_opened_nodes as f64 / total_chaining_opened_nodes as f64 * 100.0, + total_chaining_suboptimal_opened_nodes as f64 / total_chaining_opened_nodes as f64 * 1e2, ); debug!("Chaining closed nodes: {total_chaining_closed_nodes}"); - debug!("Total chain gaps: {total_chain_gaps}"); + debug!("Total chain gaps: {}", chain_evaluator.total_gaps()); debug!( - "Total chain gap fillings: {total_chain_gap_fillings} ({:.2}x)", - total_chain_gap_fillings as f64 / total_chain_gaps as f64 + "Total chain gap fillings: {} ({:.2}x total gaps, {:.0}% redundant)", + chain_evaluator.total_gap_fillings(), + chain_evaluator.total_gap_fillings() as f64 / chain_evaluator.total_gaps() as f64, + chain_evaluator.total_redundant_gap_fillings() as f64 + / chain_evaluator.total_gap_fillings() as f64 + * 1e2, ); let mut tsalign_alignment = @@ -357,8 +357,6 @@ fn actually_align< end, max_match_run, astar.context_mut().chaining_cost_function, - &mut 0, - &mut 0, true, ); assert_eq!(evaluated_cost, result.cost()); diff --git a/lib_ts_chainalign/src/chain_align/evaluation.rs b/lib_ts_chainalign/src/chain_align/evaluation.rs index 274b833..57e6acb 100644 --- a/lib_ts_chainalign/src/chain_align/evaluation.rs +++ b/lib_ts_chainalign/src/chain_align/evaluation.rs @@ -22,6 +22,10 @@ pub struct ChainEvaluator<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> secondary_aligner: GapAffineAligner<'sequences, 'alignment_costs, 'rc_fn, Cost>, ts_12_jump_aligner: Ts12JumpAligner<'sequences, 'alignment_costs, 'rc_fn, Cost>, ts_34_jump_aligner: Ts34JumpAligner<'sequences, 'alignment_costs, 'rc_fn, Cost>, + + total_gaps: u64, + total_gap_fillings: u64, + total_redundant_gap_fillings: u64, } impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> @@ -58,6 +62,10 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> rc_fn, max_match_run, ), + + total_gaps: 0, + total_gap_fillings: 0, + total_redundant_gap_fillings: 0, } } @@ -70,8 +78,6 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> end: AlignmentCoordinates, max_match_run: u32, chaining_cost_function: &mut ChainingCostFunction, - total_chain_gap_fillings: &mut u64, - total_chain_gaps: &mut u64, final_evaluation: bool, ) -> (Cost, Vec) { let k = usize::try_from(max_match_run + 1).unwrap(); @@ -100,13 +106,13 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> break; }; current_from_index = to_anchor_index; - *total_chain_gaps += 1; + self.total_gaps += 1; match (from_anchor, to_anchor) { (Identifier::Start, Identifier::End) => { if final_evaluation || !chaining_cost_function.is_start_to_end_exact() { let (cost, alignment) = self.primary_aligner.align(start, end); - *total_chain_gap_fillings += 1; + self.total_gap_fillings += 1; trace!("Aligning from start to end costs {}", cost); if !final_evaluation { chaining_cost_function.update_start_to_end(cost, true); @@ -125,7 +131,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> || !chaining_cost_function.is_primary_from_start_exact(index) { let (cost, alignment) = self.primary_aligner.align(start, end); - *total_chain_gap_fillings += 1; + self.total_gap_fillings += 1; trace!( "Aligning from start to P{index}{} costs {}", anchors.primary(index), @@ -148,7 +154,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> || !chaining_cost_function.is_jump_12_from_start_exact(index, ts_kind) { let (cost, alignment) = self.ts_12_jump_aligner.align(start, end); - *total_chain_gap_fillings += 1; + self.total_gap_fillings += 1; trace!( "Aligning from start to S{}[{index}]{} costs {}", ts_kind.digits(), @@ -168,7 +174,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> let start = anchors.primary(index).end(k); if final_evaluation || !chaining_cost_function.is_primary_to_end_exact(index) { let (cost, alignment) = self.primary_aligner.align(start, end); - *total_chain_gap_fillings += 1; + self.total_gap_fillings += 1; trace!( "Aligning from P{index}{} to end costs {}", anchors.primary(index), @@ -188,7 +194,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> || !chaining_cost_function.is_jump_34_to_end_exact(index, ts_kind) { let (cost, alignment) = self.ts_34_jump_aligner.align(start, end); - *total_chain_gap_fillings += 1; + self.total_gap_fillings += 1; trace!( "Aligning from S{}[{index}]{} to end costs {}", ts_kind.digits(), @@ -229,7 +235,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> || !chaining_cost_function.is_primary_exact(from_index, to_index) { let (cost, alignment) = self.primary_aligner.align(start, end); - *total_chain_gap_fillings += 1; + self.total_gap_fillings += 1; trace!( "Aligning from P{from_index}{} to P{to_index}{} (from {start} to {end}) costs {}", anchors.primary(from_index), @@ -276,7 +282,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> || !chaining_cost_function.is_jump_12_exact(from_index, to_index, ts_kind) { let (cost, alignment) = self.ts_12_jump_aligner.align(start, end); - *total_chain_gap_fillings += 1; + self.total_gap_fillings += 1; if !final_evaluation { chaining_cost_function .update_jump_12(from_index, to_index, ts_kind, cost, true); @@ -325,7 +331,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> || !chaining_cost_function.is_secondary_exact(from_index, to_index, ts_kind) { let (cost, alignment) = self.secondary_aligner.align(start, end); - *total_chain_gap_fillings += 1; + self.total_gap_fillings += 1; trace!( "Aligning from S{}[{from_index}]{} to S{}[{to_index}]{} costs {}", ts_kind.digits(), @@ -363,7 +369,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> || !chaining_cost_function.is_jump_34_exact(from_index, to_index, ts_kind) { let (cost, alignment) = self.ts_34_jump_aligner.align(start, end); - *total_chain_gap_fillings += 1; + self.total_gap_fillings += 1; trace!( "Aligning from S{}[{from_index}]{} to P{to_index}{} (S{} to P{}) costs {}", ts_kind.digits(), @@ -404,4 +410,16 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> (current_upper_bound, alignments) } + + pub fn total_gaps(&self) -> u64 { + self.total_gaps + } + + pub fn total_gap_fillings(&self) -> u64 { + self.total_gap_fillings + } + + pub fn total_redundant_gap_fillings(&self) -> u64 { + self.total_redundant_gap_fillings + } } From 13e05fe42d6df753de759b2eded990dad4a910cc Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 17 Dec 2025 10:33:10 +0200 Subject: [PATCH 06/13] Make anchor order into their natural order. --- lib_ts_chainalign/src/anchors.rs | 260 +-------------------- lib_ts_chainalign/src/anchors/primary.rs | 124 ++++++++++ lib_ts_chainalign/src/anchors/secondary.rs | 170 ++++++++++++++ 3 files changed, 301 insertions(+), 253 deletions(-) create mode 100644 lib_ts_chainalign/src/anchors/primary.rs create mode 100644 lib_ts_chainalign/src/anchors/secondary.rs diff --git a/lib_ts_chainalign/src/anchors.rs b/lib_ts_chainalign/src/anchors.rs index 057b7f4..6a0bb15 100644 --- a/lib_ts_chainalign/src/anchors.rs +++ b/lib_ts_chainalign/src/anchors.rs @@ -4,21 +4,21 @@ use lib_tsalign::a_star_aligner::alignment_geometry::AlignmentRange; use log::{info, trace}; use crate::{ - alignment::{ - coordinates::AlignmentCoordinates, - sequences::AlignmentSequences, - ts_kind::{TsDescendant, TsKind}, - }, + alignment::{sequences::AlignmentSequences, ts_kind::TsKind}, anchors::{ index::AnchorIndex, kmer_matches::find_kmer_matches, kmers::{Kmer, KmerStore}, + primary::PrimaryAnchor, + secondary::SecondaryAnchor, }, }; pub mod index; pub mod kmer_matches; pub mod kmers; +pub mod primary; +pub mod secondary; #[cfg(test)] mod tests; @@ -28,20 +28,6 @@ pub struct Anchors { secondaries: [Vec; 4], } -#[derive(Debug, PartialEq, Eq)] -pub struct PrimaryAnchor { - seq1: usize, - seq2: usize, -} - -#[derive(Debug, PartialEq, Eq)] -pub struct SecondaryAnchor { - /// Ancestor right index in the primary sequence. - ancestor: usize, - /// Descendant left index in the primary sequence. - descendant: usize, -} - impl Anchors { pub fn new( sequences: &AlignmentSequences, @@ -142,21 +128,9 @@ impl Anchors { let mut secondaries = [secondary_11, secondary_12, secondary_21, secondary_22]; // Sort anchors. - primary.sort_unstable_by_key(|primary_anchor| { - ( - primary_anchor.seq1.min(primary_anchor.seq2), - primary_anchor.seq1, - primary_anchor.seq2, - ) - }); + primary.sort_unstable(); for secondary in &mut secondaries { - secondary.sort_unstable_by_key(|secondary_anchor| { - ( - secondary_anchor.ancestor.min(secondary_anchor.descendant), - secondary_anchor.ancestor, - secondary_anchor.descendant, - ) - }); + secondary.sort_unstable(); } let end_time = Instant::now(); @@ -217,202 +191,6 @@ impl Anchors { } } -impl PrimaryAnchor { - pub fn new(seq1: usize, seq2: usize) -> Self { - Self { seq1, seq2 } - } - - pub fn start(&self) -> AlignmentCoordinates { - AlignmentCoordinates::Primary { - a: self.seq1, - b: self.seq2, - } - } - - pub fn end(&self, k: usize) -> AlignmentCoordinates { - AlignmentCoordinates::Primary { - a: self.seq1 + k, - b: self.seq2 + k, - } - } - - pub fn chaining_gaps(&self, second: &Self, k: usize) -> Option<(usize, usize)> { - let gap_start = self.end(k); - let gap_end = second.start(); - primary_chaining_gaps(gap_start, gap_end) - } - - pub fn chaining_gaps_from_start(&self, start: AlignmentCoordinates) -> (usize, usize) { - let gap_end = self.start(); - primary_chaining_gaps(start, gap_end) - .unwrap_or_else(|| panic!("self: {self}, start: {start}")) - } - - pub fn chaining_gaps_to_end(&self, end: AlignmentCoordinates, k: usize) -> (usize, usize) { - let gap_start = self.end(k); - primary_chaining_gaps(gap_start, end) - .unwrap_or_else(|| panic!("self: {self}, end: {end}, k: {k}")) - } - - pub fn chaining_jump_gap( - &self, - second: &SecondaryAnchor, - ts_kind: TsKind, - k: usize, - ) -> Option { - let gap_start = self.end(k); - let gap_end = second.start(ts_kind); - - let gap_start = match ts_kind.descendant { - TsDescendant::Seq1 => gap_start.primary_ordinate_a().unwrap(), - TsDescendant::Seq2 => gap_start.primary_ordinate_b().unwrap(), - }; - let gap_end = gap_end.secondary_ordinate_descendant().unwrap(); - - gap_end.checked_sub(gap_start) - } - - pub fn is_direct_predecessor_of(&self, successor: &Self) -> bool { - self.seq1 + 1 == successor.seq1 && self.seq2 + 1 == successor.seq2 - } -} - -fn primary_chaining_gaps( - gap_start: AlignmentCoordinates, - gap_end: AlignmentCoordinates, -) -> Option<(usize, usize)> { - let gap1 = gap_end - .primary_ordinate_a() - .unwrap() - .checked_sub(gap_start.primary_ordinate_a().unwrap())?; - let gap2 = gap_end - .primary_ordinate_b() - .unwrap() - .checked_sub(gap_start.primary_ordinate_b().unwrap())?; - - Some((gap1, gap2)) -} - -impl SecondaryAnchor { - pub fn new(ancestor: usize, descendant: usize) -> Self { - Self { - ancestor, - descendant, - } - } - - pub fn start(&self, ts_kind: TsKind) -> AlignmentCoordinates { - AlignmentCoordinates::Secondary { - ancestor: self.ancestor, - descendant: self.descendant, - ts_kind, - } - } - - pub fn end(&self, ts_kind: TsKind, k: usize) -> AlignmentCoordinates { - AlignmentCoordinates::Secondary { - ancestor: self.ancestor.checked_sub(k).unwrap(), - descendant: self.descendant + k, - ts_kind, - } - } - - pub fn chaining_gaps( - &self, - second: &Self, - ts_kind: TsKind, - k: usize, - ) -> Option<(usize, usize)> { - let gap_start = self.end(ts_kind, k); - let gap_end = second.start(ts_kind); - - let gap1 = gap_start - .secondary_ordinate_ancestor() - .unwrap() - .checked_sub(gap_end.secondary_ordinate_ancestor().unwrap())?; - let gap2 = gap_end - .secondary_ordinate_descendant() - .unwrap() - .checked_sub(gap_start.secondary_ordinate_descendant().unwrap())?; - - Some((gap1, gap2)) - } - - pub fn chaining_jump_gap( - &self, - second: &PrimaryAnchor, - ts_kind: TsKind, - k: usize, - ) -> Option { - let gap_start = self.end(ts_kind, k); - let gap_end = second.start(); - - let gap_start = gap_start.secondary_ordinate_descendant().unwrap(); - let gap_end = match ts_kind.descendant { - TsDescendant::Seq1 => gap_end.primary_ordinate_a().unwrap(), - TsDescendant::Seq2 => gap_end.primary_ordinate_b().unwrap(), - }; - - gap_end.checked_sub(gap_start) - } - - pub fn chaining_jump_gap_from_start( - &self, - start: AlignmentCoordinates, - ts_kind: TsKind, - ) -> usize { - let gap_start = match ts_kind.descendant { - TsDescendant::Seq1 => start.primary_ordinate_a().unwrap(), - TsDescendant::Seq2 => start.primary_ordinate_b().unwrap(), - }; - let gap_end = self.start(ts_kind).secondary_ordinate_descendant().unwrap(); - - gap_end.checked_sub(gap_start).unwrap() - } - - pub fn chaining_jump_gap_to_end( - &self, - end: AlignmentCoordinates, - ts_kind: TsKind, - k: usize, - ) -> usize { - let gap_start = self - .end(ts_kind, k) - .secondary_ordinate_descendant() - .unwrap(); - let gap_end = match ts_kind.descendant { - TsDescendant::Seq1 => end.primary_ordinate_a().unwrap(), - TsDescendant::Seq2 => end.primary_ordinate_b().unwrap(), - }; - - gap_end.checked_sub(gap_start).unwrap() - } - - pub fn is_direct_predecessor_of(&self, successor: &Self) -> bool { - self.ancestor - 1 == successor.ancestor && self.descendant + 1 == successor.descendant - } - - /// Returns the length of the 2-3 alignment of a TS that starts in `self` and ends in `until`. - /// - /// The length is the maximum of the difference of the two sequences. - pub fn ts_length_until(&self, until: &Self, ts_kind: TsKind, k: usize) -> usize { - let start = self.start(ts_kind); - let end = until.end(ts_kind, k); - - (start - .secondary_ordinate_ancestor() - .unwrap() - .checked_sub(end.secondary_ordinate_ancestor().unwrap()) - .unwrap()) - .max( - end.secondary_ordinate_descendant() - .unwrap() - .checked_sub(start.secondary_ordinate_descendant().unwrap()) - .unwrap(), - ) - } -} - impl Display for Anchors { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "P: [")?; @@ -451,27 +229,3 @@ impl Display for Anchors { Ok(()) } } - -impl Display for PrimaryAnchor { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "({}, {})", self.seq1, self.seq2) - } -} - -impl Display for SecondaryAnchor { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "({}, {})", self.ancestor, self.descendant) - } -} - -impl From<(usize, usize)> for PrimaryAnchor { - fn from(value: (usize, usize)) -> Self { - Self::new(value.0, value.1) - } -} - -impl From<(usize, usize)> for SecondaryAnchor { - fn from(value: (usize, usize)) -> Self { - Self::new(value.0, value.1) - } -} diff --git a/lib_ts_chainalign/src/anchors/primary.rs b/lib_ts_chainalign/src/anchors/primary.rs new file mode 100644 index 0000000..f0f0828 --- /dev/null +++ b/lib_ts_chainalign/src/anchors/primary.rs @@ -0,0 +1,124 @@ +use std::fmt::Display; + +use crate::{ + alignment::{ + coordinates::AlignmentCoordinates, + ts_kind::{TsDescendant, TsKind}, + }, + anchors::secondary::SecondaryAnchor, +}; + +/// A primary anchor. +/// +/// This is an anchor between the two sequences in forward direction. +/// +/// The anchor is ordered by its minimum ordinate first, then by its first ordinate and finally by its second ordinate. +#[derive(Debug, PartialEq, Eq)] +pub struct PrimaryAnchor { + pub(super) seq1: usize, + pub(super) seq2: usize, +} + +impl PrimaryAnchor { + pub fn new(seq1: usize, seq2: usize) -> Self { + Self { seq1, seq2 } + } + + pub fn start(&self) -> AlignmentCoordinates { + AlignmentCoordinates::Primary { + a: self.seq1, + b: self.seq2, + } + } + + pub fn end(&self, k: usize) -> AlignmentCoordinates { + AlignmentCoordinates::Primary { + a: self.seq1 + k, + b: self.seq2 + k, + } + } + + pub fn chaining_gaps(&self, second: &Self, k: usize) -> Option<(usize, usize)> { + let gap_start = self.end(k); + let gap_end = second.start(); + primary_chaining_gaps(gap_start, gap_end) + } + + pub fn chaining_gaps_from_start(&self, start: AlignmentCoordinates) -> (usize, usize) { + let gap_end = self.start(); + primary_chaining_gaps(start, gap_end) + .unwrap_or_else(|| panic!("self: {self}, start: {start}")) + } + + pub fn chaining_gaps_to_end(&self, end: AlignmentCoordinates, k: usize) -> (usize, usize) { + let gap_start = self.end(k); + primary_chaining_gaps(gap_start, end) + .unwrap_or_else(|| panic!("self: {self}, end: {end}, k: {k}")) + } + + pub fn chaining_jump_gap( + &self, + second: &SecondaryAnchor, + ts_kind: TsKind, + k: usize, + ) -> Option { + let gap_start = self.end(k); + let gap_end = second.start(ts_kind); + + let gap_start = match ts_kind.descendant { + TsDescendant::Seq1 => gap_start.primary_ordinate_a().unwrap(), + TsDescendant::Seq2 => gap_start.primary_ordinate_b().unwrap(), + }; + let gap_end = gap_end.secondary_ordinate_descendant().unwrap(); + + gap_end.checked_sub(gap_start) + } + + pub fn is_direct_predecessor_of(&self, successor: &Self) -> bool { + self.seq1 + 1 == successor.seq1 && self.seq2 + 1 == successor.seq2 + } +} + +fn primary_chaining_gaps( + gap_start: AlignmentCoordinates, + gap_end: AlignmentCoordinates, +) -> Option<(usize, usize)> { + let gap1 = gap_end + .primary_ordinate_a() + .unwrap() + .checked_sub(gap_start.primary_ordinate_a().unwrap())?; + let gap2 = gap_end + .primary_ordinate_b() + .unwrap() + .checked_sub(gap_start.primary_ordinate_b().unwrap())?; + + Some((gap1, gap2)) +} + +impl Display for PrimaryAnchor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "({}, {})", self.seq1, self.seq2) + } +} + +impl From<(usize, usize)> for PrimaryAnchor { + fn from(value: (usize, usize)) -> Self { + Self::new(value.0, value.1) + } +} + +impl Ord for PrimaryAnchor { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.seq1 + .min(self.seq2) + .cmp(&other.seq1.min(other.seq2)) + .then_with(|| self.seq1.cmp(&other.seq1)) + .then_with(|| self.seq2.cmp(&other.seq2)) + } +} + +impl PartialOrd for PrimaryAnchor { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} diff --git a/lib_ts_chainalign/src/anchors/secondary.rs b/lib_ts_chainalign/src/anchors/secondary.rs new file mode 100644 index 0000000..aea1412 --- /dev/null +++ b/lib_ts_chainalign/src/anchors/secondary.rs @@ -0,0 +1,170 @@ +use std::fmt::Display; + +use crate::{ + alignment::{ + coordinates::AlignmentCoordinates, + ts_kind::{TsDescendant, TsKind}, + }, + anchors::primary::PrimaryAnchor, +}; + +/// A secondary anchor. +/// +/// This is an anchor between the ancestor in reverse direction and the descendant in forward direction. +/// +/// The anchor is ordered by its minimum ordinate first, then by its ancestor ordinate and finally by its descendant ordinate. +#[derive(Debug, PartialEq, Eq)] +pub struct SecondaryAnchor { + /// Ancestor right index in the forward sequence. + pub(super) ancestor: usize, + /// Descendant left index in the forward sequence. + pub(super) descendant: usize, +} + +impl SecondaryAnchor { + pub fn new(ancestor: usize, descendant: usize) -> Self { + Self { + ancestor, + descendant, + } + } + + pub fn start(&self, ts_kind: TsKind) -> AlignmentCoordinates { + AlignmentCoordinates::Secondary { + ancestor: self.ancestor, + descendant: self.descendant, + ts_kind, + } + } + + pub fn end(&self, ts_kind: TsKind, k: usize) -> AlignmentCoordinates { + AlignmentCoordinates::Secondary { + ancestor: self.ancestor.checked_sub(k).unwrap(), + descendant: self.descendant + k, + ts_kind, + } + } + + pub fn chaining_gaps( + &self, + second: &Self, + ts_kind: TsKind, + k: usize, + ) -> Option<(usize, usize)> { + let gap_start = self.end(ts_kind, k); + let gap_end = second.start(ts_kind); + + let gap1 = gap_start + .secondary_ordinate_ancestor() + .unwrap() + .checked_sub(gap_end.secondary_ordinate_ancestor().unwrap())?; + let gap2 = gap_end + .secondary_ordinate_descendant() + .unwrap() + .checked_sub(gap_start.secondary_ordinate_descendant().unwrap())?; + + Some((gap1, gap2)) + } + + pub fn chaining_jump_gap( + &self, + second: &PrimaryAnchor, + ts_kind: TsKind, + k: usize, + ) -> Option { + let gap_start = self.end(ts_kind, k); + let gap_end = second.start(); + + let gap_start = gap_start.secondary_ordinate_descendant().unwrap(); + let gap_end = match ts_kind.descendant { + TsDescendant::Seq1 => gap_end.primary_ordinate_a().unwrap(), + TsDescendant::Seq2 => gap_end.primary_ordinate_b().unwrap(), + }; + + gap_end.checked_sub(gap_start) + } + + pub fn chaining_jump_gap_from_start( + &self, + start: AlignmentCoordinates, + ts_kind: TsKind, + ) -> usize { + let gap_start = match ts_kind.descendant { + TsDescendant::Seq1 => start.primary_ordinate_a().unwrap(), + TsDescendant::Seq2 => start.primary_ordinate_b().unwrap(), + }; + let gap_end = self.start(ts_kind).secondary_ordinate_descendant().unwrap(); + + gap_end.checked_sub(gap_start).unwrap() + } + + pub fn chaining_jump_gap_to_end( + &self, + end: AlignmentCoordinates, + ts_kind: TsKind, + k: usize, + ) -> usize { + let gap_start = self + .end(ts_kind, k) + .secondary_ordinate_descendant() + .unwrap(); + let gap_end = match ts_kind.descendant { + TsDescendant::Seq1 => end.primary_ordinate_a().unwrap(), + TsDescendant::Seq2 => end.primary_ordinate_b().unwrap(), + }; + + gap_end.checked_sub(gap_start).unwrap() + } + + pub fn is_direct_predecessor_of(&self, successor: &Self) -> bool { + self.ancestor - 1 == successor.ancestor && self.descendant + 1 == successor.descendant + } + + /// Returns the length of the 2-3 alignment of a TS that starts in `self` and ends in `until`. + /// + /// The length is the maximum of the difference of the two sequences. + pub fn ts_length_until(&self, until: &Self, ts_kind: TsKind, k: usize) -> usize { + let start = self.start(ts_kind); + let end = until.end(ts_kind, k); + + (start + .secondary_ordinate_ancestor() + .unwrap() + .checked_sub(end.secondary_ordinate_ancestor().unwrap()) + .unwrap()) + .max( + end.secondary_ordinate_descendant() + .unwrap() + .checked_sub(start.secondary_ordinate_descendant().unwrap()) + .unwrap(), + ) + } +} + +impl Display for SecondaryAnchor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "({}, {})", self.ancestor, self.descendant) + } +} + +impl From<(usize, usize)> for SecondaryAnchor { + fn from(value: (usize, usize)) -> Self { + Self::new(value.0, value.1) + } +} + +impl Ord for SecondaryAnchor { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.ancestor + .min(self.descendant) + .cmp(&other.ancestor.min(other.descendant)) + .then_with(|| self.ancestor.cmp(&other.ancestor)) + .then_with(|| self.descendant.cmp(&other.descendant)) + } +} + +impl PartialOrd for SecondaryAnchor { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} From db1f8ed919a92e6cbd0504a1d6a609dfc5e5bf31 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 17 Dec 2025 12:13:12 +0200 Subject: [PATCH 07/13] Batch gap-affine chain evaluation. --- generic_a_star/src/closed_lists.rs | 16 +++ generic_a_star/src/lib.rs | 4 + lib_ts_chainalign/src/anchors.rs | 7 +- lib_ts_chainalign/src/anchors/primary.rs | 24 +++- .../src/anchors/reverse_lookup.rs | 108 ++++++++++++++++++ lib_ts_chainalign/src/anchors/secondary.rs | 11 +- lib_ts_chainalign/src/chain_align/chainer.rs | 2 +- .../src/chain_align/chainer/closed_list.rs | 34 ++++++ .../src/chain_align/evaluation.rs | 101 ++++++++++++++-- .../src/chaining_cost_function.rs | 101 ++++++++++++++-- .../src/chaining_cost_function/cost_array.rs | 1 - .../src/exact_chaining/gap_affine.rs | 30 ++++- .../src/exact_chaining/gap_affine/tests.rs | 22 ++-- 13 files changed, 424 insertions(+), 37 deletions(-) create mode 100644 lib_ts_chainalign/src/anchors/reverse_lookup.rs diff --git a/generic_a_star/src/closed_lists.rs b/generic_a_star/src/closed_lists.rs index 11ee036..c333d2a 100644 --- a/generic_a_star/src/closed_lists.rs +++ b/generic_a_star/src/closed_lists.rs @@ -35,6 +35,13 @@ pub trait AStarClosedList: Reset { self.get(identifier).is_some() } + /// Returns an iterator over the nodes in the closed list. + fn iter<'this: 'node, 'node>( + &'this self, + ) -> impl use<'this, 'node, Self, Node> + Iterator + where + Node: 'node; + fn can_skip_node(&self, node: &Node, is_label_setting: bool) -> bool { if let Some(previous_visit) = self.get(node.identifier()) { if is_label_setting { @@ -99,6 +106,15 @@ impl AStarClosedList fn get(&self, identifier: &::Identifier) -> Option<&Node> { Self::get(self, identifier) } + + fn iter<'this: 'node, 'node>( + &'this self, + ) -> impl use<'this, 'node, Node> + Iterator + where + Node: 'node, + { + Self::values(self) + } } impl Reset for FxHashMapSeed { diff --git a/generic_a_star/src/lib.rs b/generic_a_star/src/lib.rs index 06b24df..163a0b2 100644 --- a/generic_a_star/src/lib.rs +++ b/generic_a_star/src/lib.rs @@ -268,6 +268,10 @@ impl< self.closed_list.get(node_identifier) } + pub fn iter_closed_nodes(&self) -> impl Iterator { + self.closed_list.iter() + } + pub fn performance_counters(&self) -> &AStarPerformanceCounters { &self.performance_counters } diff --git a/lib_ts_chainalign/src/anchors.rs b/lib_ts_chainalign/src/anchors.rs index 6a0bb15..aedb7b9 100644 --- a/lib_ts_chainalign/src/anchors.rs +++ b/lib_ts_chainalign/src/anchors.rs @@ -18,6 +18,7 @@ pub mod index; pub mod kmer_matches; pub mod kmers; pub mod primary; +pub mod reverse_lookup; pub mod secondary; #[cfg(test)] mod tests; @@ -161,9 +162,10 @@ impl Anchors { self.primary.len().into() } - pub fn enumerate_primaries(&self) -> impl Iterator { + pub fn enumerate_primaries(&self) -> impl Iterator { self.primary .iter() + .copied() .enumerate() .map(|(index, anchor)| (index.into(), anchor)) } @@ -183,9 +185,10 @@ impl Anchors { pub fn enumerate_secondaries( &self, ts_kind: TsKind, - ) -> impl Iterator { + ) -> impl Iterator { self.secondary_anchor_vec(ts_kind) .iter() + .copied() .enumerate() .map(|(index, anchor)| (index.into(), anchor)) } diff --git a/lib_ts_chainalign/src/anchors/primary.rs b/lib_ts_chainalign/src/anchors/primary.rs index f0f0828..2f9add5 100644 --- a/lib_ts_chainalign/src/anchors/primary.rs +++ b/lib_ts_chainalign/src/anchors/primary.rs @@ -13,7 +13,7 @@ use crate::{ /// This is an anchor between the two sequences in forward direction. /// /// The anchor is ordered by its minimum ordinate first, then by its first ordinate and finally by its second ordinate. -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct PrimaryAnchor { pub(super) seq1: usize, pub(super) seq2: usize, @@ -24,6 +24,28 @@ impl PrimaryAnchor { Self { seq1, seq2 } } + pub fn new_from_start(alignment_coordinates: &AlignmentCoordinates) -> Self { + Self::new( + alignment_coordinates.primary_ordinate_a().unwrap(), + alignment_coordinates.primary_ordinate_b().unwrap(), + ) + } + + pub fn new_from_end(alignment_coordinates: &AlignmentCoordinates, k: usize) -> Self { + Self::new( + alignment_coordinates + .primary_ordinate_a() + .unwrap() + .checked_sub(k) + .unwrap(), + alignment_coordinates + .primary_ordinate_b() + .unwrap() + .checked_sub(k) + .unwrap(), + ) + } + pub fn start(&self) -> AlignmentCoordinates { AlignmentCoordinates::Primary { a: self.seq1, diff --git a/lib_ts_chainalign/src/anchors/reverse_lookup.rs b/lib_ts_chainalign/src/anchors/reverse_lookup.rs new file mode 100644 index 0000000..725ad44 --- /dev/null +++ b/lib_ts_chainalign/src/anchors/reverse_lookup.rs @@ -0,0 +1,108 @@ +use std::{cmp::Ordering, iter::Peekable}; + +use crate::{ + alignment::ts_kind::TsKind, + anchors::{Anchors, index::AnchorIndex, primary::PrimaryAnchor, secondary::SecondaryAnchor}, +}; + +struct PrimaryAnchorToIndexIter { + coordinate_iter: Peekable, + anchor_iter: Peekable, +} + +struct SecondaryAnchorToIndexIter { + coordinate_iter: Peekable, + anchor_iter: Peekable, +} + +impl< + CoordinateIter: Iterator, + AnchorIter: Iterator, +> Iterator for PrimaryAnchorToIndexIter +{ + type Item = Option; + + fn next(&mut self) -> Option { + while let (Some(coordinate_anchor), Some((anchor_index, anchor))) = + (self.coordinate_iter.peek(), self.anchor_iter.peek()) + { + match coordinate_anchor.cmp(anchor) { + Ordering::Less => { + self.coordinate_iter.next().unwrap(); + return Some(None); + } + Ordering::Equal => { + let result = Some(Some(*anchor_index)); + self.coordinate_iter.next().unwrap(); + self.anchor_iter.next().unwrap(); + return result; + } + Ordering::Greater => { + self.anchor_iter.next().unwrap(); + } + } + } + + self.coordinate_iter.next().map(|_| None) + } +} + +impl< + CoordinateIter: Iterator, + AnchorIter: Iterator, +> Iterator for SecondaryAnchorToIndexIter +{ + type Item = Option; + + fn next(&mut self) -> Option { + while let (Some(coordinate_anchor), Some((anchor_index, anchor))) = + (self.coordinate_iter.peek(), self.anchor_iter.peek()) + { + match coordinate_anchor.cmp(anchor) { + Ordering::Less => { + self.coordinate_iter.next().unwrap(); + return Some(None); + } + Ordering::Equal => { + let result = Some(Some(*anchor_index)); + self.coordinate_iter.next().unwrap(); + self.anchor_iter.next().unwrap(); + return result; + } + Ordering::Greater => { + self.anchor_iter.next().unwrap(); + } + } + } + + self.coordinate_iter.next().map(|_| None) + } +} + +impl Anchors { + /// Returns an iterator over the primary anchor indices that correspond to the given primary anchors. + /// + /// If a primary anchor does not exist, then the iterator returns `Some(None)`. + pub fn primary_anchor_to_index_iter( + &self, + iter: impl IntoIterator, + ) -> impl Iterator> { + PrimaryAnchorToIndexIter { + coordinate_iter: iter.into_iter().peekable(), + anchor_iter: self.enumerate_primaries().peekable(), + } + } + /// Returns an iterator over the secondary anchor indices that correspond to the given secondary anchors. + /// + /// If a secondary anchor does not exist, then the iterator returns `Some(None)`. + pub fn secondary_anchor_to_index_iter( + &self, + iter: impl IntoIterator, + ts_kind: TsKind, + ) -> impl Iterator> { + SecondaryAnchorToIndexIter { + coordinate_iter: iter.into_iter().peekable(), + anchor_iter: self.enumerate_secondaries(ts_kind).peekable(), + } + } +} diff --git a/lib_ts_chainalign/src/anchors/secondary.rs b/lib_ts_chainalign/src/anchors/secondary.rs index aea1412..c2c5bd0 100644 --- a/lib_ts_chainalign/src/anchors/secondary.rs +++ b/lib_ts_chainalign/src/anchors/secondary.rs @@ -13,7 +13,7 @@ use crate::{ /// This is an anchor between the ancestor in reverse direction and the descendant in forward direction. /// /// The anchor is ordered by its minimum ordinate first, then by its ancestor ordinate and finally by its descendant ordinate. -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct SecondaryAnchor { /// Ancestor right index in the forward sequence. pub(super) ancestor: usize, @@ -29,6 +29,15 @@ impl SecondaryAnchor { } } + pub fn new_from_start(alignment_coordinates: &AlignmentCoordinates) -> Self { + Self::new( + alignment_coordinates.secondary_ordinate_ancestor().unwrap(), + alignment_coordinates + .secondary_ordinate_descendant() + .unwrap(), + ) + } + pub fn start(&self, ts_kind: TsKind) -> AlignmentCoordinates { AlignmentCoordinates::Secondary { ancestor: self.ancestor, diff --git a/lib_ts_chainalign/src/chain_align/chainer.rs b/lib_ts_chainalign/src/chain_align/chainer.rs index ffd7464..5b85572 100644 --- a/lib_ts_chainalign/src/chain_align/chainer.rs +++ b/lib_ts_chainalign/src/chain_align/chainer.rs @@ -477,7 +477,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { } fn is_label_setting(&self) -> bool { - false + true } } diff --git a/lib_ts_chainalign/src/chain_align/chainer/closed_list.rs b/lib_ts_chainalign/src/chain_align/chainer/closed_list.rs index 9b2fa06..b1e8df0 100644 --- a/lib_ts_chainalign/src/chain_align/chainer/closed_list.rs +++ b/lib_ts_chainalign/src/chain_align/chainer/closed_list.rs @@ -305,6 +305,40 @@ impl AStarClosedList> for ChainerClosedList { } } + fn iter<'this: 'node, 'node>( + &'this self, + ) -> impl use<'this, 'node, Cost> + Iterator> + where + Node: 'node, + { + self.start + .iter() + .chain(&self.start_to_primary) + .chain( + self.start_to_secondary + .iter() + .flat_map(IntoIterator::into_iter), + ) + .chain(self.primary_to_primary.iter().flatten()) + .chain( + self.primary_to_secondary + .iter() + .flat_map(IntoIterator::into_iter) + .flatten(), + ) + .chain( + self.secondary_to_secondary + .iter() + .flat_map(FxHashMapSeed::values), + ) + .chain( + self.secondary_to_primary + .iter() + .flat_map(FxHashMapSeed::values), + ) + .chain(&self.end) + } + fn can_skip_node(&self, node: &Node, _is_label_setting: bool) -> bool { if let Some(current) = self.get(node.identifier()) { Ordering::Less diff --git a/lib_ts_chainalign/src/chain_align/evaluation.rs b/lib_ts_chainalign/src/chain_align/evaluation.rs index 57e6acb..f2c4dba 100644 --- a/lib_ts_chainalign/src/chain_align/evaluation.rs +++ b/lib_ts_chainalign/src/chain_align/evaluation.rs @@ -8,7 +8,7 @@ use crate::{ alignment::{ Alignment, AlignmentType, coordinates::AlignmentCoordinates, sequences::AlignmentSequences, }, - anchors::Anchors, + anchors::{Anchors, primary::PrimaryAnchor, secondary::SecondaryAnchor}, chain_align::chainer::Identifier, chaining_cost_function::ChainingCostFunction, costs::AlignmentCosts, @@ -23,11 +23,16 @@ pub struct ChainEvaluator<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> ts_12_jump_aligner: Ts12JumpAligner<'sequences, 'alignment_costs, 'rc_fn, Cost>, ts_34_jump_aligner: Ts34JumpAligner<'sequences, 'alignment_costs, 'rc_fn, Cost>, + additional_primary_targets_buffer: Vec<(PrimaryAnchor, Cost)>, + additional_secondary_targets_buffer: Vec<(SecondaryAnchor, Cost)>, + total_gaps: u64, total_gap_fillings: u64, total_redundant_gap_fillings: u64, } +struct PanicOnExtend; + impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> ChainEvaluator<'sequences, 'alignment_costs, 'rc_fn, Cost> { @@ -63,6 +68,9 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> max_match_run, ), + additional_primary_targets_buffer: Default::default(), + additional_secondary_targets_buffer: Default::default(), + total_gaps: 0, total_gap_fillings: 0, total_redundant_gap_fillings: 0, @@ -81,6 +89,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> final_evaluation: bool, ) -> (Cost, Vec) { let k = usize::try_from(max_match_run + 1).unwrap(); + let mut current_upper_bound = Cost::zero(); let mut alignments = Vec::new(); let mut current_from_index = 0; @@ -111,11 +120,23 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> match (from_anchor, to_anchor) { (Identifier::Start, Identifier::End) => { if final_evaluation || !chaining_cost_function.is_start_to_end_exact() { - let (cost, alignment) = self.primary_aligner.align(start, end); - self.total_gap_fillings += 1; + self.additional_primary_targets_buffer.clear(); + let (cost, alignment) = self.primary_aligner.align( + start, + end, + &mut self.additional_primary_targets_buffer, + &mut PanicOnExtend, + ); + self.total_gap_fillings += + u64::try_from(self.additional_primary_targets_buffer.len()).unwrap(); trace!("Aligning from start to end costs {}", cost); if !final_evaluation { chaining_cost_function.update_start_to_end(cost, true); + chaining_cost_function.update_additional_primary_targets_from_start( + &mut self.additional_primary_targets_buffer, + anchors, + &mut self.total_redundant_gap_fillings, + ); } alignments.push(alignment); } @@ -130,8 +151,15 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if final_evaluation || !chaining_cost_function.is_primary_from_start_exact(index) { - let (cost, alignment) = self.primary_aligner.align(start, end); - self.total_gap_fillings += 1; + self.additional_primary_targets_buffer.clear(); + let (cost, alignment) = self.primary_aligner.align( + start, + end, + &mut self.additional_primary_targets_buffer, + &mut PanicOnExtend, + ); + self.total_gap_fillings += + u64::try_from(self.additional_primary_targets_buffer.len()).unwrap(); trace!( "Aligning from start to P{index}{} costs {}", anchors.primary(index), @@ -139,6 +167,11 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> ); if !final_evaluation { chaining_cost_function.update_primary_from_start(index, cost, true); + chaining_cost_function.update_additional_primary_targets_from_start( + &mut self.additional_primary_targets_buffer, + anchors, + &mut self.total_redundant_gap_fillings, + ); } alignments.push(alignment); } @@ -173,8 +206,15 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> (Identifier::PrimaryToPrimary { index, .. }, Identifier::End) => { let start = anchors.primary(index).end(k); if final_evaluation || !chaining_cost_function.is_primary_to_end_exact(index) { - let (cost, alignment) = self.primary_aligner.align(start, end); - self.total_gap_fillings += 1; + self.additional_primary_targets_buffer.clear(); + let (cost, alignment) = self.primary_aligner.align( + start, + end, + &mut self.additional_primary_targets_buffer, + &mut PanicOnExtend, + ); + self.total_gap_fillings += + u64::try_from(self.additional_primary_targets_buffer.len()).unwrap(); trace!( "Aligning from P{index}{} to end costs {}", anchors.primary(index), @@ -182,6 +222,12 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> ); if !final_evaluation { chaining_cost_function.update_primary_to_end(index, cost, true); + chaining_cost_function.update_additional_primary_targets( + index, + &mut self.additional_primary_targets_buffer, + anchors, + &mut self.total_redundant_gap_fillings, + ); } alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); alignments.push(alignment); @@ -234,8 +280,15 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if final_evaluation || !chaining_cost_function.is_primary_exact(from_index, to_index) { - let (cost, alignment) = self.primary_aligner.align(start, end); - self.total_gap_fillings += 1; + self.additional_primary_targets_buffer.clear(); + let (cost, alignment) = self.primary_aligner.align( + start, + end, + &mut self.additional_primary_targets_buffer, + &mut PanicOnExtend, + ); + self.total_gap_fillings += + u64::try_from(self.additional_primary_targets_buffer.len()).unwrap(); trace!( "Aligning from P{from_index}{} to P{to_index}{} (from {start} to {end}) costs {}", anchors.primary(from_index), @@ -255,6 +308,12 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if !final_evaluation { chaining_cost_function.update_primary(from_index, to_index, cost, true); + chaining_cost_function.update_additional_primary_targets( + from_index, + &mut self.additional_primary_targets_buffer, + anchors, + &mut self.total_redundant_gap_fillings, + ); } alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); alignments.push(alignment); @@ -330,8 +389,15 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if final_evaluation || !chaining_cost_function.is_secondary_exact(from_index, to_index, ts_kind) { - let (cost, alignment) = self.secondary_aligner.align(start, end); - self.total_gap_fillings += 1; + self.additional_secondary_targets_buffer.clear(); + let (cost, alignment) = self.secondary_aligner.align( + start, + end, + &mut PanicOnExtend, + &mut self.additional_secondary_targets_buffer, + ); + self.total_gap_fillings += + u64::try_from(self.additional_secondary_targets_buffer.len()).unwrap(); trace!( "Aligning from S{}[{from_index}]{} to S{}[{to_index}]{} costs {}", ts_kind.digits(), @@ -343,6 +409,13 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if !final_evaluation { chaining_cost_function .update_secondary(from_index, to_index, ts_kind, cost, true); + chaining_cost_function.update_additional_secondary_targets( + from_index, + &mut self.additional_secondary_targets_buffer, + ts_kind, + anchors, + &mut self.total_redundant_gap_fillings, + ); } alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); alignments.push(alignment); @@ -423,3 +496,9 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> self.total_redundant_gap_fillings } } + +impl Extend for PanicOnExtend { + fn extend>(&mut self, iter: Iter) { + assert!(iter.into_iter().next().is_none()); + } +} diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index c5679c0..c84f5ad 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -7,7 +7,7 @@ use num_traits::Zero; use crate::{ alignment::{coordinates::AlignmentCoordinates, ts_kind::TsKind}, - anchors::{Anchors, index::AnchorIndex}, + anchors::{Anchors, index::AnchorIndex, primary::PrimaryAnchor, secondary::SecondaryAnchor}, chaining_cost_function::cost_array::ChainingCostArray, chaining_lower_bounds::ChainingLowerBounds, }; @@ -54,11 +54,11 @@ impl ChainingCostFunction { for (to_index, to_anchor) in anchors.enumerate_primaries() { let to_index = to_index + 1; - if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, k) { + if let Some((gap1, gap2)) = from_anchor.chaining_gaps(&to_anchor, k) { primary[[from_index, to_index]] = chaining_lower_bounds.primary_lower_bound(gap1, gap2); } - if from_anchor.is_direct_predecessor_of(to_anchor) { + if from_anchor.is_direct_predecessor_of(&to_anchor) { primary[[from_index, to_index]] = Cost::zero(); } } @@ -79,11 +79,11 @@ impl ChainingCostFunction { for (ts_kind, secondary) in TsKind::iter().zip(&mut secondaries) { for (from_index, from_anchor) in anchors.enumerate_secondaries(ts_kind) { for (to_index, to_anchor) in anchors.enumerate_secondaries(ts_kind) { - if let Some((gap1, gap2)) = from_anchor.chaining_gaps(to_anchor, ts_kind, k) { + if let Some((gap1, gap2)) = from_anchor.chaining_gaps(&to_anchor, ts_kind, k) { secondary[[from_index, to_index]] = chaining_lower_bounds.secondary_lower_bound(gap1, gap2); } - if from_anchor.is_direct_predecessor_of(to_anchor) { + if from_anchor.is_direct_predecessor_of(&to_anchor) { secondary[[from_index, to_index]] = Cost::zero(); } } @@ -103,7 +103,7 @@ impl ChainingCostFunction { for (from_index, from_anchor) in anchors.enumerate_primaries() { let from_index = from_index + 1; for (to_index, to_anchor) in anchors.enumerate_secondaries(ts_kind) { - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, ts_kind, k) { + if let Some(gap) = from_anchor.chaining_jump_gap(&to_anchor, ts_kind, k) { jump_12[[from_index, to_index]] = chaining_lower_bounds.jump_12_lower_bound(gap); } @@ -124,7 +124,7 @@ impl ChainingCostFunction { for (from_index, from_anchor) in anchors.enumerate_secondaries(ts_kind) { for (to_index, to_anchor) in anchors.enumerate_primaries() { let to_index = to_index + 1; - if let Some(gap) = from_anchor.chaining_jump_gap(to_anchor, ts_kind, k) { + if let Some(gap) = from_anchor.chaining_jump_gap(&to_anchor, ts_kind, k) { jump_34[[from_index, to_index]] = chaining_lower_bounds.jump_34_lower_bound(gap); } @@ -565,4 +565,91 @@ impl ChainingCostFunction { .filter(|(to_primary_index, _)| *to_primary_index != AnchorIndex::zero()) .map(|(to_primary_index, chaining_cost)| (to_primary_index - 1, chaining_cost)) } + + pub fn update_additional_primary_targets( + &mut self, + from_primary_index: AnchorIndex, + additional_targets: &mut [(PrimaryAnchor, Cost)], + anchors: &Anchors, + total_redundant_gap_fillings: &mut u64, + ) { + additional_targets.sort_unstable(); + for (to_anchor_index, cost) in anchors + .primary_anchor_to_index_iter(additional_targets.iter().map(|(anchor, _)| *anchor)) + .zip(additional_targets.iter().map(|(_, cost)| *cost)) + { + let Some(to_primary_index) = to_anchor_index else { + continue; + }; + + if self.is_primary_exact(from_primary_index, to_primary_index) { + *total_redundant_gap_fillings += 1; + debug_assert_eq!(self.primary(from_primary_index, to_primary_index), cost,); + } else { + self.update_primary(from_primary_index, to_primary_index, cost, true); + } + } + } + + pub fn update_additional_primary_targets_from_start( + &mut self, + additional_targets: &mut [(PrimaryAnchor, Cost)], + anchors: &Anchors, + total_redundant_gap_fillings: &mut u64, + ) { + additional_targets.sort_unstable(); + for (to_anchor_index, cost) in anchors + .primary_anchor_to_index_iter(additional_targets.iter().map(|(anchor, _)| *anchor)) + .zip(additional_targets.iter().map(|(_, cost)| *cost)) + { + let Some(to_primary_index) = to_anchor_index else { + continue; + }; + + if self.is_primary_from_start_exact(to_primary_index) { + *total_redundant_gap_fillings += 1; + debug_assert_eq!(self.primary_from_start(to_primary_index), cost,); + } else { + self.update_primary_from_start(to_primary_index, cost, true); + } + } + } + + pub fn update_additional_secondary_targets( + &mut self, + from_secondary_index: AnchorIndex, + additional_targets: &mut [(SecondaryAnchor, Cost)], + ts_kind: TsKind, + anchors: &Anchors, + total_redundant_gap_fillings: &mut u64, + ) { + additional_targets.sort_unstable(); + for (to_anchor_index, cost) in anchors + .secondary_anchor_to_index_iter( + additional_targets.iter().map(|(anchor, _)| *anchor), + ts_kind, + ) + .zip(additional_targets.iter().map(|(_, cost)| *cost)) + { + let Some(to_secondary_index) = to_anchor_index else { + continue; + }; + + if self.is_secondary_exact(from_secondary_index, to_secondary_index, ts_kind) { + *total_redundant_gap_fillings += 1; + debug_assert_eq!( + self.secondary(from_secondary_index, to_secondary_index, ts_kind), + cost, + ); + } else { + self.update_secondary( + from_secondary_index, + to_secondary_index, + ts_kind, + cost, + true, + ); + } + } + } } diff --git a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs index 9696bce..b0d51d5 100644 --- a/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs +++ b/lib_ts_chainalign/src/chaining_cost_function/cost_array.rs @@ -33,7 +33,6 @@ impl ChainingCostArray { } pub fn set_exact(&mut self, c1: AnchorIndex, c2: AnchorIndex) { - debug_assert!(!self.is_exact(c1, c2)); self.is_exact .set(coordinates_to_index(c1, c2, self.len), true); } diff --git a/lib_ts_chainalign/src/exact_chaining/gap_affine.rs b/lib_ts_chainalign/src/exact_chaining/gap_affine.rs index fa151ec..58af794 100644 --- a/lib_ts_chainalign/src/exact_chaining/gap_affine.rs +++ b/lib_ts_chainalign/src/exact_chaining/gap_affine.rs @@ -2,6 +2,7 @@ use generic_a_star::{AStar, AStarBuffers, AStarResult, cost::AStarCost}; use crate::{ alignment::{Alignment, coordinates::AlignmentCoordinates, sequences::AlignmentSequences}, + anchors::{primary::PrimaryAnchor, secondary::SecondaryAnchor}, costs::GapAffineCosts, exact_chaining::gap_affine::algo::{Context, Node}, }; @@ -40,6 +41,8 @@ impl<'sequences, 'cost_table, 'rc_fn, Cost: AStarCost> &mut self, start: AlignmentCoordinates, end: AlignmentCoordinates, + additional_primary_targets_output: &mut impl Extend<(PrimaryAnchor, Cost)>, + additional_secondary_targets_output: &mut impl Extend<(SecondaryAnchor, Cost)>, ) -> (Cost, Alignment) { assert!( start.is_primary() && end.is_primary() || start.is_secondary() && end.is_secondary() @@ -56,7 +59,7 @@ impl<'sequences, 'cost_table, 'rc_fn, Cost: AStarCost> let mut a_star = AStar::new_with_buffers(context, self.a_star_buffers.take().unwrap()); a_star.initialise(); - let result = match a_star.search() { + let (cost, alignment) = match a_star.search() { AStarResult::FoundTarget { cost, .. } => { let alignment = a_star.reconstruct_path().into(); (cost.0, alignment) @@ -66,8 +69,31 @@ impl<'sequences, 'cost_table, 'rc_fn, Cost: AStarCost> AStarResult::NoTarget => (Cost::max_value(), Vec::new().into()), }; + a_star.search_until(|_, node| node.cost > cost); + additional_primary_targets_output.extend( + a_star + .iter_closed_nodes() + .filter(|node| node.identifier.coordinates.is_primary()) + .map(|node| { + ( + PrimaryAnchor::new_from_start(&node.identifier.coordinates), + node.cost, + ) + }), + ); + additional_secondary_targets_output.extend( + a_star + .iter_closed_nodes() + .filter(|node| node.identifier.coordinates.is_secondary()) + .map(|node| { + ( + SecondaryAnchor::new_from_start(&node.identifier.coordinates), + node.cost, + ) + }), + ); self.a_star_buffers = Some(a_star.into_buffers()); - result + (cost, alignment) } } diff --git a/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs b/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs index 59da36c..b6b2ee1 100644 --- a/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs +++ b/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs @@ -26,7 +26,7 @@ fn test_start_end() { let start = AlignmentCoordinates::new_primary(0, 0); let end = AlignmentCoordinates::new_primary(4, 5); let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new(), &mut Vec::new()); assert_eq!( alignment.alignment, @@ -46,7 +46,7 @@ fn test_partial_alignment() { let start = AlignmentCoordinates::new_primary(1, 1); let end = AlignmentCoordinates::new_primary(4, 4); let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new(), &mut Vec::new()); assert_eq!( alignment.alignment, @@ -70,7 +70,7 @@ fn test_gap_directions() { let start = AlignmentCoordinates::new_primary(1, 1); let end = AlignmentCoordinates::new_primary(11, 11); let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new(), &mut Vec::new()); assert_eq!( alignment.alignment, @@ -96,7 +96,7 @@ fn test_extremity_gaps() { let start = AlignmentCoordinates::new_primary(3, 3); let end = AlignmentCoordinates::new_primary(10, 10); let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new(), &mut Vec::new()); assert_eq!( alignment.alignment, @@ -120,7 +120,7 @@ fn test_extremity_substitutions() { let start = AlignmentCoordinates::new_primary(0, 0); let end = AlignmentCoordinates::new_primary(5, 5); let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new(), &mut Vec::new()); assert_eq!( alignment.alignment, @@ -144,7 +144,7 @@ fn test_substitutions_as_gaps() { let start = AlignmentCoordinates::new_primary(0, 0); let end = AlignmentCoordinates::new_primary(20, 20); let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new(), &mut Vec::new()); assert!( alignment.alignment == vec![(20, AlignmentType::GapA), (20, AlignmentType::GapB),] @@ -164,7 +164,7 @@ fn test_max_match_run_0() { let start = AlignmentCoordinates::new_primary(1, 1); let end = AlignmentCoordinates::new_primary(9, 9); let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, 0); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new(), &mut Vec::new()); assert!( alignment.alignment == vec![(8, AlignmentType::GapA), (8, AlignmentType::GapB),] @@ -184,7 +184,7 @@ fn test_max_match_run_1() { let start = AlignmentCoordinates::new_primary(1, 1); let end = AlignmentCoordinates::new_primary(9, 9); let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, 1); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new(), &mut Vec::new()); assert_eq!( alignment.alignment, @@ -213,7 +213,7 @@ fn test_max_match_run_2() { let start = AlignmentCoordinates::new_primary(1, 1); let end = AlignmentCoordinates::new_primary(9, 9); let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, 2); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new(), &mut Vec::new()); assert_eq!( alignment.alignment, @@ -239,7 +239,7 @@ fn test_secondary_12() { let start = AlignmentCoordinates::new_secondary(9, 1, TsKind::TS12); let end = AlignmentCoordinates::new_secondary(1, 9, TsKind::TS12); let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new(), &mut Vec::new()); assert_eq!(alignment.alignment, vec![(8, AlignmentType::Match),]); assert_eq!(cost, U32Cost::from(0u8)); @@ -256,7 +256,7 @@ fn test_secondary_21() { let start = AlignmentCoordinates::new_secondary(9, 1, TsKind::TS21); let end = AlignmentCoordinates::new_secondary(1, 9, TsKind::TS21); let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new(), &mut Vec::new()); assert_eq!(alignment.alignment, vec![(8, AlignmentType::Match),]); assert_eq!(cost, U32Cost::from(0u8)); From dd28907131f8ec66fc2a47affd7b902affdc5265 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 17 Dec 2025 12:42:48 +0200 Subject: [PATCH 08/13] Fix batched gap-affine chain evaluation. --- generic_a_star/src/cost.rs | 19 +++++++++-- generic_a_star/src/lib.rs | 6 +++- lib_ts_chainalign/src/chain_align/chainer.rs | 2 +- .../src/chain_align/evaluation.rs | 34 ++++++++++++------- .../src/chaining_cost_function.rs | 4 +-- .../src/exact_chaining/gap_affine.rs | 13 +++++-- .../src/exact_chaining/gap_affine/algo.rs | 12 ++++++- test_files/twin_10_repetitive.fa | 4 +++ 8 files changed, 71 insertions(+), 23 deletions(-) create mode 100644 test_files/twin_10_repetitive.fa diff --git a/generic_a_star/src/cost.rs b/generic_a_star/src/cost.rs index f7b67fa..41dfd71 100644 --- a/generic_a_star/src/cost.rs +++ b/generic_a_star/src/cost.rs @@ -5,13 +5,14 @@ use std::{ str::FromStr, }; -use num_traits::{Bounded, CheckedAdd, CheckedSub, SaturatingSub, Zero}; +use num_traits::{Bounded, CheckedAdd, CheckedSub, SaturatingAdd, SaturatingSub, Zero}; /// The cost of an A* node. pub trait AStarCost: From + Add + Sub + + SaturatingAdd + SaturatingSub + CheckedAdd + CheckedSub @@ -93,7 +94,7 @@ macro_rules! primitive_cost { type Output = Self; fn add(self, rhs: Self) -> Self::Output { - Self(self.0.checked_add(rhs.0).unwrap()) + Self(self.0.checked_add(rhs.0).unwrap_or_else(|| panic!("Overflow when adding costs {self} + {rhs}"))) } } @@ -101,7 +102,13 @@ macro_rules! primitive_cost { type Output = Self; fn sub(self, rhs: Self) -> Self::Output { - Self(self.0.checked_sub(rhs.0).unwrap()) + Self(self.0.checked_sub(rhs.0).unwrap_or_else(|| panic!("Overflow when subtracting costs {self} - {rhs}"))) + } + } + + impl num_traits::SaturatingAdd for $name { + fn saturating_add(&self, rhs: &Self) -> Self { + Self(self.0.saturating_add(rhs.0)) } } @@ -309,6 +316,12 @@ impl CheckedSub for OrderedPairCost { } } +impl SaturatingAdd for OrderedPairCost { + fn saturating_add(&self, rhs: &Self) -> Self { + Self(self.0.saturating_add(&rhs.0), self.1.saturating_add(&rhs.1)) + } +} + impl SaturatingSub for OrderedPairCost { fn saturating_sub(&self, rhs: &Self) -> Self { Self(self.0.saturating_sub(&rhs.0), self.1.saturating_sub(&rhs.1)) diff --git a/generic_a_star/src/lib.rs b/generic_a_star/src/lib.rs index 163a0b2..f0d57e8 100644 --- a/generic_a_star/src/lib.rs +++ b/generic_a_star/src/lib.rs @@ -422,7 +422,11 @@ impl< let previous_visit = self.closed_list.insert(node.identifier().clone(), node); self.performance_counters.closed_nodes += 1; - debug_assert!(previous_visit.is_none() || !self.context.is_label_setting()); + debug_assert!( + previous_visit.is_none() || !self.context.is_label_setting(), + "Node was visited previously: {}", + previous_visit.unwrap() + ); } let Some(target_identifier) = target_identifier else { diff --git a/lib_ts_chainalign/src/chain_align/chainer.rs b/lib_ts_chainalign/src/chain_align/chainer.rs index 5b85572..ffd7464 100644 --- a/lib_ts_chainalign/src/chain_align/chainer.rs +++ b/lib_ts_chainalign/src/chain_align/chainer.rs @@ -477,7 +477,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { } fn is_label_setting(&self) -> bool { - true + false } } diff --git a/lib_ts_chainalign/src/chain_align/evaluation.rs b/lib_ts_chainalign/src/chain_align/evaluation.rs index f2c4dba..c3cbc98 100644 --- a/lib_ts_chainalign/src/chain_align/evaluation.rs +++ b/lib_ts_chainalign/src/chain_align/evaluation.rs @@ -140,7 +140,8 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> } alignments.push(alignment); } - current_upper_bound += chaining_cost_function.start_to_end(); + current_upper_bound = + current_upper_bound.saturating_add(&chaining_cost_function.start_to_end()); } ( Identifier::Start, @@ -175,7 +176,8 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> } alignments.push(alignment); } - current_upper_bound += chaining_cost_function.primary_from_start(index); + current_upper_bound = current_upper_bound + .saturating_add(&chaining_cost_function.primary_from_start(index)); } ( Identifier::Start, @@ -200,8 +202,8 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> } alignments.push(alignment); } - current_upper_bound += - chaining_cost_function.jump_12_from_start(index, ts_kind); + current_upper_bound = current_upper_bound + .saturating_add(&chaining_cost_function.jump_12_from_start(index, ts_kind)); } (Identifier::PrimaryToPrimary { index, .. }, Identifier::End) => { let start = anchors.primary(index).end(k); @@ -232,7 +234,8 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); alignments.push(alignment); } - current_upper_bound += chaining_cost_function.primary_to_end(index); + current_upper_bound = current_upper_bound + .saturating_add(&chaining_cost_function.primary_to_end(index)); } (Identifier::SecondaryToPrimary { index, ts_kind, .. }, Identifier::End) => { let start = anchors.secondary(index, ts_kind).end(ts_kind, k); @@ -254,7 +257,8 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); alignments.push(alignment); } - current_upper_bound += chaining_cost_function.jump_34_to_end(index, ts_kind); + current_upper_bound = current_upper_bound + .saturating_add(&chaining_cost_function.jump_34_to_end(index, ts_kind)); } ( Identifier::PrimaryToPrimary { @@ -318,7 +322,8 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); alignments.push(alignment); } - current_upper_bound += chaining_cost_function.primary(from_index, to_index); + current_upper_bound = current_upper_bound + .saturating_add(&chaining_cost_function.primary(from_index, to_index)); } ( Identifier::PrimaryToSecondary { @@ -356,8 +361,9 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); alignments.push(alignment); } - current_upper_bound += - chaining_cost_function.jump_12(from_index, to_index, ts_kind); + current_upper_bound = current_upper_bound.saturating_add( + &chaining_cost_function.jump_12(from_index, to_index, ts_kind), + ); } ( Identifier::SecondaryToSecondary { @@ -420,8 +426,9 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); alignments.push(alignment); } - current_upper_bound += - chaining_cost_function.secondary(from_index, to_index, ts_kind); + current_upper_bound = current_upper_bound.saturating_add( + &chaining_cost_function.secondary(from_index, to_index, ts_kind), + ); } ( Identifier::SecondaryToPrimary { @@ -459,8 +466,9 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); alignments.push(alignment); } - current_upper_bound += - chaining_cost_function.jump_34(from_index, to_index, ts_kind); + current_upper_bound = current_upper_bound.saturating_add( + &chaining_cost_function.jump_34(from_index, to_index, ts_kind), + ); } (Identifier::End, _) | (_, Identifier::Start) diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index c84f5ad..a2f1be4 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -584,7 +584,7 @@ impl ChainingCostFunction { if self.is_primary_exact(from_primary_index, to_primary_index) { *total_redundant_gap_fillings += 1; - debug_assert_eq!(self.primary(from_primary_index, to_primary_index), cost,); + debug_assert_eq!(self.primary(from_primary_index, to_primary_index), cost); } else { self.update_primary(from_primary_index, to_primary_index, cost, true); } @@ -608,7 +608,7 @@ impl ChainingCostFunction { if self.is_primary_from_start_exact(to_primary_index) { *total_redundant_gap_fillings += 1; - debug_assert_eq!(self.primary_from_start(to_primary_index), cost,); + debug_assert_eq!(self.primary_from_start(to_primary_index), cost); } else { self.update_primary_from_start(to_primary_index, cost, true); } diff --git a/lib_ts_chainalign/src/exact_chaining/gap_affine.rs b/lib_ts_chainalign/src/exact_chaining/gap_affine.rs index 58af794..dc6a79a 100644 --- a/lib_ts_chainalign/src/exact_chaining/gap_affine.rs +++ b/lib_ts_chainalign/src/exact_chaining/gap_affine.rs @@ -48,12 +48,17 @@ impl<'sequences, 'cost_table, 'rc_fn, Cost: AStarCost> start.is_primary() && end.is_primary() || start.is_secondary() && end.is_secondary() ); + if start == end { + return (Cost::zero(), Vec::new().into()); + } + let context = Context::new( self.cost_table, self.sequences, self.rc_fn, start, end, + true, self.max_match_run, ); let mut a_star = AStar::new_with_buffers(context, self.a_star_buffers.take().unwrap()); @@ -73,7 +78,9 @@ impl<'sequences, 'cost_table, 'rc_fn, Cost: AStarCost> additional_primary_targets_output.extend( a_star .iter_closed_nodes() - .filter(|node| node.identifier.coordinates.is_primary()) + .filter(|node| { + node.identifier.coordinates.is_primary() && node.identifier.has_non_match + }) .map(|node| { ( PrimaryAnchor::new_from_start(&node.identifier.coordinates), @@ -84,7 +91,9 @@ impl<'sequences, 'cost_table, 'rc_fn, Cost: AStarCost> additional_secondary_targets_output.extend( a_star .iter_closed_nodes() - .filter(|node| node.identifier.coordinates.is_secondary()) + .filter(|node| { + node.identifier.coordinates.is_secondary() && node.identifier.has_non_match + }) .map(|node| { ( SecondaryAnchor::new_from_start(&node.identifier.coordinates), diff --git a/lib_ts_chainalign/src/exact_chaining/gap_affine/algo.rs b/lib_ts_chainalign/src/exact_chaining/gap_affine/algo.rs index c480faf..2537210 100644 --- a/lib_ts_chainalign/src/exact_chaining/gap_affine/algo.rs +++ b/lib_ts_chainalign/src/exact_chaining/gap_affine/algo.rs @@ -20,6 +20,7 @@ pub struct Context<'costs, 'sequences, 'rc_fn, Cost> { rc_fn: &'rc_fn dyn Fn(u8) -> u8, start: AlignmentCoordinates, end: AlignmentCoordinates, + enforce_non_match: bool, max_match_run: u32, } @@ -36,6 +37,7 @@ pub struct Node { pub struct Identifier { pub coordinates: AlignmentCoordinates, gap_type: GapType, + pub has_non_match: bool, } impl<'costs, 'sequences, 'rc_fn, Cost> Context<'costs, 'sequences, 'rc_fn, Cost> { @@ -45,6 +47,7 @@ impl<'costs, 'sequences, 'rc_fn, Cost> Context<'costs, 'sequences, 'rc_fn, Cost> rc_fn: &'rc_fn dyn Fn(u8) -> u8, start: AlignmentCoordinates, end: AlignmentCoordinates, + enforce_non_match: bool, max_match_run: u32, ) -> Self { Self { @@ -53,6 +56,7 @@ impl<'costs, 'sequences, 'rc_fn, Cost> Context<'costs, 'sequences, 'rc_fn, Cost> rc_fn, start, end, + enforce_non_match, max_match_run, } } @@ -66,6 +70,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { identifier: Identifier { coordinates: self.start, gap_type: GapType::None, + has_non_match: !self.enforce_non_match, }, predecessor: None, predecessor_alignment_type: None, @@ -85,6 +90,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { let Identifier { coordinates, gap_type, + has_non_match, } = *identifier; if coordinates.can_increment_both(self.end, Some(self.sequences)) { @@ -101,6 +107,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { identifier: Identifier { coordinates: coordinates.increment_both(), gap_type: GapType::None, + has_non_match, }, predecessor, predecessor_alignment_type: Some(AlignmentType::Match), @@ -115,6 +122,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { identifier: Identifier { coordinates: coordinates.increment_both(), gap_type: GapType::None, + has_non_match: true, }, predecessor, predecessor_alignment_type: Some(AlignmentType::Substitution), @@ -135,6 +143,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { identifier: Identifier { coordinates: coordinates.increment_a(), gap_type: GapType::InB, + has_non_match: true, }, predecessor, predecessor_alignment_type: Some(AlignmentType::GapB), @@ -154,6 +163,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { identifier: Identifier { coordinates: coordinates.increment_b(), gap_type: GapType::InA, + has_non_match: true, }, predecessor, predecessor_alignment_type: Some(AlignmentType::GapA), @@ -164,7 +174,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { } fn is_target(&self, node: &Self::Node) -> bool { - node.identifier.coordinates == self.end + node.identifier.coordinates == self.end && node.identifier.has_non_match } fn cost_limit(&self) -> Option<::Cost> { diff --git a/test_files/twin_10_repetitive.fa b/test_files/twin_10_repetitive.fa new file mode 100644 index 0000000..a09783a --- /dev/null +++ b/test_files/twin_10_repetitive.fa @@ -0,0 +1,4 @@ +>reference +ACACAGTATA +>query +ACACAGTATA \ No newline at end of file From c128ed1d10b678d3fb59f92a65270ad4cf72a125 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 17 Dec 2025 13:14:35 +0200 Subject: [PATCH 09/13] Batch 12 jump chain evaluation. --- .../src/chain_align/evaluation.rs | 32 ++++++++- .../src/chaining_cost_function.rs | 66 +++++++++++++++++++ .../src/exact_chaining/ts_12_jump.rs | 36 +++++++++- .../src/exact_chaining/ts_12_jump/algo.rs | 28 +++++++- .../src/exact_chaining/ts_12_jump/tests.rs | 18 ++--- .../src/exact_chaining/ts_34_jump.rs | 2 + 6 files changed, 166 insertions(+), 16 deletions(-) diff --git a/lib_ts_chainalign/src/chain_align/evaluation.rs b/lib_ts_chainalign/src/chain_align/evaluation.rs index c3cbc98..95f4d1d 100644 --- a/lib_ts_chainalign/src/chain_align/evaluation.rs +++ b/lib_ts_chainalign/src/chain_align/evaluation.rs @@ -188,8 +188,14 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if final_evaluation || !chaining_cost_function.is_jump_12_from_start_exact(index, ts_kind) { - let (cost, alignment) = self.ts_12_jump_aligner.align(start, end); - self.total_gap_fillings += 1; + self.additional_secondary_targets_buffer.clear(); + let (cost, alignment) = self.ts_12_jump_aligner.align( + start, + end, + &mut self.additional_secondary_targets_buffer, + ); + self.total_gap_fillings += + u64::try_from(self.additional_secondary_targets_buffer.len()).unwrap(); trace!( "Aligning from start to S{}[{index}]{} costs {}", ts_kind.digits(), @@ -199,6 +205,12 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if !final_evaluation { chaining_cost_function .update_jump_12_from_start(index, ts_kind, cost, true); + chaining_cost_function.update_additional_12_jump_targets_from_start( + &mut self.additional_secondary_targets_buffer, + ts_kind, + anchors, + &mut self.total_redundant_gap_fillings, + ); } alignments.push(alignment); } @@ -345,11 +357,25 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if final_evaluation || !chaining_cost_function.is_jump_12_exact(from_index, to_index, ts_kind) { - let (cost, alignment) = self.ts_12_jump_aligner.align(start, end); + self.additional_primary_targets_buffer.clear(); + let (cost, alignment) = self.ts_12_jump_aligner.align( + start, + end, + &mut self.additional_secondary_targets_buffer, + ); + self.total_gap_fillings += + u64::try_from(self.additional_primary_targets_buffer.len()).unwrap(); self.total_gap_fillings += 1; if !final_evaluation { chaining_cost_function .update_jump_12(from_index, to_index, ts_kind, cost, true); + chaining_cost_function.update_additional_12_jump_targets( + from_index, + &mut self.additional_secondary_targets_buffer, + ts_kind, + anchors, + &mut self.total_redundant_gap_fillings, + ); } trace!( "Aligning from P{from_index}{} to S{}[{to_index}]{} costs {}", diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index a2f1be4..8cec5f4 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -652,4 +652,70 @@ impl ChainingCostFunction { } } } + + pub fn update_additional_12_jump_targets( + &mut self, + from_secondary_index: AnchorIndex, + additional_targets: &mut [(SecondaryAnchor, Cost)], + ts_kind: TsKind, + anchors: &Anchors, + total_redundant_gap_fillings: &mut u64, + ) { + additional_targets.sort_unstable(); + for (to_anchor_index, cost) in anchors + .secondary_anchor_to_index_iter( + additional_targets.iter().map(|(anchor, _)| *anchor), + ts_kind, + ) + .zip(additional_targets.iter().map(|(_, cost)| *cost)) + { + let Some(to_secondary_index) = to_anchor_index else { + continue; + }; + + if self.is_jump_12_exact(from_secondary_index, to_secondary_index, ts_kind) { + *total_redundant_gap_fillings += 1; + debug_assert_eq!( + self.jump_12(from_secondary_index, to_secondary_index, ts_kind), + cost, + ); + } else { + self.update_jump_12( + from_secondary_index, + to_secondary_index, + ts_kind, + cost, + true, + ); + } + } + } + + pub fn update_additional_12_jump_targets_from_start( + &mut self, + additional_targets: &mut [(SecondaryAnchor, Cost)], + ts_kind: TsKind, + anchors: &Anchors, + total_redundant_gap_fillings: &mut u64, + ) { + additional_targets.sort_unstable(); + for (to_anchor_index, cost) in anchors + .secondary_anchor_to_index_iter( + additional_targets.iter().map(|(anchor, _)| *anchor), + ts_kind, + ) + .zip(additional_targets.iter().map(|(_, cost)| *cost)) + { + let Some(to_secondary_index) = to_anchor_index else { + continue; + }; + + if self.is_jump_12_from_start_exact(to_secondary_index, ts_kind) { + *total_redundant_gap_fillings += 1; + debug_assert_eq!(self.jump_12_from_start(to_secondary_index, ts_kind), cost,); + } else { + self.update_jump_12_from_start(to_secondary_index, ts_kind, cost, true); + } + } + } } diff --git a/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs b/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs index ac19bff..79f6656 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs @@ -1,7 +1,11 @@ use generic_a_star::{AStar, AStarBuffers, AStarResult, cost::AStarCost}; use crate::{ - alignment::{Alignment, coordinates::AlignmentCoordinates, sequences::AlignmentSequences}, + alignment::{ + Alignment, coordinates::AlignmentCoordinates, sequences::AlignmentSequences, + ts_kind::TsDescendant, + }, + anchors::secondary::SecondaryAnchor, costs::AlignmentCosts, exact_chaining::ts_12_jump::algo::{Context, Node}, }; @@ -40,30 +44,56 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> &mut self, start: AlignmentCoordinates, end: AlignmentCoordinates, + additional_secondary_targets_output: &mut impl Extend<(SecondaryAnchor, Cost)>, ) -> (Cost, Alignment) { assert!(start.is_primary()); assert!(end.is_secondary()); + // Enfore non-match if there is a gap between the anchors in the descendant. + // This is to match the lower-bound computation. + // It also discourages chains to deviate from the alignment geometry boundaries. + let descendant_start = match end.ts_kind().unwrap().descendant { + TsDescendant::Seq1 => start.primary_ordinate_a(), + TsDescendant::Seq2 => start.primary_ordinate_b(), + } + .unwrap(); + let enforce_non_match = descendant_start != end.secondary_ordinate_descendant().unwrap(); + let context = Context::new( self.alignment_costs, self.sequences, self.rc_fn, start, end, + enforce_non_match, self.max_match_run, ); let mut a_star = AStar::<_>::new_with_buffers(context, self.a_star_buffers.take().unwrap()); a_star.initialise(); - let result = match a_star.search() { + let (cost, alignment) = match a_star.search() { AStarResult::FoundTarget { cost, .. } => (cost.0, a_star.reconstruct_path().into()), AStarResult::ExceededCostLimit { .. } => unreachable!("Cost limit is None"), AStarResult::ExceededMemoryLimit { .. } => unreachable!("Cost limit is None"), AStarResult::NoTarget => (Cost::max_value(), Vec::new().into()), }; + a_star.search_until(|_, node| node.cost > cost); + additional_secondary_targets_output.extend( + a_star + .iter_closed_nodes() + .filter(|node| { + node.identifier.coordinates().is_secondary() && node.identifier.has_non_match() + }) + .map(|node| { + ( + SecondaryAnchor::new_from_start(&node.identifier.coordinates()), + node.cost, + ) + }), + ); self.a_star_buffers = Some(a_star.into_buffers()); - result + (cost, alignment) } } diff --git a/lib_ts_chainalign/src/exact_chaining/ts_12_jump/algo.rs b/lib_ts_chainalign/src/exact_chaining/ts_12_jump/algo.rs index cef1a7c..d4587ed 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_12_jump/algo.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_12_jump/algo.rs @@ -20,6 +20,7 @@ pub struct Context<'costs, 'sequences, 'rc_fn, Cost> { rc_fn: &'rc_fn dyn Fn(u8) -> u8, start: AlignmentCoordinates, end: AlignmentCoordinates, + enforce_non_match: bool, max_match_run: u32, } @@ -37,13 +38,16 @@ pub enum Identifier { Primary { coordinates: AlignmentCoordinates, gap_type: GapType, + has_non_match: bool, }, Jump12 { coordinates: AlignmentCoordinates, + has_non_match: bool, }, Secondary { coordinates: AlignmentCoordinates, gap_type: GapType, + has_non_match: bool, }, } @@ -54,6 +58,7 @@ impl<'costs, 'sequences, 'rc_fn, Cost> Context<'costs, 'sequences, 'rc_fn, Cost> rc_fn: &'rc_fn dyn Fn(u8) -> u8, start: AlignmentCoordinates, end: AlignmentCoordinates, + enforce_non_match: bool, max_match_run: u32, ) -> Self { assert!(start.is_primary()); @@ -65,6 +70,7 @@ impl<'costs, 'sequences, 'rc_fn, Cost> Context<'costs, 'sequences, 'rc_fn, Cost> rc_fn, start, end, + enforce_non_match, max_match_run, } } @@ -78,6 +84,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { identifier: Identifier::Primary { coordinates: self.start, gap_type: GapType::None, + has_non_match: !self.enforce_non_match, }, predecessor: None, predecessor_alignment_type: None, @@ -96,6 +103,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { let predecessor = Some(*identifier); let coordinates = identifier.coordinates(); + let has_non_match = identifier.has_non_match(); let gap_type = identifier.gap_type(); let is_primary = matches!(identifier, Identifier::Primary { .. }); let gap_affine_costs = if is_primary { @@ -120,6 +128,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { is_primary, coordinates.increment_both(), GapType::None, + has_non_match, ), predecessor, predecessor_alignment_type: Some(AlignmentType::Match), @@ -135,6 +144,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { is_primary, coordinates.increment_both(), GapType::None, + true, ), predecessor, predecessor_alignment_type: Some(AlignmentType::Substitution), @@ -156,6 +166,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { is_primary, coordinates.increment_a(), GapType::InB, + true, ), predecessor, predecessor_alignment_type: Some(AlignmentType::GapB), @@ -176,6 +187,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { is_primary, coordinates.increment_b(), GapType::InA, + true, ), predecessor, predecessor_alignment_type: Some(AlignmentType::GapA), @@ -193,7 +205,10 @@ impl AStarContext for Context<'_, '_, '_, Cost> { coordinates .generate_12_jumps(self.end, self.sequences.end()) .map(|(jump, coordinates)| Node { - identifier: Identifier::Jump12 { coordinates }, + identifier: Identifier::Jump12 { + coordinates, + has_non_match, + }, predecessor, predecessor_alignment_type: Some(AlignmentType::TsStart { jump, @@ -263,16 +278,19 @@ impl Identifier { is_primary: bool, coordinates: AlignmentCoordinates, gap_type: GapType, + has_non_match: bool, ) -> Self { if is_primary { Identifier::Primary { coordinates, gap_type, + has_non_match, } } else { Identifier::Secondary { coordinates, gap_type, + has_non_match, } } } @@ -292,6 +310,14 @@ impl Identifier { Identifier::Secondary { gap_type, .. } => *gap_type, } } + + pub fn has_non_match(&self) -> bool { + match self { + Identifier::Primary { has_non_match, .. } => *has_non_match, + Identifier::Jump12 { has_non_match, .. } => *has_non_match, + Identifier::Secondary { has_non_match, .. } => *has_non_match, + } + } } impl Display for Node { diff --git a/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs b/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs index 98ecb00..0731c77 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs @@ -47,7 +47,7 @@ fn test_start_end() { let start = AlignmentCoordinates::new_primary(0, 0); let end = AlignmentCoordinates::new_secondary(0, 5, TsKind::TS12); let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -96,7 +96,7 @@ fn test_partial_alignment() { let start = AlignmentCoordinates::new_primary(1, 1); let end = AlignmentCoordinates::new_secondary(1, 4, TsKind::TS12); let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -144,7 +144,7 @@ fn test_gap_directions() { let start = AlignmentCoordinates::new_primary(9, 0); let end = AlignmentCoordinates::new_secondary(0, 18, TsKind::TS12); let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -199,7 +199,7 @@ fn test_max_match_run_0() { let start = AlignmentCoordinates::new_primary(8, 0); let end = AlignmentCoordinates::new_secondary(0, 16, TsKind::TS12); let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, 0); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -245,7 +245,7 @@ fn test_max_match_run_1() { let start = AlignmentCoordinates::new_primary(8, 0); let end = AlignmentCoordinates::new_secondary(0, 16, TsKind::TS12); let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, 1); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -295,7 +295,7 @@ fn test_max_match_run_2() { let start = AlignmentCoordinates::new_primary(8, 0); let end = AlignmentCoordinates::new_secondary(0, 16, TsKind::TS12); let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -350,7 +350,7 @@ fn test_only_jump() { let start = AlignmentCoordinates::new_primary(5, 6); let end = AlignmentCoordinates::new_secondary(3, 6, TsKind::TS12); let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -393,7 +393,7 @@ fn test_only_jump_start() { let start = AlignmentCoordinates::new_primary(0, 0); let end = AlignmentCoordinates::new_secondary(0, 0, TsKind::TS12); let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -436,7 +436,7 @@ fn test_only_jump_end() { let start = AlignmentCoordinates::new_primary(10, 10); let end = AlignmentCoordinates::new_secondary(10, 10, TsKind::TS12); let mut aligner = Ts12JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, diff --git a/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs b/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs index d81adbc..34ef40a 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs @@ -44,6 +44,8 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> assert!(start.is_secondary()); assert!(end.is_primary()); + // Do not enforce non-match, because TS geometry may cause match-only jump alignments. + let context = Context::new( self.alignment_costs, self.sequences, From deae26cd0d9bfc2c2a38db6b39a1fcc2327692bc Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 17 Dec 2025 13:33:43 +0200 Subject: [PATCH 10/13] Batch 34 jump chain evaluation. --- .../src/chain_align/evaluation.rs | 37 ++++++++++++--- .../src/chaining_cost_function.rs | 29 ++++++++++++ .../src/exact_chaining/ts_12_jump.rs | 3 +- .../src/exact_chaining/ts_34_jump.rs | 45 ++++++++++++++++--- .../src/exact_chaining/ts_34_jump/algo.rs | 28 +++++++++++- .../src/exact_chaining/ts_34_jump/tests.rs | 18 ++++---- 6 files changed, 137 insertions(+), 23 deletions(-) diff --git a/lib_ts_chainalign/src/chain_align/evaluation.rs b/lib_ts_chainalign/src/chain_align/evaluation.rs index 95f4d1d..d7d62d0 100644 --- a/lib_ts_chainalign/src/chain_align/evaluation.rs +++ b/lib_ts_chainalign/src/chain_align/evaluation.rs @@ -254,8 +254,14 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if final_evaluation || !chaining_cost_function.is_jump_34_to_end_exact(index, ts_kind) { - let (cost, alignment) = self.ts_34_jump_aligner.align(start, end); - self.total_gap_fillings += 1; + self.additional_primary_targets_buffer.clear(); + let (cost, alignment) = self.ts_34_jump_aligner.align( + start, + end, + &mut self.additional_primary_targets_buffer, + ); + self.total_gap_fillings += + u64::try_from(self.additional_primary_targets_buffer.len()).unwrap(); trace!( "Aligning from S{}[{index}]{} to end costs {}", ts_kind.digits(), @@ -265,6 +271,13 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if !final_evaluation { chaining_cost_function .update_jump_34_to_end(index, ts_kind, cost, true); + chaining_cost_function.update_additional_34_jump_targets( + index, + &mut self.additional_primary_targets_buffer, + ts_kind, + anchors, + &mut self.total_redundant_gap_fillings, + ); } alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); alignments.push(alignment); @@ -357,14 +370,14 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if final_evaluation || !chaining_cost_function.is_jump_12_exact(from_index, to_index, ts_kind) { - self.additional_primary_targets_buffer.clear(); + self.additional_secondary_targets_buffer.clear(); let (cost, alignment) = self.ts_12_jump_aligner.align( start, end, &mut self.additional_secondary_targets_buffer, ); self.total_gap_fillings += - u64::try_from(self.additional_primary_targets_buffer.len()).unwrap(); + u64::try_from(self.additional_secondary_targets_buffer.len()).unwrap(); self.total_gap_fillings += 1; if !final_evaluation { chaining_cost_function @@ -474,7 +487,14 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if final_evaluation || !chaining_cost_function.is_jump_34_exact(from_index, to_index, ts_kind) { - let (cost, alignment) = self.ts_34_jump_aligner.align(start, end); + self.additional_primary_targets_buffer.clear(); + let (cost, alignment) = self.ts_34_jump_aligner.align( + start, + end, + &mut self.additional_primary_targets_buffer, + ); + self.total_gap_fillings += + u64::try_from(self.additional_primary_targets_buffer.len()).unwrap(); self.total_gap_fillings += 1; trace!( "Aligning from S{}[{from_index}]{} to P{to_index}{} (S{} to P{}) costs {}", @@ -488,6 +508,13 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if !final_evaluation { chaining_cost_function .update_jump_34(from_index, to_index, ts_kind, cost, true); + chaining_cost_function.update_additional_34_jump_targets( + from_index, + &mut self.additional_primary_targets_buffer, + ts_kind, + anchors, + &mut self.total_redundant_gap_fillings, + ); } alignments.push(iter::repeat_n(AlignmentType::Match, k).collect()); alignments.push(alignment); diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index 8cec5f4..3d6a5fb 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -718,4 +718,33 @@ impl ChainingCostFunction { } } } + + pub fn update_additional_34_jump_targets( + &mut self, + from_secondary_index: AnchorIndex, + additional_targets: &mut [(PrimaryAnchor, Cost)], + ts_kind: TsKind, + anchors: &Anchors, + total_redundant_gap_fillings: &mut u64, + ) { + additional_targets.sort_unstable(); + for (to_anchor_index, cost) in anchors + .primary_anchor_to_index_iter(additional_targets.iter().map(|(anchor, _)| *anchor)) + .zip(additional_targets.iter().map(|(_, cost)| *cost)) + { + let Some(to_primary_index) = to_anchor_index else { + continue; + }; + + if self.is_jump_34_exact(from_secondary_index, to_primary_index, ts_kind) { + *total_redundant_gap_fillings += 1; + debug_assert_eq!( + self.jump_34(from_secondary_index, to_primary_index, ts_kind), + cost, + ); + } else { + self.update_jump_34(from_secondary_index, to_primary_index, ts_kind, cost, true); + } + } + } } diff --git a/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs b/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs index 79f6656..0f8d1fc 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs @@ -83,7 +83,8 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> a_star .iter_closed_nodes() .filter(|node| { - node.identifier.coordinates().is_secondary() && node.identifier.has_non_match() + node.identifier.coordinates().is_secondary() + && (node.identifier.has_non_match() || !enforce_non_match) }) .map(|node| { ( diff --git a/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs b/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs index 34ef40a..12f9143 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs @@ -1,7 +1,11 @@ use generic_a_star::{AStar, AStarBuffers, AStarResult, cost::AStarCost}; use crate::{ - alignment::{Alignment, coordinates::AlignmentCoordinates, sequences::AlignmentSequences}, + alignment::{ + Alignment, coordinates::AlignmentCoordinates, sequences::AlignmentSequences, + ts_kind::TsDescendant, + }, + anchors::primary::PrimaryAnchor, costs::AlignmentCosts, exact_chaining::ts_34_jump::algo::{Context, Node}, }; @@ -40,11 +44,20 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> &mut self, start: AlignmentCoordinates, end: AlignmentCoordinates, + additional_primary_targets_output: &mut impl Extend<(PrimaryAnchor, Cost)>, ) -> (Cost, Alignment) { assert!(start.is_secondary()); assert!(end.is_primary()); - // Do not enforce non-match, because TS geometry may cause match-only jump alignments. + // Enfore non-match if there is a gap between the anchors in the descendant. + // This is to match the lower-bound computation. + // It also discourages chains to deviate from the alignment geometry boundaries. + let descendant_end = match start.ts_kind().unwrap().descendant { + TsDescendant::Seq1 => end.primary_ordinate_a(), + TsDescendant::Seq2 => end.primary_ordinate_b(), + } + .unwrap(); + let enforce_non_match = descendant_end != start.secondary_ordinate_descendant().unwrap(); let context = Context::new( self.alignment_costs, @@ -52,16 +65,15 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> self.rc_fn, start, end, + enforce_non_match, self.max_match_run, ); let mut a_star = AStar::<_>::new_with_buffers(context, self.a_star_buffers.take().unwrap()); a_star.initialise(); - let result = match a_star.search() { + let (cost, alignment) = match a_star.search() { AStarResult::FoundTarget { cost, .. } => { - // The TS base cost is applied at the 12-jump, but we anyways apply it in this algorithm to make it label-setting if the base cost is non-zero. - // But since the 34-jump has zero cost, we subtract it again. - let cost = cost.0 - self.alignment_costs.ts_base_cost; + let cost = cost.0; let alignment = a_star.reconstruct_path().into(); (cost, alignment) @@ -71,8 +83,27 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> AStarResult::NoTarget => (Cost::max_value(), Vec::new().into()), }; + a_star.search_until(|_, node| node.cost > cost); + additional_primary_targets_output.extend( + a_star + .iter_closed_nodes() + .filter(|node| { + node.identifier.coordinates().is_primary() + && (node.identifier.has_non_match() || !enforce_non_match) + }) + .map(|node| { + ( + PrimaryAnchor::new_from_start(&node.identifier.coordinates()), + // The TS base cost is applied at the 12-jump, but we anyways apply it in this algorithm to make it label-setting if the base cost is non-zero. + // But since the 34-jump has zero cost, we subtract it again. + node.cost - self.alignment_costs.ts_base_cost, + ) + }), + ); self.a_star_buffers = Some(a_star.into_buffers()); - result + // The TS base cost is applied at the 12-jump, but we anyways apply it in this algorithm to make it label-setting if the base cost is non-zero. + // But since the 34-jump has zero cost, we subtract it again. + (cost - self.alignment_costs.ts_base_cost, alignment) } } diff --git a/lib_ts_chainalign/src/exact_chaining/ts_34_jump/algo.rs b/lib_ts_chainalign/src/exact_chaining/ts_34_jump/algo.rs index c8ab2d5..ae10c92 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_34_jump/algo.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_34_jump/algo.rs @@ -22,6 +22,7 @@ pub struct Context<'costs, 'sequences, 'rc_fn, Cost> { rc_fn: &'rc_fn dyn Fn(u8) -> u8, start: AlignmentCoordinates, end: AlignmentCoordinates, + enforce_non_match: bool, max_match_run: u32, } @@ -39,13 +40,16 @@ pub enum Identifier { Primary { coordinates: AlignmentCoordinates, gap_type: GapType, + has_non_match: bool, }, Jump34 { coordinates: AlignmentCoordinates, + has_non_match: bool, }, Secondary { coordinates: AlignmentCoordinates, gap_type: GapType, + has_non_match: bool, }, } @@ -56,6 +60,7 @@ impl<'costs, 'sequences, 'rc_fn, Cost> Context<'costs, 'sequences, 'rc_fn, Cost> rc_fn: &'rc_fn dyn Fn(u8) -> u8, start: AlignmentCoordinates, end: AlignmentCoordinates, + enforce_non_match: bool, max_match_run: u32, ) -> Self { assert!(start.is_secondary()); @@ -67,6 +72,7 @@ impl<'costs, 'sequences, 'rc_fn, Cost> Context<'costs, 'sequences, 'rc_fn, Cost> rc_fn, start, end, + enforce_non_match, max_match_run, } } @@ -80,6 +86,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { identifier: Identifier::Secondary { coordinates: self.start, gap_type: GapType::None, + has_non_match: !self.enforce_non_match, }, predecessor: None, predecessor_alignment_type: None, @@ -98,6 +105,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { let predecessor = Some(*identifier); let coordinates = identifier.coordinates(); + let has_non_match = identifier.has_non_match(); let gap_type = identifier.gap_type(); let is_secondary = matches!(identifier, Identifier::Secondary { .. }); let gap_affine_costs = if is_secondary { @@ -122,6 +130,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { !is_secondary, coordinates.increment_both(), GapType::None, + has_non_match, ), predecessor, predecessor_alignment_type: Some(AlignmentType::Match), @@ -137,6 +146,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { !is_secondary, coordinates.increment_both(), GapType::None, + true, ), predecessor, predecessor_alignment_type: Some(AlignmentType::Substitution), @@ -158,6 +168,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { !is_secondary, coordinates.increment_a(), GapType::InB, + true, ), predecessor, predecessor_alignment_type: Some(AlignmentType::GapB), @@ -178,6 +189,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { !is_secondary, coordinates.increment_b(), GapType::InA, + true, ), predecessor, predecessor_alignment_type: Some(AlignmentType::GapA), @@ -204,7 +216,10 @@ impl AStarContext for Context<'_, '_, '_, Cost> { ); } Node { - identifier: Identifier::Jump34 { coordinates }, + identifier: Identifier::Jump34 { + coordinates, + has_non_match, + }, predecessor, predecessor_alignment_type: Some(AlignmentType::TsEnd { jump }), cost: new_cost, @@ -272,16 +287,19 @@ impl Identifier { is_primary: bool, coordinates: AlignmentCoordinates, gap_type: GapType, + has_non_match: bool, ) -> Self { if is_primary { Identifier::Primary { coordinates, gap_type, + has_non_match, } } else { Identifier::Secondary { coordinates, gap_type, + has_non_match, } } } @@ -301,6 +319,14 @@ impl Identifier { Identifier::Secondary { gap_type, .. } => *gap_type, } } + + pub fn has_non_match(&self) -> bool { + match self { + Identifier::Primary { has_non_match, .. } => *has_non_match, + Identifier::Jump34 { has_non_match, .. } => *has_non_match, + Identifier::Secondary { has_non_match, .. } => *has_non_match, + } + } } impl Display for Node { diff --git a/lib_ts_chainalign/src/exact_chaining/ts_34_jump/tests.rs b/lib_ts_chainalign/src/exact_chaining/ts_34_jump/tests.rs index 9189327..1efcf4c 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_34_jump/tests.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_34_jump/tests.rs @@ -47,7 +47,7 @@ fn test_start_end() { let start: AlignmentCoordinates = AlignmentCoordinates::new_secondary(2, 0, TsKind::TS12); let end = AlignmentCoordinates::new_primary(4, 5); let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -90,7 +90,7 @@ fn test_partial_alignment() { let start: AlignmentCoordinates = AlignmentCoordinates::new_secondary(1, 1, TsKind::TS12); let end = AlignmentCoordinates::new_primary(3, 4); let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -132,7 +132,7 @@ fn test_gap_directions() { let start = AlignmentCoordinates::new_secondary(18, 0, TsKind::TS21); let end = AlignmentCoordinates::new_primary(18, 9); let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -181,7 +181,7 @@ fn test_max_match_run_0() { let start = AlignmentCoordinates::new_secondary(8, 0, TsKind::TS12); let end = AlignmentCoordinates::new_primary(16, 16); let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, 0); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -221,7 +221,7 @@ fn test_max_match_run_1() { let start = AlignmentCoordinates::new_secondary(8, 0, TsKind::TS12); let end = AlignmentCoordinates::new_primary(16, 16); let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, 1); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -269,7 +269,7 @@ fn test_max_match_run_2() { let start = AlignmentCoordinates::new_secondary(8, 0, TsKind::TS12); let end = AlignmentCoordinates::new_primary(16, 16); let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -318,7 +318,7 @@ fn test_only_jump() { let start = AlignmentCoordinates::new_secondary(2, 8, TsKind::TS21); let end = AlignmentCoordinates::new_primary(8, 8); let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -355,7 +355,7 @@ fn test_only_jump_start() { let start = AlignmentCoordinates::new_secondary(0, 0, TsKind::TS21); let end = AlignmentCoordinates::new_primary(0, 0); let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, @@ -392,7 +392,7 @@ fn test_only_jump_end() { let start = AlignmentCoordinates::new_secondary(10, 10, TsKind::TS21); let end = AlignmentCoordinates::new_primary(10, 10); let mut aligner = Ts34JumpAligner::new(&sequences, &cost_table, &rc_fn, 2); - let (cost, alignment) = aligner.align(start, end); + let (cost, alignment) = aligner.align(start, end, &mut Vec::new()); assert_eq!( alignment.alignment, From a10b00da6cf9e57499ab50f9883a368e66d5ea79 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 17 Dec 2025 13:38:48 +0200 Subject: [PATCH 11/13] Update tests. --- .../src/exact_chaining/gap_affine/tests.rs | 18 ++++++++++++------ .../src/exact_chaining/ts_12_jump/tests.rs | 6 ++---- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs b/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs index b6b2ee1..13c72fe 100644 --- a/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs +++ b/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs @@ -230,7 +230,7 @@ fn test_max_match_run_2() { #[test] fn test_secondary_12() { - let seq1 = b"GAAAAAAAAG".to_vec(); + let seq1 = b"GAAAAAAATG".to_vec(); let seq2 = b"GTTTTTTTTG".to_vec(); let sequences = AlignmentSequences::new(seq1, seq2); let cost_table = @@ -241,13 +241,16 @@ fn test_secondary_12() { let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); let (cost, alignment) = aligner.align(start, end, &mut Vec::new(), &mut Vec::new()); - assert_eq!(alignment.alignment, vec![(8, AlignmentType::Match),]); - assert_eq!(cost, U32Cost::from(0u8)); + assert_eq!( + alignment.alignment, + vec![(1, AlignmentType::Substitution), (7, AlignmentType::Match)] + ); + assert_eq!(cost, U32Cost::from(2u8)); } #[test] fn test_secondary_21() { - let seq1 = b"GAAAAAAAAG".to_vec(); + let seq1 = b"GAAAAAAATG".to_vec(); let seq2 = b"GTTTTTTTTG".to_vec(); let sequences = AlignmentSequences::new(seq1, seq2); let cost_table = @@ -258,6 +261,9 @@ fn test_secondary_21() { let mut aligner = GapAffineAligner::new(&sequences, &cost_table, &rc_fn, u32::MAX); let (cost, alignment) = aligner.align(start, end, &mut Vec::new(), &mut Vec::new()); - assert_eq!(alignment.alignment, vec![(8, AlignmentType::Match),]); - assert_eq!(cost, U32Cost::from(0u8)); + assert_eq!( + alignment.alignment, + vec![(7, AlignmentType::Match), (1, AlignmentType::Substitution)] + ); + assert_eq!(cost, U32Cost::from(2u8)); } diff --git a/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs b/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs index 0731c77..897f53e 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs @@ -251,13 +251,11 @@ fn test_max_match_run_1() { alignment.alignment, vec![ (1, AlignmentType::Match), - (12, AlignmentType::GapA), - (1, AlignmentType::Substitution), - (1, AlignmentType::Match), + (14, AlignmentType::GapA), ( 1, AlignmentType::TsStart { - jump: -10, + jump: -8, ts_kind: TsKind::TS12 } ), From 3d02f71c78083825258246367a5d0debf5d4a2cf Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 17 Dec 2025 13:56:29 +0200 Subject: [PATCH 12/13] Do not adjust Cost::max_value() in Ts34JumpAligner. --- .../src/exact_chaining/ts_34_jump.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs b/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs index 12f9143..cc37652 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs @@ -96,7 +96,11 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> PrimaryAnchor::new_from_start(&node.identifier.coordinates()), // The TS base cost is applied at the 12-jump, but we anyways apply it in this algorithm to make it label-setting if the base cost is non-zero. // But since the 34-jump has zero cost, we subtract it again. - node.cost - self.alignment_costs.ts_base_cost, + if node.cost == Cost::max_value() { + Cost::max_value() + } else { + node.cost - self.alignment_costs.ts_base_cost + }, ) }), ); @@ -104,6 +108,13 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> // The TS base cost is applied at the 12-jump, but we anyways apply it in this algorithm to make it label-setting if the base cost is non-zero. // But since the 34-jump has zero cost, we subtract it again. - (cost - self.alignment_costs.ts_base_cost, alignment) + ( + if cost == Cost::max_value() { + Cost::max_value() + } else { + cost - self.alignment_costs.ts_base_cost + }, + alignment, + ) } } From e0f3b79bfac596c05c10cb07a36d46603ff3790c Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Thu, 18 Dec 2025 12:03:55 +0200 Subject: [PATCH 13/13] Fix wrong exact alignment computation regarding non-matches. Closes #130 --- lib_ts_chainalign/src/anchors/primary.rs | 1 + .../src/chaining_cost_function.rs | 22 ++++++++++--------- .../src/exact_chaining/gap_affine.rs | 6 +++-- .../src/exact_chaining/gap_affine/algo.rs | 5 +++-- .../src/exact_chaining/ts_12_jump.rs | 8 ++++++- .../src/exact_chaining/ts_12_jump/algo.rs | 3 ++- .../src/exact_chaining/ts_34_jump.rs | 12 +++++++++- .../src/exact_chaining/ts_34_jump/algo.rs | 3 ++- 8 files changed, 42 insertions(+), 18 deletions(-) diff --git a/lib_ts_chainalign/src/anchors/primary.rs b/lib_ts_chainalign/src/anchors/primary.rs index 2f9add5..bcbde19 100644 --- a/lib_ts_chainalign/src/anchors/primary.rs +++ b/lib_ts_chainalign/src/anchors/primary.rs @@ -78,6 +78,7 @@ impl PrimaryAnchor { .unwrap_or_else(|| panic!("self: {self}, end: {end}, k: {k}")) } + /// Returns the gap in the descendant for the 12-jump from this anchor to the given anchor. pub fn chaining_jump_gap( &self, second: &SecondaryAnchor, diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index 3d6a5fb..cd981ad 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -655,7 +655,7 @@ impl ChainingCostFunction { pub fn update_additional_12_jump_targets( &mut self, - from_secondary_index: AnchorIndex, + from_primary_index: AnchorIndex, additional_targets: &mut [(SecondaryAnchor, Cost)], ts_kind: TsKind, anchors: &Anchors, @@ -673,20 +673,17 @@ impl ChainingCostFunction { continue; }; - if self.is_jump_12_exact(from_secondary_index, to_secondary_index, ts_kind) { + if self.is_jump_12_exact(from_primary_index, to_secondary_index, ts_kind) { *total_redundant_gap_fillings += 1; debug_assert_eq!( - self.jump_12(from_secondary_index, to_secondary_index, ts_kind), + self.jump_12(from_primary_index, to_secondary_index, ts_kind), cost, + "Jump12: Previous exact cost was {} but additional target cost is {cost}.\nFrom P-{from_primary_index} to S{}-{to_secondary_index}.", + self.jump_12(from_primary_index, to_secondary_index, ts_kind), + ts_kind.digits(), ); } else { - self.update_jump_12( - from_secondary_index, - to_secondary_index, - ts_kind, - cost, - true, - ); + self.update_jump_12(from_primary_index, to_secondary_index, ts_kind, cost, true); } } } @@ -741,6 +738,11 @@ impl ChainingCostFunction { debug_assert_eq!( self.jump_34(from_secondary_index, to_primary_index, ts_kind), cost, + "Jump12: Previous exact cost was {} but additional target cost is {cost}.\nFrom S{}-{from_secondary_index}{} to P-{to_primary_index}{}.", + self.jump_34(from_secondary_index, to_primary_index, ts_kind), + ts_kind.digits(), + anchors.secondary(from_secondary_index, ts_kind), + anchors.primary(to_primary_index), ); } else { self.update_jump_34(from_secondary_index, to_primary_index, ts_kind, cost, true); diff --git a/lib_ts_chainalign/src/exact_chaining/gap_affine.rs b/lib_ts_chainalign/src/exact_chaining/gap_affine.rs index dc6a79a..46a6ba4 100644 --- a/lib_ts_chainalign/src/exact_chaining/gap_affine.rs +++ b/lib_ts_chainalign/src/exact_chaining/gap_affine.rs @@ -79,7 +79,8 @@ impl<'sequences, 'cost_table, 'rc_fn, Cost: AStarCost> a_star .iter_closed_nodes() .filter(|node| { - node.identifier.coordinates.is_primary() && node.identifier.has_non_match + node.identifier.coordinates.is_primary() + && (node.identifier.has_non_match == (start != node.identifier.coordinates)) }) .map(|node| { ( @@ -92,7 +93,8 @@ impl<'sequences, 'cost_table, 'rc_fn, Cost: AStarCost> a_star .iter_closed_nodes() .filter(|node| { - node.identifier.coordinates.is_secondary() && node.identifier.has_non_match + node.identifier.coordinates.is_secondary() + && (node.identifier.has_non_match == (start != node.identifier.coordinates)) }) .map(|node| { ( diff --git a/lib_ts_chainalign/src/exact_chaining/gap_affine/algo.rs b/lib_ts_chainalign/src/exact_chaining/gap_affine/algo.rs index 2537210..22d9697 100644 --- a/lib_ts_chainalign/src/exact_chaining/gap_affine/algo.rs +++ b/lib_ts_chainalign/src/exact_chaining/gap_affine/algo.rs @@ -70,7 +70,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { identifier: Identifier { coordinates: self.start, gap_type: GapType::None, - has_non_match: !self.enforce_non_match, + has_non_match: false, }, predecessor: None, predecessor_alignment_type: None, @@ -174,7 +174,8 @@ impl AStarContext for Context<'_, '_, '_, Cost> { } fn is_target(&self, node: &Self::Node) -> bool { - node.identifier.coordinates == self.end && node.identifier.has_non_match + node.identifier.coordinates == self.end + && (node.identifier.has_non_match || !self.enforce_non_match) } fn cost_limit(&self) -> Option<::Cost> { diff --git a/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs b/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs index 0f8d1fc..cea55dc 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs @@ -84,7 +84,13 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> .iter_closed_nodes() .filter(|node| { node.identifier.coordinates().is_secondary() - && (node.identifier.has_non_match() || !enforce_non_match) + && (node.identifier.has_non_match() + != (descendant_start + == node + .identifier + .coordinates() + .secondary_ordinate_descendant() + .unwrap())) }) .map(|node| { ( diff --git a/lib_ts_chainalign/src/exact_chaining/ts_12_jump/algo.rs b/lib_ts_chainalign/src/exact_chaining/ts_12_jump/algo.rs index d4587ed..049c075 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_12_jump/algo.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_12_jump/algo.rs @@ -84,7 +84,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { identifier: Identifier::Primary { coordinates: self.start, gap_type: GapType::None, - has_non_match: !self.enforce_non_match, + has_non_match: false, }, predecessor: None, predecessor_alignment_type: None, @@ -223,6 +223,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { fn is_target(&self, node: &Self::Node) -> bool { node.identifier.coordinates() == self.end + && (node.identifier.has_non_match() || !self.enforce_non_match) } fn cost_limit(&self) -> Option<::Cost> { diff --git a/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs b/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs index cc37652..4e281b1 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_34_jump.rs @@ -57,6 +57,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> TsDescendant::Seq2 => end.primary_ordinate_b(), } .unwrap(); + let descendant_start = start.secondary_ordinate_descendant().unwrap(); let enforce_non_match = descendant_end != start.secondary_ordinate_descendant().unwrap(); let context = Context::new( @@ -89,7 +90,16 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> .iter_closed_nodes() .filter(|node| { node.identifier.coordinates().is_primary() - && (node.identifier.has_non_match() || !enforce_non_match) + && (node.identifier.has_non_match() + != (descendant_start + == match start.ts_kind().unwrap().descendant { + TsDescendant::Seq1 => { + node.identifier.coordinates().primary_ordinate_a().unwrap() + } + TsDescendant::Seq2 => { + node.identifier.coordinates().primary_ordinate_b().unwrap() + } + })) }) .map(|node| { ( diff --git a/lib_ts_chainalign/src/exact_chaining/ts_34_jump/algo.rs b/lib_ts_chainalign/src/exact_chaining/ts_34_jump/algo.rs index ae10c92..dd51e6b 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_34_jump/algo.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_34_jump/algo.rs @@ -86,7 +86,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { identifier: Identifier::Secondary { coordinates: self.start, gap_type: GapType::None, - has_non_match: !self.enforce_non_match, + has_non_match: false, }, predecessor: None, predecessor_alignment_type: None, @@ -232,6 +232,7 @@ impl AStarContext for Context<'_, '_, '_, Cost> { fn is_target(&self, node: &Self::Node) -> bool { node.identifier.coordinates() == self.end + && (node.identifier.has_non_match() || !self.enforce_non_match) } fn cost_limit(&self) -> Option<::Cost> {