From 8aa5598e2273fdd4d153c9e68d7a8c6221e6ea8d Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 12 Jan 2026 13:56:10 +0200 Subject: [PATCH 1/4] Better logging. --- lib_ts_chainalign/src/chain_align.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib_ts_chainalign/src/chain_align.rs b/lib_ts_chainalign/src/chain_align.rs index 5cd7a501..1db27427 100644 --- a/lib_ts_chainalign/src/chain_align.rs +++ b/lib_ts_chainalign/src/chain_align.rs @@ -19,7 +19,7 @@ use lib_tsalign::a_star_aligner::{ alignment_result::AlignmentResult, template_switch_distance::{EqualCostRange, TemplateSwitchDirection}, }; -use log::{debug, trace}; +use log::{debug, info, trace}; use rustc_hash::FxHashMapSeed; use std::{ fmt::Write, @@ -148,6 +148,7 @@ fn actually_align< anchors: &Anchors, chaining_cost_function: &mut ChainingCostFunction, ) -> AlignmentResult { + info!("Aligning..."); let progress_bar = ProgressBar::new_spinner(); progress_bar.enable_steady_tick(Duration::from_millis(200)); From 7f7cdc1a7fd31969ca855c9aa51a93f5cc6f026a Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 12 Jan 2026 13:56:26 +0200 Subject: [PATCH 2/4] Add failing case. --- test_files/config/bench/config.tsa | 127 +++++++++++++++++++++++ test_files/config/chainalignn/config.tsa | 2 +- test_files/twin_ari_email_2.fa | 4 + 3 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 test_files/config/bench/config.tsa create mode 100644 test_files/twin_ari_email_2.fa diff --git a/test_files/config/bench/config.tsa b/test_files/config/bench/config.tsa new file mode 100644 index 00000000..774b4368 --- /dev/null +++ b/test_files/config/bench/config.tsa @@ -0,0 +1,127 @@ +# Limits + +left_flank_length = 0 +right_flank_length = 0 + +# Base Cost + +rrf_cost = inf +rqf_cost = inf +qrf_cost = inf +qqf_cost = inf +rrr_cost = 2 +rqr_cost = 2 +qrr_cost = 2 +qqr_cost = 2 + +# Jump Costs + +Offset + -inf -100 101 + inf 0 inf + +Length + 0 10 200 + inf 0 inf + +LengthDifference + -inf -100 -20 -10 11 21 101 + inf 4 2 0 2 4 inf + +ForwardAntiPrimaryGap + -inf 1 + 0 inf + +ReverseAntiPrimaryGap + -inf + 0 + +# Primary Edit Costs + +SubstitutionCostTable + | A C G T +--+------------ +A | 0 2 2 2 +C | 2 0 2 2 +G | 2 2 0 2 +T | 2 2 2 0 + +GapOpenCostVector + A C G T + 3 3 3 3 + +GapExtendCostVector + A C G T + 1 1 1 1 + +# Secondary Forward Edit Costs + +SubstitutionCostTable + | A C G T +--+------------ +A | 0 4 4 4 +C | 4 0 4 4 +G | 4 4 0 4 +T | 4 4 4 0 + +GapOpenCostVector + A C G T + 3 3 3 3 + +GapExtendCostVector + A C G T + 2 2 2 2 + +# Secondary Reverse Edit Costs + +SubstitutionCostTable + | A C G T +--+------------ +A | 0 4 4 4 +C | 4 0 4 4 +G | 4 4 0 4 +T | 4 4 4 0 + +GapOpenCostVector + A C G T + 3 3 3 3 + +GapExtendCostVector + A C G T + 2 2 2 2 + +# Left Flank Edit Costs + +SubstitutionCostTable + | A C G T +--+------------ +A | 0 3 3 3 +C | 3 0 3 3 +G | 3 3 0 3 +T | 3 3 3 0 + +GapOpenCostVector + A C G T + 4 4 4 4 + +GapExtendCostVector + A C G T + 1 1 1 1 + +# Right Flank Edit Costs + +SubstitutionCostTable + | A C G T +--+------------ +A | 0 3 3 3 +C | 3 0 3 3 +G | 3 3 0 3 +T | 3 3 3 0 + +GapOpenCostVector + A C G T + 4 4 4 4 + +GapExtendCostVector + A C G T + 1 1 1 1 diff --git a/test_files/config/chainalignn/config.tsa b/test_files/config/chainalignn/config.tsa index 5d321d32..c9d2a9d4 100644 --- a/test_files/config/chainalignn/config.tsa +++ b/test_files/config/chainalignn/config.tsa @@ -21,7 +21,7 @@ Offset inf 0 inf Length - 0 4 200 + 0 10 200 inf 0 inf LengthDifference diff --git a/test_files/twin_ari_email_2.fa b/test_files/twin_ari_email_2.fa new file mode 100644 index 00000000..d3b61a56 --- /dev/null +++ b/test_files/twin_ari_email_2.fa @@ -0,0 +1,4 @@ +>REF +cagaggaagcttcctcttccctaggccttgacagttgtgcagagccaggccctcattggctccaatgaggttgaattcctatctgtgaaccaatccctacggctagggagatTAGACCTGTTTACAAGCTTATACTATAAGTGTAAGAGTTcacattagaatctcctgaggagctttgaaacatacttacactgtttcccaccctagagaaattaactcagaatatctagaggtgagagtcaggcattaatttttttaaagctccctttgtgattccaatgtgaatcctggtttgaaaaACCCCAGTTCAAATAATTTAGGGCCCAAACCTGAAGTTGTAACTAAGGCAAGCTCATTTCAAACACATGACCTGAGCCTTGGTAACGGGTGATACTTCAAATGAATTTTGGGCAACTCTTTAGCAGATGATGTTTGATGCATACTGGATTTTAAAAATCTTCGGCTCCACTATTTTGAACAAAACAAATGGGGACTCCTTTCATTTGTTCCCATTATGCATCCCTTTTCCCCTTCCTATCATAAAGTGAGAGGTAAGATTCCCCTGGGCTCCCACAGTCATGTGGCTTGGCATATCAAGTAGCCTGAGAGCTGGGAGTTAAAACCTGAGGACATTCATGTCTCTCTTTCTAGAAGTTATAGTCTGTACATTGGTCATCAATGGATGCATTAAGGCCTCCCCCAGAGATGGCTCCTTACAACATTACCCTGTTCTGCAAATTTACCTGTGACTTCCATCCACAGTATTCACATCATTCTTATGCTCTCTTGAGAGTTAATTCTTCTTATAAATAACTGACATGTTCTCCTGAAGAAAAATGTATTTCCAGATTCATGAAAATGTAAATTAGGATTTTTAACTACAAGTTCAACAGGCTGGACTCATTCTCTGGCAATAAAGCATGTCAGAGCAATATAAAACATCTAGAAAAAACAAGCTGTGCCTTAAAATAATACCCAGACACCTGACTGTATCTTCCAGAAAATAGA +>ALT +cagaggaagcttcctcttccctaggccttgacagttgtgcagagccaggccctcattggctccaatgaggttgaattcctatctgtgaaccaatccctacggctagggagatTAGACCTGTTTACAAGCTTATACTATAAGTGTAAGAGTTcacattagaatctcctgaggagctttgaaacatacttacactgtttcccaccctagagaaattaactcagaatatctagaggtgagagtcaggcattaatttttttaaagctccctttgtgattccaatgtgaatcctggtttgaaaaACCCCAGTTCAAATAATTTAGGGCCCAAACCTGAAGTTGTAACTAAGGCAAGCTCATTTCAAACACATGACCTGAGCCTTGGTAACGGGTGATACTTCAAATGAATTTTGGGCAACTCTTTAGCAGATGATGTTTGATGCATACTGGATTTTAAAAATCTTCGGCTCCACTATTTTGAACAAAACAAATGGGGACTCCTTTTATTTTTTCCCATTATGCATCCCTTTTCCCCTTCCTATCATAAAGTGAGAGGTAAGATTCCCCTGGGCTCCCACAGTCATGTGGCTTGGCATATCAAGTAGCCTGAGAGCTGGGAGTTAAAACCTGAGGACATTCATGTCTCTCTTTCTAGAAGTTATAGTCTGTACATTGGTCATCAATGGATGCATTAAGGCCTCCCCCAGAGATGGCTCCTTACAACATTACCCTGTTCTGCAAATTTACCTGTGACTTCCATCCACAGTATTCACATCATTCTTATGCTCTCTTGAGAGTTAATTCTTCTTATAAATAACTGACATGTTCTCCTGAAGAAAAATGTATTTCCAGATTCATGAAAATGTAAATTAGGATTTTTAACTACAAGTTCAACAGGCTGGACTCATTCTCTGGCAATAAAGCATGTCAGAGCAATATAAAACATCTAGAAAAAACAAGCTGTGCCTTAAAATAATACCCAGACACCTGACTGTATCTTCCAGAAAATAGA \ No newline at end of file From 17664f7dce13bc2e4af727af303fba630d70bee4 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 12 Jan 2026 14:44:24 +0200 Subject: [PATCH 3/4] Move RQ ranges into AlignmentSequences. --- lib_ts_chainalign/src/alignment/sequences.rs | 41 +++++++++++++++---- lib_ts_chainalign/src/anchors/tests.rs | 4 +- lib_ts_chainalign/src/chain_align.rs | 27 ++---------- .../src/chain_align/evaluation.rs | 21 ++++------ .../src/chaining_cost_function.rs | 13 +++--- .../src/exact_chaining/gap_affine/tests.rs | 22 +++++----- .../src/exact_chaining/ts_12_jump/tests.rs | 18 ++++---- .../src/exact_chaining/ts_34_jump/tests.rs | 18 ++++---- lib_ts_chainalign/src/lib.rs | 8 +--- test_files/twin_20_badends.fa | 8 ++++ 10 files changed, 93 insertions(+), 87 deletions(-) create mode 100644 test_files/twin_20_badends.fa diff --git a/lib_ts_chainalign/src/alignment/sequences.rs b/lib_ts_chainalign/src/alignment/sequences.rs index 8d687c54..0b6afbce 100644 --- a/lib_ts_chainalign/src/alignment/sequences.rs +++ b/lib_ts_chainalign/src/alignment/sequences.rs @@ -8,24 +8,47 @@ pub struct AlignmentSequences { seq2: Vec, seq1_name: String, seq2_name: String, + start: AlignmentCoordinates, + end: AlignmentCoordinates, } impl AlignmentSequences { - pub fn new(seq1: Vec, seq2: Vec) -> Self { - Self { + pub fn new( + seq1: Vec, + seq2: Vec, + start: AlignmentCoordinates, + end: AlignmentCoordinates, + ) -> Self { + Self::new_named( seq1, seq2, - seq1_name: "seq1".to_string(), - seq2_name: "seq2".to_string(), - } + "seq1".to_string(), + "seq2".to_string(), + start, + end, + ) + } + + pub fn new_complete(seq1: Vec, seq2: Vec) -> Self { + let end = AlignmentCoordinates::new_primary(seq1.len(), seq2.len()); + Self::new(seq1, seq2, AlignmentCoordinates::new_primary(0, 0), end) } - pub fn new_named(seq1: Vec, seq2: Vec, seq1_name: String, seq2_name: String) -> Self { + pub fn new_named( + seq1: Vec, + seq2: Vec, + seq1_name: String, + seq2_name: String, + start: AlignmentCoordinates, + end: AlignmentCoordinates, + ) -> Self { Self { seq1, seq2, seq1_name, seq2_name, + start, + end, } } @@ -54,11 +77,11 @@ impl AlignmentSequences { } pub fn primary_start(&self) -> AlignmentCoordinates { - AlignmentCoordinates::new_primary(0, 0) + self.start } pub fn primary_end(&self) -> AlignmentCoordinates { - self.end(None) + self.end } pub fn secondary_end(&self, ts_kind: TsKind) -> AlignmentCoordinates { @@ -67,7 +90,7 @@ impl AlignmentSequences { pub fn end(&self, ts_kind: Option) -> AlignmentCoordinates { match ts_kind { - None => AlignmentCoordinates::new_primary(self.seq1.len(), self.seq2.len()), + None => self.primary_end(), Some(ts_kind @ (TsKind::TS11 | TsKind::TS21)) => { AlignmentCoordinates::new_secondary(0, self.seq1.len(), ts_kind) } diff --git a/lib_ts_chainalign/src/anchors/tests.rs b/lib_ts_chainalign/src/anchors/tests.rs index 1fe7bee6..7cede306 100644 --- a/lib_ts_chainalign/src/anchors/tests.rs +++ b/lib_ts_chainalign/src/anchors/tests.rs @@ -17,7 +17,7 @@ fn rc_fn(c: u8) -> u8 { #[test] fn test_coordinates() { - let sequences = AlignmentSequences::new(b"ACAC".to_vec(), b"ACGT".to_vec()); + let sequences = AlignmentSequences::new_complete(b"ACAC".to_vec(), b"ACGT".to_vec()); let range = AlignmentRange::new_complete(sequences.seq1().len(), sequences.seq2().len()); let k = 2; @@ -40,7 +40,7 @@ fn test_coordinates() { #[test] fn test_coordinates_rev() { - let sequences = AlignmentSequences::new(b"ACGT".to_vec(), b"ACAC".to_vec()); + let sequences = AlignmentSequences::new_complete(b"ACGT".to_vec(), b"ACAC".to_vec()); let range = AlignmentRange::new_complete(sequences.seq1().len(), sequences.seq2().len()); let k = 2; diff --git a/lib_ts_chainalign/src/chain_align.rs b/lib_ts_chainalign/src/chain_align.rs index 1db27427..b62a94fd 100644 --- a/lib_ts_chainalign/src/chain_align.rs +++ b/lib_ts_chainalign/src/chain_align.rs @@ -28,7 +28,7 @@ use std::{ }; use crate::{ - alignment::{AlignmentType, coordinates::AlignmentCoordinates, sequences::AlignmentSequences}, + alignment::{AlignmentType, sequences::AlignmentSequences}, anchors::Anchors, chain_align::{ chainer::{Context, Identifier, Node, closed_list::ChainerClosedList}, @@ -45,11 +45,8 @@ mod chainer; mod evaluation; pub mod performance_parameters; -#[expect(clippy::too_many_arguments)] pub fn align( sequences: &AlignmentSequences, - start: AlignmentCoordinates, - end: AlignmentCoordinates, performance_parameters: &AlignmentPerformanceParameters, alignment_costs: &AlignmentCosts, rc_fn: &dyn Fn(u8) -> u8, @@ -61,8 +58,6 @@ pub fn align( ChainingOpenList::StdHeap => { choose_closed_list::, AStarNodeComparator>>( sequences, - start, - end, performance_parameters, alignment_costs, rc_fn, @@ -73,8 +68,6 @@ pub fn align( } ChainingOpenList::LinearHeap => choose_closed_list::>( sequences, - start, - end, performance_parameters, alignment_costs, rc_fn, @@ -85,15 +78,12 @@ pub fn align( } } -#[expect(clippy::too_many_arguments)] pub fn choose_closed_list< AlphabetType: Alphabet, Cost: AStarCost, OpenList: AStarOpenList>, >( sequences: &AlignmentSequences, - start: AlignmentCoordinates, - end: AlignmentCoordinates, performance_parameters: &AlignmentPerformanceParameters, alignment_costs: &AlignmentCosts, rc_fn: &dyn Fn(u8) -> u8, @@ -105,8 +95,6 @@ pub fn choose_closed_list< ChainingClosedList::FxHashMap => { actually_align::, OpenList>( sequences, - start, - end, performance_parameters, alignment_costs, rc_fn, @@ -118,8 +106,6 @@ pub fn choose_closed_list< ChainingClosedList::Special => { actually_align::, OpenList>( sequences, - start, - end, performance_parameters, alignment_costs, rc_fn, @@ -131,7 +117,6 @@ pub fn choose_closed_list< } } -#[expect(clippy::too_many_arguments)] fn actually_align< AlphabetType: Alphabet, Cost: AStarCost, @@ -139,8 +124,6 @@ fn actually_align< OpenList: AStarOpenList>, >( sequences: &AlignmentSequences, - start: AlignmentCoordinates, - end: AlignmentCoordinates, performance_parameters: &AlignmentPerformanceParameters, alignment_costs: &AlignmentCosts, rc_fn: &dyn Fn(u8) -> u8, @@ -281,8 +264,6 @@ fn actually_align< let (evaluated_cost, _) = chain_evaluator.evaluate_chain( anchors, &chain, - start, - end, max_match_run, astar.context_mut().chaining_cost_function, false, @@ -371,8 +352,6 @@ fn actually_align< let (evaluated_cost, alignments) = chain_evaluator.evaluate_chain( anchors, &chain, - start, - end, max_match_run, astar.context_mut().chaining_cost_function, true, @@ -465,8 +444,8 @@ fn actually_align< .as_genome_subsequence(), sequences.seq1_name(), sequences.seq2_name(), - start.primary_ordinate_a().unwrap(), - start.primary_ordinate_b().unwrap(), + sequences.primary_start().primary_ordinate_a().unwrap(), + sequences.primary_start().primary_ordinate_b().unwrap(), result.without_node_identifier(), duration_seconds, 0, diff --git a/lib_ts_chainalign/src/chain_align/evaluation.rs b/lib_ts_chainalign/src/chain_align/evaluation.rs index 1d8e5103..ea688d89 100644 --- a/lib_ts_chainalign/src/chain_align/evaluation.rs +++ b/lib_ts_chainalign/src/chain_align/evaluation.rs @@ -5,9 +5,7 @@ use log::trace; use num_traits::Zero; use crate::{ - alignment::{ - Alignment, AlignmentType, coordinates::AlignmentCoordinates, sequences::AlignmentSequences, - }, + alignment::{Alignment, AlignmentType, sequences::AlignmentSequences}, anchors::{Anchors, primary::PrimaryAnchor, secondary::SecondaryAnchor}, chain_align::chainer::Identifier, chaining_cost_function::ChainingCostFunction, @@ -19,6 +17,7 @@ use crate::{ }; pub struct ChainEvaluator<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> { + sequences: &'sequences AlignmentSequences, primary_aligner: GapAffineAligner<'sequences, 'alignment_costs, 'rc_fn, Cost>, secondary_aligner: GapAffineAligner<'sequences, 'alignment_costs, 'rc_fn, Cost>, ts_12_jump_aligner: Ts12JumpAligner<'sequences, 'alignment_costs, 'rc_fn, Cost>, @@ -43,6 +42,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> max_match_run: u32, ) -> Self { Self { + sequences, primary_aligner: GapAffineAligner::new( sequences, &alignment_costs.primary_costs, @@ -78,13 +78,10 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> } } - #[expect(clippy::too_many_arguments)] pub fn evaluate_chain( &mut self, anchors: &Anchors, chain: &[Identifier], - start: AlignmentCoordinates, - end: AlignmentCoordinates, max_match_run: u32, chaining_cost_function: &mut ChainingCostFunction, final_evaluation: bool, @@ -124,8 +121,8 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> if final_evaluation || !chaining_cost_function.is_start_to_end_exact() { self.additional_primary_targets_buffer.clear(); let (cost, alignment) = self.primary_aligner.align( - start, - end, + self.sequences.primary_start(), + self.sequences.primary_end(), &mut self.additional_primary_targets_buffer, &mut PanicOnExtend, ); @@ -158,7 +155,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> { self.additional_primary_targets_buffer.clear(); let (cost, alignment) = self.primary_aligner.align( - start, + self.sequences.primary_start(), end, &mut self.additional_primary_targets_buffer, &mut PanicOnExtend, @@ -196,7 +193,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> { self.additional_secondary_targets_buffer.clear(); let (cost, alignment) = self.ts_12_jump_aligner.align( - start, + self.sequences.primary_start(), end, &mut self.additional_secondary_targets_buffer, ); @@ -231,7 +228,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> self.additional_primary_targets_buffer.clear(); let (cost, alignment) = self.primary_aligner.align( start, - end, + self.sequences.primary_end(), &mut self.additional_primary_targets_buffer, &mut PanicOnExtend, ); @@ -267,7 +264,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> self.additional_primary_targets_buffer.clear(); let (cost, alignment) = self.ts_34_jump_aligner.align( start, - end, + self.sequences.primary_end(), &mut self.additional_primary_targets_buffer, ); self.total_gap_fillings += diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index 70d73008..2eddd92c 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -32,14 +32,14 @@ impl ChainingCostFunction { chaining_lower_bounds: &ChainingLowerBounds, anchors: &Anchors, sequences: &AlignmentSequences, - start: AlignmentCoordinates, - end: AlignmentCoordinates, max_exact_cost_function_cost: Cost, rc_fn: &dyn Fn(u8) -> u8, ) -> Self { info!("Initialising chaining cost function..."); let start_time = Instant::now(); + let start = sequences.primary_start(); + let end = sequences.primary_end(); let k = usize::try_from(chaining_lower_bounds.max_match_run() + 1).unwrap(); let primary_anchor_amount = anchors.primary_len() + 2; let primary_start_anchor_index = AnchorIndex::zero(); @@ -84,7 +84,7 @@ impl ChainingCostFunction { trace!("Fill primary"); additional_primary_targets_output.clear(); primary_aligner.align_until_cost_limit( - sequences.primary_start(), + start, max_exact_cost_function_cost, &mut additional_primary_targets_output, &mut PanicOnExtend, @@ -663,7 +663,7 @@ impl ChainingCostFunction { let target = &mut self.primary[[from_primary_index + 1, to_primary_index + 1]]; assert!( *target <= cost, - "Target is larger than cost.\ntarget: {target}; cost: {cost}; from_primary_index: {from_primary_index}; to_primary_index: {to_primary_index}; is_exact: {is_exact}" + "Target is larger than cost.\ntarget: {target}; cost: {cost}; from_primary_index: {from_primary_index}; to_primary_index: {to_primary_index}; is_exact: {is_exact}", ); let result = *target < cost; *target = cost; @@ -698,7 +698,10 @@ impl ChainingCostFunction { self.primary.set_exact(primary_index + 1, end_index); } let target = &mut self.primary[[primary_index + 1, end_index]]; - assert!(*target <= cost); + assert!( + *target <= cost, + "Target is larger than cost.\ntarget: {target}; cost: {cost}; from_primary_index: {primary_index}; is_exact: {is_exact}", + ); let result = *target < cost; *target = cost; result diff --git a/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs b/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs index 13c72fe1..b951b1f4 100644 --- a/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs +++ b/lib_ts_chainalign/src/exact_chaining/gap_affine/tests.rs @@ -19,7 +19,7 @@ fn rc_fn(c: u8) -> u8 { fn test_start_end() { let seq1 = b"ACGT".to_vec(); let seq2 = b"ACGTT".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = GapAffineCosts::new(U32Cost::from(2u8), U32Cost::from(3u8), U32Cost::from(1u8)); @@ -39,7 +39,7 @@ fn test_start_end() { fn test_partial_alignment() { let seq1 = b"ACCGT".to_vec(); let seq2 = b"ACGGTT".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = GapAffineCosts::new(U32Cost::from(2u8), U32Cost::from(3u8), U32Cost::from(1u8)); @@ -63,7 +63,7 @@ fn test_partial_alignment() { fn test_gap_directions() { let seq1 = b"ACGCCGTGTTCT".to_vec(); let seq2 = b"ACGGTGTTAACT".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = GapAffineCosts::new(U32Cost::from(2u8), U32Cost::from(3u8), U32Cost::from(1u8)); @@ -89,7 +89,7 @@ fn test_gap_directions() { fn test_extremity_gaps() { let seq1 = b"ACGCCGTGTTCT".to_vec(); let seq2 = b"ACGGTGTTAACT".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = GapAffineCosts::new(U32Cost::from(2u8), U32Cost::from(3u8), U32Cost::from(1u8)); @@ -113,7 +113,7 @@ fn test_extremity_gaps() { fn test_extremity_substitutions() { let seq1 = b"AGGGA".to_vec(); let seq2 = b"TGGGT".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = GapAffineCosts::new(U32Cost::from(2u8), U32Cost::from(3u8), U32Cost::from(1u8)); @@ -137,7 +137,7 @@ fn test_extremity_substitutions() { fn test_substitutions_as_gaps() { let seq1 = b"AAAAAAAAAAAAAAAAAAAA".to_vec(); let seq2 = b"TTTTTTTTTTTTTTTTTTTT".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = GapAffineCosts::new(U32Cost::from(3u8), U32Cost::from(3u8), U32Cost::from(1u8)); @@ -157,7 +157,7 @@ fn test_substitutions_as_gaps() { fn test_max_match_run_0() { let seq1 = b"AAAAAAAAAA".to_vec(); let seq2 = b"AACAACCAAA".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = GapAffineCosts::new(U32Cost::from(2u8), U32Cost::from(3u8), U32Cost::from(1u8)); @@ -177,7 +177,7 @@ fn test_max_match_run_0() { fn test_max_match_run_1() { let seq1 = b"AAAAAAAAAA".to_vec(); let seq2 = b"AACAACCAAA".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = GapAffineCosts::new(U32Cost::from(2u8), U32Cost::from(3u8), U32Cost::from(1u8)); @@ -206,7 +206,7 @@ fn test_max_match_run_1() { fn test_max_match_run_2() { let seq1 = b"AAAAAAAAAA".to_vec(); let seq2 = b"AACAACCAAA".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = GapAffineCosts::new(U32Cost::from(2u8), U32Cost::from(3u8), U32Cost::from(1u8)); @@ -232,7 +232,7 @@ fn test_max_match_run_2() { fn test_secondary_12() { let seq1 = b"GAAAAAAATG".to_vec(); let seq2 = b"GTTTTTTTTG".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = GapAffineCosts::new(U32Cost::from(2u8), U32Cost::from(3u8), U32Cost::from(1u8)); @@ -252,7 +252,7 @@ fn test_secondary_12() { fn test_secondary_21() { let seq1 = b"GAAAAAAATG".to_vec(); let seq2 = b"GTTTTTTTTG".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = GapAffineCosts::new(U32Cost::from(2u8), U32Cost::from(3u8), U32Cost::from(1u8)); diff --git a/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs b/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs index 897f53e3..b1578a49 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_12_jump/tests.rs @@ -23,7 +23,7 @@ fn rc_fn(c: u8) -> u8 { fn test_start_end() { let seq1 = b"AAGG".to_vec(); let seq2 = b"ACGTT".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(2u8), @@ -72,7 +72,7 @@ fn test_start_end() { fn test_partial_alignment() { let seq1 = b"AAGG".to_vec(); let seq2 = b"ACGTT".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(2u8), @@ -120,7 +120,7 @@ fn test_partial_alignment() { fn test_gap_directions() { let seq1 = b"CCCCCCACCAACAAAAAA".to_vec(); let seq2 = b"AAAAAACAAGGGGGGAGG".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(10u8), @@ -175,7 +175,7 @@ fn test_gap_directions() { fn test_max_match_run_0() { let seq1 = b"GGAGGAGGAACAACAA".to_vec(); let seq2 = b"AAAAAAAACCTCCTCC".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(2u8), @@ -221,7 +221,7 @@ fn test_max_match_run_0() { fn test_max_match_run_1() { let seq1 = b"GGAGGAGGAACAACAA".to_vec(); let seq2 = b"AAAAAAAACCTCCTCC".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(2u8), @@ -269,7 +269,7 @@ fn test_max_match_run_1() { fn test_max_match_run_2() { let seq1 = b"GGAGGAGGAACAACAA".to_vec(); let seq2 = b"AAAAAAAACCCCCCCC".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(2u8), @@ -324,7 +324,7 @@ fn test_max_match_run_2() { fn test_only_jump() { let seq1 = b"ACGTACGTAC".to_vec(); let seq2 = b"ACGTACGTAC".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(2u8), @@ -367,7 +367,7 @@ fn test_only_jump() { fn test_only_jump_start() { let seq1 = b"ACGTACGTAC".to_vec(); let seq2 = b"ACGTACGTAC".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(2u8), @@ -410,7 +410,7 @@ fn test_only_jump_start() { fn test_only_jump_end() { let seq1 = b"ACGTACGTAC".to_vec(); let seq2 = b"ACGTACGTAC".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(2u8), diff --git a/lib_ts_chainalign/src/exact_chaining/ts_34_jump/tests.rs b/lib_ts_chainalign/src/exact_chaining/ts_34_jump/tests.rs index 1efcf4c7..4bb56ca1 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_34_jump/tests.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_34_jump/tests.rs @@ -23,7 +23,7 @@ fn rc_fn(c: u8) -> u8 { fn test_start_end() { let seq1 = b"AAGG".to_vec(); let seq2 = b"TTACG".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(2u8), @@ -66,7 +66,7 @@ fn test_start_end() { fn test_partial_alignment() { let seq1 = b"AAGG".to_vec(); let seq2 = b"TTACG".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(2u8), @@ -108,7 +108,7 @@ fn test_partial_alignment() { fn test_gap_directions() { let seq1 = b"CCCCCCACCAACAAAAAA".to_vec(); let seq2 = b"AAAAAACAAGGGGGGAGG".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(10u8), @@ -157,7 +157,7 @@ fn test_gap_directions() { fn test_max_match_run_0() { let seq1 = b"GGAGGAGGAACAACAA".to_vec(); let seq2 = b"CCCCCCCCAAAAAAAA".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(1u8), @@ -197,7 +197,7 @@ fn test_max_match_run_0() { fn test_max_match_run_1() { let seq1 = b"GGAGGAGGAACAACAA".to_vec(); let seq2 = b"CCCCCCCCAAAAAAAA".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(1u8), @@ -245,7 +245,7 @@ fn test_max_match_run_1() { fn test_max_match_run_2() { let seq1 = b"GGAGGAGGAACAACAA".to_vec(); let seq2 = b"CCCCCCCCAAAAAAAA".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(1u8), @@ -294,7 +294,7 @@ fn test_max_match_run_2() { fn test_only_jump() { let seq1 = b"ACATCTGCAA".to_vec(); let seq2 = b"ACGCAGATAA".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(1u8), @@ -331,7 +331,7 @@ fn test_only_jump() { fn test_only_jump_start() { let seq1 = b"ACATCTGCAA".to_vec(); let seq2 = b"ACGCAGATAA".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(1u8), @@ -368,7 +368,7 @@ fn test_only_jump_start() { fn test_only_jump_end() { let seq1 = b"ACATCTGCAA".to_vec(); let seq2 = b"ACGCAGATAA".to_vec(); - let sequences = AlignmentSequences::new(seq1, seq2); + let sequences = AlignmentSequences::new_complete(seq1, seq2); let cost_table = AlignmentCosts { primary_costs: GapAffineCosts::new( U32Cost::from(1u8), diff --git a/lib_ts_chainalign/src/lib.rs b/lib_ts_chainalign/src/lib.rs index 3bca35b6..f090c86d 100644 --- a/lib_ts_chainalign/src/lib.rs +++ b/lib_ts_chainalign/src/lib.rs @@ -88,27 +88,23 @@ pub fn align( query, reference_name.to_string(), query_name.to_string(), + AlignmentCoordinates::new_primary(range.reference_offset(), range.query_offset()), + AlignmentCoordinates::new_primary(range.reference_limit(), range.query_limit()), ); let k = chaining_lower_bounds.max_match_run() + 1; let anchors = Anchors::new(&sequences, range.clone(), k, rc_fn); trace!("Anchors:\n{anchors}"); - let start = AlignmentCoordinates::new_primary(range.reference_offset(), range.query_offset()); - let end = AlignmentCoordinates::new_primary(range.reference_limit(), range.query_limit()); let mut chaining_cost_function = ChainingCostFunction::new_from_lower_bounds( chaining_lower_bounds, &anchors, &sequences, - start, - end, performance_parameters.max_exact_cost_function_cost, rc_fn, ); chain_align::align::( &sequences, - start, - end, performance_parameters, chaining_lower_bounds.alignment_costs(), rc_fn, diff --git a/test_files/twin_20_badends.fa b/test_files/twin_20_badends.fa new file mode 100644 index 00000000..87dfa245 --- /dev/null +++ b/test_files/twin_20_badends.fa @@ -0,0 +1,8 @@ +>reference +GGGGG +ACGCAGATGA +GGGGG +>query +AAAAA +ACGCAGATGA +AAAAA \ No newline at end of file From 3333a8f9fee1a3cbc77d2eff03f2b607b048f2bd Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Tue, 13 Jan 2026 09:08:42 +0200 Subject: [PATCH 4/4] Add debug files and message. --- lib_ts_chainalign/src/chaining_cost_function.rs | 5 ++++- test_files/twin_10_no_anchors.fa | 4 ++++ test_files/twin_ari_email_469.fa | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 test_files/twin_10_no_anchors.fa create mode 100644 test_files/twin_ari_email_469.fa diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index 2eddd92c..b4756ff4 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -718,7 +718,10 @@ impl ChainingCostFunction { .set_exact(Self::primary_start_anchor_index(), end_index); } let target = &mut self.primary[[Self::primary_start_anchor_index(), end_index]]; - assert!(*target <= cost); + assert!( + *target <= cost, + "Target is larger than cost.\ntarget: {target}; cost: {cost}; is_exact: {is_exact}", + ); let result = *target < cost; *target = cost; result diff --git a/test_files/twin_10_no_anchors.fa b/test_files/twin_10_no_anchors.fa new file mode 100644 index 00000000..1c68051b --- /dev/null +++ b/test_files/twin_10_no_anchors.fa @@ -0,0 +1,4 @@ +>reference +AAAAAAAAAA +>query +GGGGGGGGGG \ No newline at end of file diff --git a/test_files/twin_ari_email_469.fa b/test_files/twin_ari_email_469.fa new file mode 100644 index 00000000..ea175c1e --- /dev/null +++ b/test_files/twin_ari_email_469.fa @@ -0,0 +1,4 @@ +>REF +TAGATATGTAAATGATCATTTTATTTAGTGCCAAAGTTCACACATCTATTTGGTCAAGTTCCCTTTGTTTCTCTACTGAGTATGTCTCCAGACACTAAAATACTAGGTTTATTCAATTCTTGCCCAGTAATTAATTTTAAAAATGTAACCCAAATTAATTGTGTCTGAAATAATTTTTCTCTTCACTAAACTTACTGACCATCTGACTGAATTGACAAATTAGATAAAAGATGATTGTGCAGGACTTTGGTGGAAAGCTCTGAACATATTCTCTGAGAAAACCCTAACAGTCAGATAACGAGTCTCATTTTACGGCCTCTTAGAATCTCAGGATTCTCACTATTCATGTGGCTTAACCTTTCACCGCTTTCATGTCCATAACAAAACATTCTGAGCATTTCTTAATTCATCAAAAGATTTTGTTTTTTCTAAATATTTCACATCCTGACAGTTAATTATTAATATTTTTCCAATTTCCCTATTTGTTTCCTCAGTAGAATGCAAGTTAACTTGCATCCATCAGTAGAATGAGATTAACATGCAATAGAAACGCAATACGGCTGTGAACCAATATGTTCTGAAATATTCAAGAATAATATACATTTATCTTTTAACAACATGGAAGTTATAGTCATGAATAGTAAAAATAACGGAAAAATCATTATCATTTTTACTAACTTTCAGGTTCTCCTTCCCAAGTATAGCCGTTGACCATGAAACGAAACTGATTTAGGTTTGGCTAGAATTATTTTTTCCTAATCAAGAGATTGCTAAGTGAAAACAAAATATTTAGAATGAATATTGGGGGAAGATAGAGATTTACTTCCTGTTACAGTAAGAAAATAACAAGGTAAGCTAAAAATCATATTTTTCCATAAAGAGAAAATAGGCATAAACTGCTGAAAATCAAATAAAAACAGAAAATCTAAAAGCAATTGCAggctgggtgcggtggctcacgcccataatcccaacactttggggggtgggatcacctgacgggggtg +>ALT +TAGATATGTAAATGATCATTTTATTTAGTGCCAAAGTTCACACATCTATTTGGTCAAGTTCCCTTTGTTTCTCTACTGAGTATGTCTCCAGACACTAAAATACTAGGTTTATTCAATTCTTGCCCAGTAATTAATTTTAAAAATGTAACCCAAATTAATTGTGTCTGAAATAATTTTTCTCTTCACTAAACTTACTGACCATCTGACTGAATTGACAAATTAGATAAAAGATGATTGTGCAGGACTTTGGTGGAAAGCTCTGAACATATTCTCTGAGAAAACCCTAACAGTCAGATAACGAGTCTCATTTTACGGCCTCTTAGAATCTCAGGATTCTCACTATTCATGTGGCTTAACCTTTCACCGCTTTCATGTCCATAACAAAACATTCTGAGCATTTCTTAATTCATCAAAAGATTTTGTTTTTTCTAAATATTTCACATCCTGACAGTTAATTATTAATATTTTTCCAATTTCCCTATTTGTTTCCTCAGTAGAATG-ATGTTAACTTGCATCCATCAGTAGAATGAGATTAACATGCAATAGAAACGCAATACGGCTGTGAACCAATATGTTCTGAAATATTCAAGAATAATATACATTTATCTTTTAACAACATGGAAGTTATAGTCATGAATAGTAAAAATAACGGAAAAATCATTATCATTTTTACTAACTTTCAGGTTCTCCTTCCCAAGTATAGCCGTTGACCATGAAACGAAACTGATTTAGGTTTGGCTAGAATTATTTTTTCCTAATCAAGAGATTGCTAAGTGAAAACAAAATATTTAGAATGAATATTGGGGGAAGATAGAGATTTACTTCCTGTTACAGTAAGAAAATAACAAGGTAAGCTAAAAATCATATTTTTCCATAAAGAGAAAATAGGCATAAACTGCTGAAAATCAAATAAAAACAGAAAATCTAAAAGCAATTGCAggctgggtgcggtggctcacgcccataatcccaacactttggggggtgggatcacctgacgggggtg \ No newline at end of file