From f2fb051d05ee6f8556d2d5a9b6ce556172fb35c9 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Tue, 6 May 2025 12:17:15 +0300 Subject: [PATCH 01/10] In progress: extend TS start backwards. --- lib_tsalign/src/a_star_aligner.rs | 6 +- .../src/a_star_aligner/alignment_result.rs | 116 ++++++++- .../alignment_result/alignment.rs | 5 + .../alignment_result/alignment/stream.rs | 229 ++++++++++++++++++ .../template_switch_distance.rs | 3 + .../alignment_type.rs | 9 +- .../alignment_type/equal_cost_range.rs | 41 ++++ .../template_switch_distance/display.rs | 21 +- .../template_switch_distance/identifier.rs | 36 ++- lib_tsshow/src/plain_text.rs | 26 +- .../src/plain_text/parse_template_switches.rs | 16 +- 11 files changed, 483 insertions(+), 25 deletions(-) create mode 100644 lib_tsalign/src/a_star_aligner/alignment_result/alignment/stream.rs create mode 100644 lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type/equal_cost_range.rs diff --git a/lib_tsalign/src/a_star_aligner.rs b/lib_tsalign/src/a_star_aligner.rs index 203a44dd..627feba6 100644 --- a/lib_tsalign/src/a_star_aligner.rs +++ b/lib_tsalign/src/a_star_aligner.rs @@ -170,7 +170,7 @@ pub fn template_switch_distance_a_star_align< primary_match: (), }; - a_star_align(template_switch_distance::Context::< + let mut result = a_star_align(template_switch_distance::Context::< SubsequenceType, Strategies, >::new( @@ -183,5 +183,7 @@ pub fn template_switch_distance_a_star_align< memory, cost_limit, memory_limit, - )) + )); + result.compute_ts_equal_cost_ranges(); + result } diff --git a/lib_tsalign/src/a_star_aligner/alignment_result.rs b/lib_tsalign/src/a_star_aligner/alignment_result.rs index 020e8085..693c43a6 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result.rs @@ -1,12 +1,18 @@ use std::fmt::{Display, Formatter, Result, Write}; use a_star_sequences::SequencePair; -use alignment::Alignment; +use alignment::{Alignment, stream::AlignmentStream}; use compact_genome::interface::{alphabet::Alphabet, sequence::GenomeSequence}; use generic_a_star::{AStarResult, cost::AStarCost}; use noisy_float::types::{R64, r64}; use num_traits::{Float, Zero}; +use crate::config::TemplateSwitchConfig; + +use super::template_switch_distance::{ + TemplateSwitchDirection, TemplateSwitchPrimary, TemplateSwitchSecondary, +}; + pub mod a_star_sequences; pub mod alignment; @@ -225,6 +231,114 @@ impl AlignmentResult AlignmentResult { + pub fn compute_ts_equal_cost_ranges< + AlphabetType: Alphabet, + SubsequenceType: GenomeSequence + ?Sized, + >( + &mut self, + reference: &SubsequenceType, + query: &SubsequenceType, + config: &TemplateSwitchConfig, + ) { + let Self::WithTarget { alignment, .. } = self else { + return; + }; + let mut stream = AlignmentStream::new(); + + for i in 0..alignment.inner_mut().len() { + let multiplicity = alignment.inner_mut()[i].0; + let alignment_type = alignment.inner_mut()[i].1; + + match alignment_type { + super::template_switch_distance::AlignmentType::TemplateSwitchEntrance { + first_offset, + primary, + secondary, + direction, + mut equal_cost_range, + .. + } => { + equal_cost_range.min_start = 0; + equal_cost_range.max_start = 0; + equal_cost_range.min_end = 0; + equal_cost_range.max_end = 0; + + // Extend start backwards. + let mut remaining_left_flank = config.left_flank_length; + let mut preceding_reference_index = stream.head_coordinates().reference(); + let mut preceding_query_index = stream.head_coordinates().query(); + let mut preceding_alignment = alignment.iter_flat_cloned().rev(); + let mut inner_primary_index = match primary { + TemplateSwitchPrimary::Reference => preceding_reference_index, + TemplateSwitchPrimary::Query => preceding_query_index, + }; + let mut inner_secondary_index = (match secondary { + TemplateSwitchSecondary::Reference => preceding_reference_index, + TemplateSwitchSecondary::Query => preceding_query_index, + } as isize + + first_offset) + as usize; + + loop { + match preceding_alignment.next() { + Some( + super::template_switch_distance::AlignmentType::PrimaryFlankMatch + | super::template_switch_distance::AlignmentType::PrimaryMatch, + ) => { /* Do not exit loop. */ } + _ => break, + } + + inner_primary_index -= 1; + match direction { + TemplateSwitchDirection::Forward => inner_secondary_index -= 1, + TemplateSwitchDirection::Reverse => inner_secondary_index += 1, + } + preceding_reference_index -= 1; + preceding_query_index -= 1; + + let primary_character = + primary.get(reference, query)[inner_primary_index].clone(); + let secondary_character = + secondary.get(reference, query)[inner_secondary_index].clone(); + let reference_character = reference[preceding_reference_index].clone(); + let query_character = query[preceding_query_index].clone(); + + let preceding_cost = if remaining_left_flank > 0 { + remaining_left_flank -= 1; + &config.left_flank_edit_costs + } else { + &config.primary_edit_costs + } + .match_or_substitution_cost(reference_character, query_character); + let inner_cost = config + .secondary_edit_costs(direction) + .match_or_substitution_cost(primary_character, secondary_character); + + if preceding_cost.is_zero() && inner_cost.is_zero() { + equal_cost_range.min_start -= 1; + } else { + break; + } + } + + let super::template_switch_distance::AlignmentType::TemplateSwitchEntrance { + equal_cost_range: alignment_equal_cost_range, + .. + } = &mut alignment.inner_mut()[i].1 + else { + unreachable!() + }; + *alignment_equal_cost_range = equal_cost_range; + } + _ => { /* Do nothing. */ } + } + + stream.push(multiplicity, alignment_type); + } + } +} + impl AlignmentResult { pub fn statistics(&self) -> &AlignmentStatistics { match self { diff --git a/lib_tsalign/src/a_star_aligner/alignment_result/alignment.rs b/lib_tsalign/src/a_star_aligner/alignment_result/alignment.rs index 78ed88c1..fc3df58d 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result/alignment.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result/alignment.rs @@ -7,6 +7,7 @@ use iter::{ use super::IAlignmentType; pub mod iter; +pub mod stream; #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -33,6 +34,10 @@ impl Alignment { self.alignment.push((1, alignment_type)); } } + + pub fn inner_mut(&mut self) -> &mut Vec<(usize, AlignmentType)> { + &mut self.alignment + } } impl Alignment { diff --git a/lib_tsalign/src/a_star_aligner/alignment_result/alignment/stream.rs b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/stream.rs new file mode 100644 index 00000000..176f31c4 --- /dev/null +++ b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/stream.rs @@ -0,0 +1,229 @@ +use std::{collections::VecDeque, iter}; + +use crate::a_star_aligner::{ + alignment_result::alignment::Alignment, + template_switch_distance::{AlignmentType, TemplateSwitchPrimary}, +}; + +#[derive(Debug, Clone, Default)] +pub struct AlignmentStream { + stream: VecDeque<(usize, AlignmentType)>, + length: usize, + + /// The alignment coordinates of the first alignment after the head of the queue. + head_coordinates: AlignmentStreamCoordinates, + /// The alignment coordinates of the tailmost element in the queue. + tail_coordinates: AlignmentStreamCoordinates, +} + +#[derive(Debug, Default, Clone, Copy)] +pub struct AlignmentStreamCoordinates { + reference: usize, + query: usize, + template_switch_primary: Option, +} + +impl AlignmentStream { + pub fn new() -> Self { + Default::default() + } + + pub fn new_with_offset(reference_offset: usize, query_offset: usize) -> Self { + let mut result = Self::new(); + result.head_coordinates.reference = reference_offset; + result.head_coordinates.query = query_offset; + result.tail_coordinates.reference = reference_offset; + result.tail_coordinates.query = query_offset; + result + } + + pub fn len(&self) -> usize { + self.length + } + + pub fn stream_iter(&self) -> impl use<'_> + Iterator { + self.stream.iter().copied() + } + + pub fn stream_iter_flat(&self) -> impl use<'_> + Iterator { + self.stream_iter() + .flat_map(|(multiplicity, alignment_type)| iter::repeat_n(alignment_type, multiplicity)) + } + + fn stream_vec(&self) -> Vec<(usize, AlignmentType)> { + self.stream_iter().collect() + } + + pub fn stream_alignment(&self) -> Alignment { + self.stream_vec().into() + } + + pub fn head_coordinates(&self) -> AlignmentStreamCoordinates { + self.head_coordinates + } + + pub fn tail_coordinates(&self) -> AlignmentStreamCoordinates { + self.tail_coordinates + } + + pub fn push_until_full( + &mut self, + multiplicity: &mut usize, + alignment_type: AlignmentType, + requested_length: usize, + ) { + let available_length = requested_length - self.length; + let push_length = *multiplicity * Self::stream_length(alignment_type); + + if available_length >= push_length { + self.push(*multiplicity, alignment_type); + *multiplicity = 0; + } else { + let push_multiplicity = available_length.div_ceil(Self::stream_length(alignment_type)); + self.push(push_multiplicity, alignment_type); + *multiplicity -= push_multiplicity; + } + } + + pub fn clear(&mut self) { + self.pop(0); + } + + pub fn is_full(&self, requested_length: usize) -> bool { + self.length >= requested_length + } + + pub fn is_empty(&self) -> bool { + self.stream_iter().next().is_none() + } + + pub fn push(&mut self, multiplicity: usize, alignment_type: AlignmentType) { + self.stream.push_back((multiplicity, alignment_type)); + self.head_coordinates.advance(multiplicity, alignment_type); + self.length += multiplicity * Self::stream_length(alignment_type); + } + + /// Pops one unit of length from the tail of the stream. + pub fn pop_one(&mut self) { + self.pop(self.length.saturating_sub(1)); + } + + pub fn pop(&mut self, requested_length: usize) { + while self.length > requested_length { + let requested_pop_length = self.length - requested_length; + let (multiplicity, alignment_type) = self.stream.front_mut().unwrap(); + let front_length = *multiplicity * Self::stream_length(*alignment_type); + + if front_length <= requested_pop_length { + self.tail_coordinates + .advance(*multiplicity, *alignment_type); + self.stream.pop_front(); + self.length -= front_length; + } else { + let pop_multiplicity = requested_pop_length / Self::stream_length(*alignment_type); + self.tail_coordinates + .advance(pop_multiplicity, *alignment_type); + *multiplicity -= pop_multiplicity; + self.length -= pop_multiplicity * Self::stream_length(*alignment_type); + break; + } + } + + while let Some((multiplicity, alignment_type)) = self.stream.front().copied() { + if Self::stream_length(alignment_type) == 0 { + self.tail_coordinates.advance(multiplicity, alignment_type); + self.stream.pop_front(); + } else { + break; + } + } + } + + fn stream_length(alignment_type: AlignmentType) -> usize { + match alignment_type { + AlignmentType::PrimaryInsertion + | AlignmentType::PrimaryDeletion + | AlignmentType::PrimarySubstitution + | AlignmentType::PrimaryMatch + | AlignmentType::PrimaryFlankInsertion + | AlignmentType::PrimaryFlankDeletion + | AlignmentType::PrimaryFlankSubstitution + | AlignmentType::PrimaryFlankMatch + | AlignmentType::SecondaryInsertion + | AlignmentType::SecondaryDeletion + | AlignmentType::SecondarySubstitution + | AlignmentType::SecondaryMatch => 1, + AlignmentType::TemplateSwitchEntrance { .. } + | AlignmentType::TemplateSwitchExit { .. } + | AlignmentType::Root + | AlignmentType::SecondaryRoot + | AlignmentType::PrimaryReentry => 0, + AlignmentType::PrimaryShortcut { .. } => { + unreachable!("Shortcut alignments are not supported for show") + } + } + } +} + +impl AlignmentStreamCoordinates { + pub fn reference(&self) -> usize { + self.reference + } + + pub fn query(&self) -> usize { + self.query + } + + fn advance(&mut self, multiplicity: usize, alignment_type: AlignmentType) { + let (reference_length, query_length) = match alignment_type { + AlignmentType::PrimaryInsertion | AlignmentType::PrimaryFlankInsertion => (0, 1), + AlignmentType::PrimaryDeletion | AlignmentType::PrimaryFlankDeletion => (1, 0), + AlignmentType::PrimarySubstitution + | AlignmentType::PrimaryMatch + | AlignmentType::PrimaryFlankSubstitution + | AlignmentType::PrimaryFlankMatch => (1, 1), + AlignmentType::TemplateSwitchEntrance { primary, .. } => { + assert!( + self.template_switch_primary.is_none(), + "Encountered template switch entrance within template switch" + ); + self.template_switch_primary = Some(primary); + (0, 0) + } + AlignmentType::SecondaryInsertion + | AlignmentType::SecondarySubstitution + | AlignmentType::SecondaryMatch => match self.template_switch_primary.unwrap() { + TemplateSwitchPrimary::Reference => (1, 0), + TemplateSwitchPrimary::Query => (0, 1), + }, + AlignmentType::TemplateSwitchExit { anti_primary_gap } => { + let Some(template_switch_primary) = self.template_switch_primary.take() else { + panic!( + "Encountered template switch exit without first encountering a template switch entrance" + ) + }; + match template_switch_primary { + TemplateSwitchPrimary::Reference => { + self.query = + usize::try_from(self.query as isize + anti_primary_gap).unwrap() + } + TemplateSwitchPrimary::Query => { + self.reference = + usize::try_from(self.reference as isize + anti_primary_gap).unwrap() + } + } + (0, 0) + } + AlignmentType::SecondaryDeletion + | AlignmentType::Root + | AlignmentType::SecondaryRoot + | AlignmentType::PrimaryReentry => (0, 0), + AlignmentType::PrimaryShortcut { .. } => { + unreachable!("Shortcut alignments are not supported for show") + } + }; + + self.reference += multiplicity * reference_length; + self.query += multiplicity * query_length; + } +} diff --git a/lib_tsalign/src/a_star_aligner/template_switch_distance.rs b/lib_tsalign/src/a_star_aligner/template_switch_distance.rs index 5d767ca1..4446fd60 100644 --- a/lib_tsalign/src/a_star_aligner/template_switch_distance.rs +++ b/lib_tsalign/src/a_star_aligner/template_switch_distance.rs @@ -1,5 +1,6 @@ use std::fmt::Display; +use alignment_type::equal_cost_range::EqualCostRange; use compact_genome::interface::sequence::GenomeSequence; use generic_a_star::{AStarNode, cost::AStarCost}; use identifier::GapType; @@ -296,6 +297,7 @@ impl Node { primary: *template_switch_primary, secondary: *template_switch_secondary, direction: *template_switch_direction, + equal_cost_range: EqualCostRange::new_invalid(), first_offset: *template_switch_first_offset, }, context, @@ -342,6 +344,7 @@ impl Node { primary: template_switch_primary, secondary: template_switch_secondary, direction: template_switch_direction, + equal_cost_range: EqualCostRange::new_invalid(), first_offset: successor_template_switch_first_offset, }, context, diff --git a/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type.rs b/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type.rs index 7dffa41c..e6b3dce9 100644 --- a/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type.rs +++ b/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type.rs @@ -1,7 +1,11 @@ +use equal_cost_range::EqualCostRange; + use crate::a_star_aligner::alignment_result::IAlignmentType; use super::identifier::{TemplateSwitchDirection, TemplateSwitchPrimary, TemplateSwitchSecondary}; +pub mod equal_cost_range; + #[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum AlignmentType { @@ -39,10 +43,11 @@ pub enum AlignmentType { SecondaryMatch, /// A template switch entrance. TemplateSwitchEntrance { + first_offset: isize, + equal_cost_range: EqualCostRange, primary: TemplateSwitchPrimary, secondary: TemplateSwitchSecondary, direction: TemplateSwitchDirection, - first_offset: isize, }, /// A template switch exit. TemplateSwitchExit { @@ -158,11 +163,13 @@ impl AlignmentType { primary, secondary, direction, + equal_cost_range, first_offset, } => Self::TemplateSwitchEntrance { primary: primary.inverted(), secondary: secondary.inverted(), direction: direction.inverted(), + equal_cost_range: *equal_cost_range, first_offset: *first_offset, }, Self::PrimaryShortcut { diff --git a/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type/equal_cost_range.rs b/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type/equal_cost_range.rs new file mode 100644 index 00000000..00942cf7 --- /dev/null +++ b/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type/equal_cost_range.rs @@ -0,0 +1,41 @@ +/// A heuristic range within which the start and end of the TS can be shifted without increasing cost. +/// +/// The range may not be maximal, but is required to be correct. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct EqualCostRange { + /// How much the start of the TS can be shifted to the left without increasing cost. + /// + /// This number must not be positive. + pub min_start: i8, + + /// How much the start of the TS can be shifted to the right without increasing cost. + /// + /// This number must not be negative. + pub max_start: i8, + + /// How much the end of the TS can be shifted to the left without increasing cost. + /// + /// This number must not be positive. + pub min_end: i8, + + /// How much the end of the TS can be shifted to the right without increasing cost. + /// + /// This number must not be negative. + pub max_end: i8, +} + +impl EqualCostRange { + pub fn new_invalid() -> Self { + Self { + min_start: 1, + max_start: -1, + min_end: 1, + max_end: -1, + } + } + + pub fn is_valid(&self) -> bool { + self.min_start <= 0 && self.max_start >= 0 && self.min_end <= 0 && self.max_end >= 0 + } +} diff --git a/lib_tsalign/src/a_star_aligner/template_switch_distance/display.rs b/lib_tsalign/src/a_star_aligner/template_switch_distance/display.rs index a3c7eaef..cfbf05b7 100644 --- a/lib_tsalign/src/a_star_aligner/template_switch_distance/display.rs +++ b/lib_tsalign/src/a_star_aligner/template_switch_distance/display.rs @@ -2,7 +2,7 @@ use std::fmt::{Display, Formatter, Result}; use super::{ AlignmentType, GapType, Identifier, TemplateSwitchPrimary, TemplateSwitchSecondary, - identifier::TemplateSwitchDirection, + alignment_type::equal_cost_range::EqualCostRange, identifier::TemplateSwitchDirection, }; impl Display for AlignmentType { @@ -22,8 +22,12 @@ impl Display for AlignmentType { primary, secondary, direction, + equal_cost_range, first_offset, - } => write!(f, "[TS{primary}{secondary}{direction}{first_offset}:"), + } => write!( + f, + "[TS{primary}{secondary}{direction}:{equal_cost_range}:{first_offset}:" + ), Self::TemplateSwitchExit { anti_primary_gap } => write!(f, ":{anti_primary_gap}]"), Self::Root => Ok(()), Self::SecondaryRoot => Ok(()), @@ -73,6 +77,19 @@ impl Display for TemplateSwitchDirection { } } +impl Display for EqualCostRange { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + assert!(self.is_valid()); + let Self { + min_start, + max_start, + min_end, + max_end, + } = self; + write!(f, "[{min_start},{max_start}]:[{min_end},{max_end}]") + } +} + impl Display for Identifier { fn fmt(&self, f: &mut Formatter<'_>) -> Result { match self { diff --git a/lib_tsalign/src/a_star_aligner/template_switch_distance/identifier.rs b/lib_tsalign/src/a_star_aligner/template_switch_distance/identifier.rs index a7bc85cc..98a62fd7 100644 --- a/lib_tsalign/src/a_star_aligner/template_switch_distance/identifier.rs +++ b/lib_tsalign/src/a_star_aligner/template_switch_distance/identifier.rs @@ -1,4 +1,4 @@ -use compact_genome::interface::sequence::GenomeSequence; +use compact_genome::interface::{alphabet::Alphabet, sequence::GenomeSequence}; use super::{ AlignmentType, Context, @@ -445,6 +445,23 @@ impl TemplateSwitchPrimary { Self::Query => Self::Reference, } } + + pub fn get< + 'reference: 'result, + 'query: 'result, + 'result, + AlphabetType: Alphabet, + SubsequenceType: GenomeSequence + ?Sized, + >( + &self, + reference: &'reference SubsequenceType, + query: &'query SubsequenceType, + ) -> &'result SubsequenceType { + match self { + TemplateSwitchPrimary::Reference => reference, + TemplateSwitchPrimary::Query => query, + } + } } impl TemplateSwitchSecondary { @@ -454,6 +471,23 @@ impl TemplateSwitchSecondary { Self::Query => Self::Reference, } } + + pub fn get< + 'reference: 'result, + 'query: 'result, + 'result, + AlphabetType: Alphabet, + SubsequenceType: GenomeSequence + ?Sized, + >( + &self, + reference: &'reference SubsequenceType, + query: &'query SubsequenceType, + ) -> &'result SubsequenceType { + match self { + TemplateSwitchSecondary::Reference => reference, + TemplateSwitchSecondary::Query => query, + } + } } impl TemplateSwitchDirection { diff --git a/lib_tsshow/src/plain_text.rs b/lib_tsshow/src/plain_text.rs index 8c18d00e..232da49a 100644 --- a/lib_tsshow/src/plain_text.rs +++ b/lib_tsshow/src/plain_text.rs @@ -1,9 +1,12 @@ use std::{io::Write, iter}; -use alignment_stream::{AlignmentCoordinates, AlignmentStream}; use lib_tsalign::{ a_star_aligner::{ - alignment_result::{AlignmentResult, a_star_sequences::SequencePair}, + alignment_result::{ + AlignmentResult, + a_star_sequences::SequencePair, + alignment::stream::{AlignmentStream, AlignmentStreamCoordinates}, + }, template_switch_distance::{AlignmentType, TemplateSwitchPrimary, TemplateSwitchSecondary}, }, costs::U64Cost, @@ -14,7 +17,6 @@ use parse_template_switches::{STREAM_PADDING, TSShow}; use crate::error::Result; -pub mod alignment_stream; pub mod mutlipair_alignment_renderer; mod parse_template_switches; @@ -98,15 +100,16 @@ fn show_template_switch( &sequences.reference_name, reference, &reference_c, - Box::new(|alignment_coordinates: &AlignmentCoordinates| { + Box::new(|alignment_coordinates: &AlignmentStreamCoordinates| { alignment_coordinates.reference() - }) as Box usize>, + }) as Box usize>, "Child".to_string(), &sequences.query_name, query, &query_c, - Box::new(|alignment_coordinates: &AlignmentCoordinates| alignment_coordinates.query()) - as Box usize>, + Box::new(|alignment_coordinates: &AlignmentStreamCoordinates| { + alignment_coordinates.query() + }) as Box usize>, true, ), TemplateSwitchPrimary::Query => ( @@ -114,15 +117,16 @@ fn show_template_switch( &sequences.query_name, query, &query_c, - Box::new(|alignment_coordinates: &AlignmentCoordinates| alignment_coordinates.query()) - as Box usize>, + Box::new(|alignment_coordinates: &AlignmentStreamCoordinates| { + alignment_coordinates.query() + }) as Box usize>, "Parent".to_string(), &sequences.reference_name, reference, &reference_c, - Box::new(|alignment_coordinates: &AlignmentCoordinates| { + Box::new(|alignment_coordinates: &AlignmentStreamCoordinates| { alignment_coordinates.reference() - }) as Box usize>, + }) as Box usize>, false, ), }; diff --git a/lib_tsshow/src/plain_text/parse_template_switches.rs b/lib_tsshow/src/plain_text/parse_template_switches.rs index 16260ffd..72177909 100644 --- a/lib_tsshow/src/plain_text/parse_template_switches.rs +++ b/lib_tsshow/src/plain_text/parse_template_switches.rs @@ -1,5 +1,9 @@ use lib_tsalign::a_star_aligner::{ - alignment_result::alignment::{Alignment, iter::CompactAlignmentIterCloned}, + alignment_result::alignment::{ + Alignment, + iter::CompactAlignmentIterCloned, + stream::{AlignmentStream, AlignmentStreamCoordinates}, + }, template_switch_distance::{ AlignmentType, TemplateSwitchDirection, TemplateSwitchPrimary, TemplateSwitchSecondary, }, @@ -8,19 +12,17 @@ use log::{debug, trace}; use crate::error::Result; -use super::alignment_stream::{AlignmentCoordinates, AlignmentStream}; - pub const STREAM_DEFAULT_LENGTH: usize = 20; pub const STREAM_PADDING: usize = 10; #[derive(Debug)] pub struct TSShow { - pub upstream_offset: AlignmentCoordinates, - pub downstream_limit: AlignmentCoordinates, - pub sp1_offset: AlignmentCoordinates, + pub upstream_offset: AlignmentStreamCoordinates, + pub downstream_limit: AlignmentStreamCoordinates, + pub sp1_offset: AlignmentStreamCoordinates, pub sp2_secondary_offset: usize, pub sp3_secondary_offset: usize, - pub sp4_offset: AlignmentCoordinates, + pub sp4_offset: AlignmentStreamCoordinates, pub primary: TemplateSwitchPrimary, pub secondary: TemplateSwitchSecondary, pub upstream: Alignment, From 74e14859811c0eaf1ce3bf951ffc513b186e84aa Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Tue, 6 May 2025 12:18:40 +0300 Subject: [PATCH 02/10] Fix errors and lints. --- lib_tsalign/src/a_star_aligner.rs | 4 ++-- lib_tsalign/src/a_star_aligner/alignment_result.rs | 14 +++++--------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/lib_tsalign/src/a_star_aligner.rs b/lib_tsalign/src/a_star_aligner.rs index 627feba6..967faad6 100644 --- a/lib_tsalign/src/a_star_aligner.rs +++ b/lib_tsalign/src/a_star_aligner.rs @@ -179,11 +179,11 @@ pub fn template_switch_distance_a_star_align< reference_name, query_name, range, - config, + config.clone(), memory, cost_limit, memory_limit, )); - result.compute_ts_equal_cost_ranges(); + result.compute_ts_equal_cost_ranges(reference, query, &config); result } diff --git a/lib_tsalign/src/a_star_aligner/alignment_result.rs b/lib_tsalign/src/a_star_aligner/alignment_result.rs index 693c43a6..7452511e 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result.rs @@ -280,15 +280,11 @@ impl AlignmentResult { /* Do not exit loop. */ } - _ => break, - } - + while let Some( + super::template_switch_distance::AlignmentType::PrimaryFlankMatch + | super::template_switch_distance::AlignmentType::PrimaryMatch, + ) = preceding_alignment.next() + { inner_primary_index -= 1; match direction { TemplateSwitchDirection::Forward => inner_secondary_index -= 1, From fe0daa3ceba9d0ce5f33207c324b9888dab98832 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Tue, 6 May 2025 12:19:16 +0300 Subject: [PATCH 03/10] Fix errors in lib_tsshow. --- lib_tsshow/src/plain_text/parse_template_switches.rs | 1 + lib_tsshow/src/ts_arrangement/source.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/lib_tsshow/src/plain_text/parse_template_switches.rs b/lib_tsshow/src/plain_text/parse_template_switches.rs index 72177909..e8696976 100644 --- a/lib_tsshow/src/plain_text/parse_template_switches.rs +++ b/lib_tsshow/src/plain_text/parse_template_switches.rs @@ -64,6 +64,7 @@ fn parse_template_switch( secondary, direction, first_offset, + .. }, ) = alignment.peek_front_cloned().unwrap() else { diff --git a/lib_tsshow/src/ts_arrangement/source.rs b/lib_tsshow/src/ts_arrangement/source.rs index c5bb07e6..823f52aa 100644 --- a/lib_tsshow/src/ts_arrangement/source.rs +++ b/lib_tsshow/src/ts_arrangement/source.rs @@ -101,6 +101,7 @@ impl TsSourceArrangement { secondary, direction, first_offset, + .. } => { template_switches_out.extend([result.align_ts( ts_index, From 1a0650f2165f116f083adbc8f933a726cec3c4b6 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Tue, 6 May 2025 12:59:23 +0300 Subject: [PATCH 04/10] Fix equal extension cost computation. --- lib_tsalign/src/a_star_aligner.rs | 4 +- .../src/a_star_aligner/alignment_result.rs | 28 +++- test_files/config/range/config.tsa | 132 ++++++++++++++++++ test_files/twin_range.fa | 4 + 4 files changed, 160 insertions(+), 8 deletions(-) create mode 100644 test_files/config/range/config.tsa create mode 100644 test_files/twin_range.fa diff --git a/lib_tsalign/src/a_star_aligner.rs b/lib_tsalign/src/a_star_aligner.rs index 967faad6..12cbab4d 100644 --- a/lib_tsalign/src/a_star_aligner.rs +++ b/lib_tsalign/src/a_star_aligner.rs @@ -178,12 +178,12 @@ pub fn template_switch_distance_a_star_align< query, reference_name, query_name, - range, + range.clone(), config.clone(), memory, cost_limit, memory_limit, )); - result.compute_ts_equal_cost_ranges(reference, query, &config); + result.compute_ts_equal_cost_ranges(reference, query, &range, &config); result } diff --git a/lib_tsalign/src/a_star_aligner/alignment_result.rs b/lib_tsalign/src/a_star_aligner/alignment_result.rs index 7452511e..655ffaea 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result.rs @@ -2,15 +2,21 @@ use std::fmt::{Display, Formatter, Result, Write}; use a_star_sequences::SequencePair; use alignment::{Alignment, stream::AlignmentStream}; -use compact_genome::interface::{alphabet::Alphabet, sequence::GenomeSequence}; +use compact_genome::interface::{ + alphabet::{Alphabet, AlphabetCharacter}, + sequence::GenomeSequence, +}; use generic_a_star::{AStarResult, cost::AStarCost}; use noisy_float::types::{R64, r64}; use num_traits::{Float, Zero}; use crate::config::TemplateSwitchConfig; -use super::template_switch_distance::{ - TemplateSwitchDirection, TemplateSwitchPrimary, TemplateSwitchSecondary, +use super::{ + alignment_geometry::AlignmentRange, + template_switch_distance::{ + TemplateSwitchDirection, TemplateSwitchPrimary, TemplateSwitchSecondary, + }, }; pub mod a_star_sequences; @@ -239,6 +245,7 @@ impl AlignmentResult, config: &TemplateSwitchConfig, ) { let Self::WithTarget { alignment, .. } = self else { @@ -266,8 +273,13 @@ impl AlignmentResult preceding_reference_index, @@ -285,6 +297,7 @@ impl AlignmentResult inner_secondary_index -= 1, @@ -296,7 +309,7 @@ impl AlignmentResult AlignmentResultREF +TTTTTTTTTTAAACAAAAATT +>ALT +TTTTTTTTTTAAAAAAAAATT \ No newline at end of file From 9d2828dc5bd01bee9dc748531d2590d0eacaa33c Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Tue, 6 May 2025 13:25:55 +0300 Subject: [PATCH 05/10] Fix equal extension secondary index. --- .../src/a_star_aligner/alignment_result.rs | 192 ++++++++++++------ .../alignment_result/alignment/stream.rs | 8 +- test_files/twin_range.fa | 4 +- 3 files changed, 143 insertions(+), 61 deletions(-) diff --git a/lib_tsalign/src/a_star_aligner/alignment_result.rs b/lib_tsalign/src/a_star_aligner/alignment_result.rs index 655ffaea..5a0162b1 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result.rs @@ -272,68 +272,144 @@ impl AlignmentResult preceding_reference_index, - TemplateSwitchPrimary::Query => preceding_query_index, - }; - let mut inner_secondary_index = (match secondary { - TemplateSwitchSecondary::Reference => preceding_reference_index, - TemplateSwitchSecondary::Query => preceding_query_index, - } as isize - + first_offset) - as usize; - - while let Some( - super::template_switch_distance::AlignmentType::PrimaryFlankMatch - | super::template_switch_distance::AlignmentType::PrimaryMatch, - ) = preceding_alignment.next() { - println!("Entering loop"); - inner_primary_index -= 1; - match direction { - TemplateSwitchDirection::Forward => inner_secondary_index -= 1, - TemplateSwitchDirection::Reverse => inner_secondary_index += 1, - } - preceding_reference_index -= 1; - preceding_query_index -= 1; - - let primary_character = - primary.get(reference, query)[inner_primary_index].clone(); - let secondary_character = - secondary.get(reference, query)[inner_secondary_index].complement(); - let reference_character = reference[preceding_reference_index].clone(); - let query_character = query[preceding_query_index].clone(); - - let preceding_cost = if remaining_left_flank > 0 { - remaining_left_flank -= 1; - &config.left_flank_edit_costs - } else { - &config.primary_edit_costs - } - .match_or_substitution_cost(reference_character, query_character); - let inner_cost = config - .secondary_edit_costs(direction) - .match_or_substitution_cost(primary_character, secondary_character); - - if preceding_cost.is_zero() && inner_cost.is_zero() { - equal_cost_range.min_start -= 1; - } else { - println!( - "Breaking loop due to cost: preceding: {preceding_cost}; inner: {inner_cost}" - ); - break; + let mut remaining_left_flank = config.left_flank_length; + let mut preceding_reference_index = stream.head_coordinates().reference() + + range + .clone() + .map(|range| range.reference_offset()) + .unwrap_or(0); + let mut preceding_query_index = stream.head_coordinates().query() + + range.clone().map(|range| range.query_offset()).unwrap_or(0); + let mut preceding_alignment = stream.stream_iter_flat_rev(); + let mut inner_primary_index = match primary { + TemplateSwitchPrimary::Reference => preceding_reference_index, + TemplateSwitchPrimary::Query => preceding_query_index, + }; + let mut inner_secondary_index = (match secondary { + TemplateSwitchSecondary::Reference => preceding_reference_index, + TemplateSwitchSecondary::Query => preceding_query_index, + } as isize + + first_offset) + as usize; + + while let Some( + super::template_switch_distance::AlignmentType::PrimaryFlankMatch + | super::template_switch_distance::AlignmentType::PrimaryMatch, + ) = preceding_alignment.next() + { + println!("Entering loop"); + inner_primary_index -= 1; + match direction { + TemplateSwitchDirection::Forward => inner_secondary_index -= 1, + TemplateSwitchDirection::Reverse => inner_secondary_index += 1, + } + preceding_reference_index -= 1; + preceding_query_index -= 1; + + let primary_character = + primary.get(reference, query)[inner_primary_index].clone(); + let secondary_character = secondary.get(reference, query) + [match direction { + TemplateSwitchDirection::Forward => inner_secondary_index, + TemplateSwitchDirection::Reverse => inner_secondary_index - 1, + }] + .complement(); + let reference_character = reference[preceding_reference_index].clone(); + let query_character = query[preceding_query_index].clone(); + + let preceding_cost = if remaining_left_flank > 0 { + remaining_left_flank -= 1; + &config.left_flank_edit_costs + } else { + &config.primary_edit_costs + } + .match_or_substitution_cost(reference_character, query_character); + let inner_cost = config + .secondary_edit_costs(direction) + .match_or_substitution_cost(primary_character, secondary_character); + + if preceding_cost.is_zero() && inner_cost.is_zero() { + equal_cost_range.min_start -= 1; + } else { + println!( + "Breaking loop due to cost: preceding: {preceding_cost}; inner: {inner_cost}" + ); + break; + } } } + // Extend start forwards. + // TODO the following is mostly a copy of above. + /*{ + let mut remaining_right_flank = config.right_flank_length; + let mut preceding_reference_index = stream.head_coordinates().reference() + + range + .clone() + .map(|range| range.reference_offset()) + .unwrap_or(0); + let mut preceding_query_index = stream.head_coordinates().query() + + range.clone().map(|range| range.query_offset()).unwrap_or(0); + let mut preceding_alignment = alignment.iter_flat_cloned().rev(); + let mut inner_primary_index = match primary { + TemplateSwitchPrimary::Reference => preceding_reference_index, + TemplateSwitchPrimary::Query => preceding_query_index, + }; + let mut inner_secondary_index = (match secondary { + TemplateSwitchSecondary::Reference => preceding_reference_index, + TemplateSwitchSecondary::Query => preceding_query_index, + } as isize + + first_offset) + as usize; + + while let Some( + super::template_switch_distance::AlignmentType::PrimaryFlankMatch + | super::template_switch_distance::AlignmentType::PrimaryMatch, + ) = preceding_alignment.next() + { + println!("Entering loop"); + inner_primary_index -= 1; + match direction { + TemplateSwitchDirection::Forward => inner_secondary_index -= 1, + TemplateSwitchDirection::Reverse => inner_secondary_index += 1, + } + preceding_reference_index -= 1; + preceding_query_index -= 1; + + let primary_character = + primary.get(reference, query)[inner_primary_index].clone(); + let secondary_character = secondary.get(reference, query) + [match direction { + TemplateSwitchDirection::Forward => inner_secondary_index, + TemplateSwitchDirection::Reverse => inner_secondary_index - 1, + }] + .complement(); + let reference_character = reference[preceding_reference_index].clone(); + let query_character = query[preceding_query_index].clone(); + + let preceding_cost = if remaining_left_flank > 0 { + remaining_left_flank -= 1; + &config.left_flank_edit_costs + } else { + &config.primary_edit_costs + } + .match_or_substitution_cost(reference_character, query_character); + let inner_cost = config + .secondary_edit_costs(direction) + .match_or_substitution_cost(primary_character, secondary_character); + + if preceding_cost.is_zero() && inner_cost.is_zero() { + equal_cost_range.min_start -= 1; + } else { + println!( + "Breaking loop due to cost: preceding: {preceding_cost}; inner: {inner_cost}" + ); + break; + } + } + }*/ + let super::template_switch_distance::AlignmentType::TemplateSwitchEntrance { equal_cost_range: alignment_equal_cost_range, .. diff --git a/lib_tsalign/src/a_star_aligner/alignment_result/alignment/stream.rs b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/stream.rs index 176f31c4..321dee2f 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result/alignment/stream.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/stream.rs @@ -41,7 +41,7 @@ impl AlignmentStream { self.length } - pub fn stream_iter(&self) -> impl use<'_> + Iterator { + pub fn stream_iter(&self) -> impl use<'_> + DoubleEndedIterator { self.stream.iter().copied() } @@ -50,6 +50,12 @@ impl AlignmentStream { .flat_map(|(multiplicity, alignment_type)| iter::repeat_n(alignment_type, multiplicity)) } + pub fn stream_iter_flat_rev(&self) -> impl use<'_> + Iterator { + self.stream_iter() + .rev() + .flat_map(|(multiplicity, alignment_type)| iter::repeat_n(alignment_type, multiplicity)) + } + fn stream_vec(&self) -> Vec<(usize, AlignmentType)> { self.stream_iter().collect() } diff --git a/test_files/twin_range.fa b/test_files/twin_range.fa index cbf7b710..50ed63dc 100644 --- a/test_files/twin_range.fa +++ b/test_files/twin_range.fa @@ -1,4 +1,4 @@ >REF -TTTTTTTTTTAAACAAAAATT +TTTTTCTTTTTAAACAAAAATT >ALT -TTTTTTTTTTAAAAAAAAATT \ No newline at end of file +TTTTTTTTTTTAAAAGAAAATT \ No newline at end of file From 88f1672349d244fecc84901ba3c6c08273c2551e Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 14 May 2025 12:11:21 +0300 Subject: [PATCH 06/10] Implement TS extension geometry. --- .../src/a_star_aligner/alignment_result.rs | 6 + .../alignment_result/alignment.rs | 1 + .../alignment_result/alignment/stream.rs | 6 + .../alignment/template_switch_specifics.rs | 799 ++++++++++++++++++ .../gap_affine_edit_distance.rs | 4 + .../template_switch_distance.rs | 3 +- .../alignment_type.rs | 4 + .../alignment_type/equal_cost_range.rs | 2 +- .../template_switch_distance/display.rs | 19 +- 9 files changed, 833 insertions(+), 11 deletions(-) create mode 100644 lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs diff --git a/lib_tsalign/src/a_star_aligner/alignment_result.rs b/lib_tsalign/src/a_star_aligner/alignment_result.rs index 5a0162b1..ed09595f 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result.rs @@ -29,6 +29,8 @@ pub trait IAlignmentType { fn is_internal(&self) -> bool; + fn is_template_switch_entrance(&self) -> bool; + fn is_template_switch_exit(&self) -> bool; } @@ -266,6 +268,10 @@ impl AlignmentResult { + if config.left_flank_length > 0 || config.right_flank_length > 0 { + continue; + } + equal_cost_range.min_start = 0; equal_cost_range.max_start = 0; equal_cost_range.min_end = 0; diff --git a/lib_tsalign/src/a_star_aligner/alignment_result/alignment.rs b/lib_tsalign/src/a_star_aligner/alignment_result/alignment.rs index fc3df58d..a31d2e8b 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result/alignment.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result/alignment.rs @@ -8,6 +8,7 @@ use super::IAlignmentType; pub mod iter; pub mod stream; +pub mod template_switch_specifics; #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] diff --git a/lib_tsalign/src/a_star_aligner/alignment_result/alignment/stream.rs b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/stream.rs index 321dee2f..a5ffdbe3 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result/alignment/stream.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/stream.rs @@ -109,6 +109,12 @@ impl AlignmentStream { self.length += multiplicity * Self::stream_length(alignment_type); } + pub fn push_all(&mut self, iterator: impl IntoIterator) { + for (multiplicity, alignment_type) in iterator { + self.push(multiplicity, alignment_type); + } + } + /// Pops one unit of length from the tail of the stream. pub fn pop_one(&mut self) { self.pop(self.length.saturating_sub(1)); diff --git a/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs new file mode 100644 index 00000000..10dd32e6 --- /dev/null +++ b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs @@ -0,0 +1,799 @@ +use compact_genome::interface::{ + alphabet::{Alphabet, AlphabetCharacter}, + sequence::GenomeSequence, +}; + +use crate::a_star_aligner::{ + alignment_result::{IAlignmentType, alignment::stream::AlignmentStream}, + template_switch_distance::{ + AlignmentType, TemplateSwitchDirection, TemplateSwitchPrimary, TemplateSwitchSecondary, + }, +}; + +use super::Alignment; + +impl Alignment { + /// Moves the start of a template switch one character pair to the left if possible, moving the character pair into the template switch. + /// + /// Only works if the alignment preceding the template switch is a match or substitution, and not a flank. + /// In this case it returns true, and otherwise false. + /// + /// Compact index identifies the start of the template switch in terms of the compact representation of the alignment. + /// See [`Self::iter_compact`]. + pub fn move_template_switch_start_backwards< + AlphabetType: Alphabet, + SubsequenceType: GenomeSequence + ?Sized, + >( + &mut self, + reference: &SubsequenceType, + query: &SubsequenceType, + reference_offset: usize, + query_offset: usize, + mut compact_index: usize, + ) -> bool { + let AlignmentType::TemplateSwitchEntrance { + first_offset, + primary, + secondary, + direction, + .. + } = self.alignment[compact_index].1 + else { + panic!() + }; + + if let Some((_, AlignmentType::PrimaryMatch | AlignmentType::PrimarySubstitution)) = + self.alignment.get(compact_index - 1) + { + // Compute TS inner first indices. + let mut stream = AlignmentStream::new(); + stream.push_all(self.iter_compact_cloned().take(compact_index)); + let ts_inner_primary_index = match primary { + TemplateSwitchPrimary::Reference => { + stream.head_coordinates().reference() + reference_offset + } + TemplateSwitchPrimary::Query => stream.head_coordinates().query() + query_offset, + }; + let ts_inner_secondary_index = usize::try_from( + isize::try_from(match secondary { + TemplateSwitchSecondary::Reference => { + stream.head_coordinates().reference() + reference_offset + } + TemplateSwitchSecondary::Query => { + stream.head_coordinates().query() + query_offset + } + }) + .unwrap() + .checked_add(first_offset) + .unwrap(), + ) + .unwrap(); + + // Remove one match or substitution from before the TS. + let multiplicity = &mut self.alignment[compact_index - 1].0; + assert!(*multiplicity > 0); + *multiplicity -= 1; + + // Remove the alignment entry if it has zero multiplicity. + if *multiplicity == 0 { + compact_index -= 1; + self.alignment.remove(compact_index); + } + + // Check if the new inner pair is a match or a substitution. + let primary_char = primary.get(reference, query)[ts_inner_primary_index - 1].clone(); + let secondary_char = match direction { + TemplateSwitchDirection::Forward => { + secondary.get(reference, query)[ts_inner_secondary_index - 1].clone() + } + TemplateSwitchDirection::Reverse => { + secondary.get(reference, query)[ts_inner_secondary_index].complement() + } + }; + let inner_alignment_type = if primary_char == secondary_char { + AlignmentType::SecondaryMatch + } else { + AlignmentType::SecondarySubstitution + }; + + // Insert new inner alignment. + if self.alignment[compact_index + 1].1 == inner_alignment_type { + self.alignment[compact_index + 1].0 += 1; + } else { + self.alignment + .insert(compact_index + 1, (1, inner_alignment_type)); + } + + // If reverse TS, then fix first offset. + // (If forward TS, the changes to points 1 and 2 cancel out.) + if direction == TemplateSwitchDirection::Reverse { + let AlignmentType::TemplateSwitchEntrance { first_offset, .. } = + &mut self.alignment[compact_index].1 + else { + unreachable!(); + }; + *first_offset += 2; + } + + // Fix anti-primary gap. + let Some((_, AlignmentType::TemplateSwitchExit { anti_primary_gap })) = self.alignment + [compact_index..] + .iter_mut() + .find(|(_, alignment_type)| alignment_type.is_template_switch_exit()) + else { + unreachable!(); + }; + *anti_primary_gap += 1; + + true + } else { + false + } + } + + /// Moves the start of a template switch one character pair to the right if possible, moving the character pair out of the template switch. + /// + /// Only works if the first alignment inside the template switch is a match or substitution. + /// In this case it returns true, and otherwise false. + /// + /// Assumes that the template switch is not preceded by a flank. + /// + /// Compact index identifies the start of the template switch in terms of the compact representation of the alignment. + /// See [`Self::iter_compact`]. + pub fn move_template_switch_start_forwards< + AlphabetType: Alphabet, + SubsequenceType: GenomeSequence + ?Sized, + >( + &mut self, + reference: &SubsequenceType, + query: &SubsequenceType, + reference_offset: usize, + query_offset: usize, + mut compact_index: usize, + ) -> bool { + let AlignmentType::TemplateSwitchEntrance { direction, .. } = + self.alignment[compact_index].1 + else { + panic!() + }; + + // Assert that no flanks are involved. + assert!( + self.alignment + .get(compact_index - 1) + .map(|(_, alignment_type)| !matches!( + alignment_type, + AlignmentType::PrimaryFlankDeletion + | AlignmentType::PrimaryFlankInsertion + | AlignmentType::PrimaryFlankSubstitution + | AlignmentType::PrimaryFlankMatch + )) + .unwrap_or(true) + ); + + if let Some((_, AlignmentType::SecondaryMatch | AlignmentType::SecondarySubstitution)) = + self.alignment.get(compact_index + 1) + { + // Compute TS outer first indices. + let mut stream = AlignmentStream::new(); + stream.push_all(self.iter_compact_cloned().take(compact_index)); + let ts_outer_reference_index = stream.head_coordinates().reference() + reference_offset; + let ts_outer_query_index = stream.head_coordinates().query() + query_offset; + + // Remove one match or substitution from inside the TS. + let multiplicity = &mut self.alignment[compact_index + 1].0; + assert!(*multiplicity > 0); + *multiplicity -= 1; + + // Remove the alignment entry if it has zero multiplicity. + if *multiplicity == 0 { + self.alignment.remove(compact_index + 1); + } + + // Check if the new outer pair is a match or a substitution. + let reference_char = reference[ts_outer_reference_index].clone(); + let query_char = query[ts_outer_query_index].clone(); + let outer_alignment_type = if reference_char == query_char { + AlignmentType::PrimaryMatch + } else { + AlignmentType::PrimarySubstitution + }; + + // Insert new outer alignment. + if self.alignment[compact_index - 1].1 == outer_alignment_type { + self.alignment[compact_index - 1].0 += 1; + } else { + self.alignment + .insert(compact_index - 1, (1, outer_alignment_type)); + compact_index += 1; + } + + // If reverse TS, then fix first offset. + // (If forward TS, the changes to points 1 and 2 cancel out.) + if direction == TemplateSwitchDirection::Reverse { + let AlignmentType::TemplateSwitchEntrance { first_offset, .. } = + &mut self.alignment[compact_index].1 + else { + unreachable!(); + }; + *first_offset -= 2; + } + + // Fix anti-primary gap. + let Some((_, AlignmentType::TemplateSwitchExit { anti_primary_gap })) = self.alignment + [compact_index..] + .iter_mut() + .find(|(_, alignment_type)| alignment_type.is_template_switch_exit()) + else { + unreachable!(); + }; + *anti_primary_gap -= 1; + + true + } else { + false + } + } + + /// Moves the end of a template switch one character pair to the right if possible, moving the character pair into the template switch. + /// + /// Only works if the alignment preceding the template switch is a match or substitution, and not a flank. + /// In this case it returns true, and otherwise false. + /// + /// Compact index identifies the start of the template switch in terms of the compact representation of the alignment. + /// See [`Self::iter_compact`]. + pub fn move_template_switch_end_forwards< + AlphabetType: Alphabet, + SubsequenceType: GenomeSequence + ?Sized, + >( + &mut self, + reference: &SubsequenceType, + query: &SubsequenceType, + reference_offset: usize, + query_offset: usize, + compact_index: usize, + ) -> bool { + let AlignmentType::TemplateSwitchEntrance { + first_offset, + primary, + secondary, + direction, + .. + } = self.alignment[compact_index].1 + else { + panic!() + }; + let mut exit_index = compact_index + + self + .alignment + .iter() + .skip(compact_index) + .enumerate() + .find(|(_, (_, alignment_type))| { + matches!(alignment_type, AlignmentType::TemplateSwitchExit { .. }) + }) + .unwrap() + .0; + let inner_secondary_length = self + .alignment + .iter() + .take(exit_index) + .skip(compact_index + 1) + .fold( + 0, + |secondary_length, (multiplicity, alignment_type)| match alignment_type { + AlignmentType::SecondaryInsertion => secondary_length, + AlignmentType::SecondaryDeletion + | AlignmentType::SecondarySubstitution + | AlignmentType::SecondaryMatch => secondary_length + *multiplicity, + _ => unreachable!(), + }, + ); + println!("Inner secondary length: {inner_secondary_length}"); + + if let Some((_, AlignmentType::PrimaryMatch | AlignmentType::PrimarySubstitution)) = + self.alignment.get(exit_index + 1) + { + // Compute TS inner last indices. + let mut stream = AlignmentStream::new(); + stream.push_all(self.iter_compact_cloned().take(compact_index)); + stream.clear(); + stream.push_all( + self.iter_compact_cloned() + .take(exit_index + 1) + .skip(compact_index), + ); + let ts_inner_primary_index = match primary { + TemplateSwitchPrimary::Reference => { + stream.head_coordinates().reference() + reference_offset + } + TemplateSwitchPrimary::Query => stream.head_coordinates().query() + query_offset, + }; + let ts_inner_secondary_index = usize::try_from( + isize::try_from(match secondary { + TemplateSwitchSecondary::Reference => { + stream.tail_coordinates().reference() + reference_offset + } + TemplateSwitchSecondary::Query => { + stream.tail_coordinates().query() + query_offset + } + }) + .unwrap() + .checked_add(first_offset) + .unwrap(), + ) + .unwrap(); + let ts_inner_secondary_index = match direction { + TemplateSwitchDirection::Forward => ts_inner_secondary_index + .checked_add(inner_secondary_length) + .unwrap(), + TemplateSwitchDirection::Reverse => ts_inner_secondary_index + .checked_sub(inner_secondary_length) + .unwrap(), + }; + + // Remove one match or substitution from after the TS. + let multiplicity = &mut self.alignment[exit_index + 1].0; + assert!(*multiplicity > 0); + *multiplicity -= 1; + + // Remove the alignment entry if it has zero multiplicity. + if *multiplicity == 0 { + self.alignment.remove(exit_index + 1); + } + + // Check if the new inner pair is a match or a substitution. + println!("Primary index: {ts_inner_primary_index}"); + let primary_char = primary.get(reference, query)[ts_inner_primary_index].clone(); + let secondary_char = match direction { + TemplateSwitchDirection::Forward => { + secondary.get(reference, query)[ts_inner_secondary_index].clone() + } + TemplateSwitchDirection::Reverse => { + println!("Secondary index: {}", ts_inner_secondary_index - 1); + secondary.get(reference, query)[ts_inner_secondary_index - 1].complement() + } + }; + let inner_alignment_type = if primary_char == secondary_char { + AlignmentType::SecondaryMatch + } else { + AlignmentType::SecondarySubstitution + }; + + // Insert new inner alignment. + if self.alignment[exit_index - 1].1 == inner_alignment_type { + self.alignment[exit_index - 1].0 += 1; + } else { + self.alignment.insert(exit_index, (1, inner_alignment_type)); + exit_index += 1; + } + + // Fix anti-primary gap. + let AlignmentType::TemplateSwitchExit { anti_primary_gap } = + &mut self.alignment[exit_index].1 + else { + unreachable!(); + }; + *anti_primary_gap += 1; + + true + } else { + false + } + } + + /// Moves the end of a template switch one character pair to the left if possible, moving the character pair out of the template switch. + /// + /// Only works if the last alignment inside the template switch is a match or substitution. + /// In this case it returns true, and otherwise false. + /// + /// Assumes that the template switch is not succeeded by a flank. + /// + /// Compact index identifies the start of the template switch in terms of the compact representation of the alignment. + /// See [`Self::iter_compact`]. + pub fn move_template_switch_end_backwards< + AlphabetType: Alphabet, + SubsequenceType: GenomeSequence + ?Sized, + >( + &mut self, + reference: &SubsequenceType, + query: &SubsequenceType, + reference_offset: usize, + query_offset: usize, + compact_index: usize, + ) -> bool { + assert!(matches!( + self.alignment[compact_index].1, + AlignmentType::TemplateSwitchEntrance { .. } + )); + let mut exit_index = compact_index + + self + .alignment + .iter() + .skip(compact_index) + .enumerate() + .find(|(_, (_, alignment_type))| { + matches!(alignment_type, AlignmentType::TemplateSwitchExit { .. }) + }) + .unwrap() + .0; + + // Assert that no flanks are involved. + assert!( + self.alignment + .get(exit_index + 1) + .map(|(_, alignment_type)| !matches!( + alignment_type, + AlignmentType::PrimaryFlankDeletion + | AlignmentType::PrimaryFlankInsertion + | AlignmentType::PrimaryFlankSubstitution + | AlignmentType::PrimaryFlankMatch + )) + .unwrap_or(true) + ); + + if let Some((_, AlignmentType::SecondaryMatch | AlignmentType::SecondarySubstitution)) = + self.alignment.get(exit_index - 1) + { + // Compute TS outer first indices. + let mut stream = AlignmentStream::new(); + stream.push_all(self.iter_compact_cloned().take(exit_index + 1)); + let ts_outer_reference_index = stream.head_coordinates().reference() + reference_offset; + let ts_outer_query_index = stream.head_coordinates().query() + query_offset; + + // Remove one match or substitution from inside the TS. + let multiplicity = &mut self.alignment[exit_index - 1].0; + assert!(*multiplicity > 0); + *multiplicity -= 1; + + // Remove the alignment entry if it has zero multiplicity. + if *multiplicity == 0 { + exit_index -= 1; + self.alignment.remove(exit_index); + } + + // Check if the new outer pair is a match or a substitution. + let reference_char = reference[ts_outer_reference_index].clone(); + let query_char = query[ts_outer_query_index].clone(); + let outer_alignment_type = if reference_char == query_char { + AlignmentType::PrimaryMatch + } else { + AlignmentType::PrimarySubstitution + }; + + // Insert new outer alignment. + if self.alignment[exit_index + 1].1 == outer_alignment_type { + self.alignment[exit_index + 1].0 += 1; + } else { + self.alignment + .insert(exit_index + 1, (1, outer_alignment_type)); + } + + // Fix anti-primary gap. + let Some((_, AlignmentType::TemplateSwitchExit { anti_primary_gap })) = self.alignment + [compact_index..] + .iter_mut() + .find(|(_, alignment_type)| alignment_type.is_template_switch_exit()) + else { + unreachable!(); + }; + *anti_primary_gap -= 1; + + true + } else { + false + } + } +} + +#[cfg(test)] +mod tests { + use compact_genome::{ + implementation::{alphabets::dna_alphabet::DnaAlphabet, vec_sequence::VectorGenome}, + interface::sequence::{GenomeSequence, OwnedGenomeSequence}, + }; + + use crate::a_star_aligner::{ + alignment_result::alignment::Alignment, + template_switch_distance::{ + AlignmentType, EqualCostRange, TemplateSwitchDirection, TemplateSwitchPrimary, + TemplateSwitchSecondary, + }, + }; + + static START_REFERENCE: &[u8] = b"AGAGAGCTCTAA"; + static START_QUERY: &[u8] = b"AGAGAGCTTTAA"; + static START_ALIGNMENTS: &[&[(usize, AlignmentType)]] = &[ + &[ + (6, AlignmentType::PrimaryMatch), + ( + 1, + AlignmentType::TemplateSwitchEntrance { + first_offset: -6, + equal_cost_range: EqualCostRange::new_invalid(), + primary: TemplateSwitchPrimary::Reference, + secondary: TemplateSwitchSecondary::Query, + direction: TemplateSwitchDirection::Reverse, + }, + ), + (2, AlignmentType::SecondaryMatch), + ( + 1, + AlignmentType::TemplateSwitchExit { + anti_primary_gap: 2, + }, + ), + (2, AlignmentType::PrimaryMatch), + ], + &[ + (5, AlignmentType::PrimaryMatch), + ( + 1, + AlignmentType::TemplateSwitchEntrance { + first_offset: -4, + equal_cost_range: EqualCostRange::new_invalid(), + primary: TemplateSwitchPrimary::Reference, + secondary: TemplateSwitchSecondary::Query, + direction: TemplateSwitchDirection::Reverse, + }, + ), + (3, AlignmentType::SecondaryMatch), + ( + 1, + AlignmentType::TemplateSwitchExit { + anti_primary_gap: 3, + }, + ), + (2, AlignmentType::PrimaryMatch), + ], + &[ + (4, AlignmentType::PrimaryMatch), + ( + 1, + AlignmentType::TemplateSwitchEntrance { + first_offset: -2, + equal_cost_range: EqualCostRange::new_invalid(), + primary: TemplateSwitchPrimary::Reference, + secondary: TemplateSwitchSecondary::Query, + direction: TemplateSwitchDirection::Reverse, + }, + ), + (4, AlignmentType::SecondaryMatch), + ( + 1, + AlignmentType::TemplateSwitchExit { + anti_primary_gap: 4, + }, + ), + (2, AlignmentType::PrimaryMatch), + ], + &[ + (3, AlignmentType::PrimaryMatch), + ( + 1, + AlignmentType::TemplateSwitchEntrance { + first_offset: 0, + equal_cost_range: EqualCostRange::new_invalid(), + primary: TemplateSwitchPrimary::Reference, + secondary: TemplateSwitchSecondary::Query, + direction: TemplateSwitchDirection::Reverse, + }, + ), + (1, AlignmentType::SecondarySubstitution), + (4, AlignmentType::SecondaryMatch), + ( + 1, + AlignmentType::TemplateSwitchExit { + anti_primary_gap: 5, + }, + ), + (2, AlignmentType::PrimaryMatch), + ], + &[ + (2, AlignmentType::PrimaryMatch), + ( + 1, + AlignmentType::TemplateSwitchEntrance { + first_offset: 2, + equal_cost_range: EqualCostRange::new_invalid(), + primary: TemplateSwitchPrimary::Reference, + secondary: TemplateSwitchSecondary::Query, + direction: TemplateSwitchDirection::Reverse, + }, + ), + (2, AlignmentType::SecondarySubstitution), + (4, AlignmentType::SecondaryMatch), + ( + 1, + AlignmentType::TemplateSwitchExit { + anti_primary_gap: 6, + }, + ), + (2, AlignmentType::PrimaryMatch), + ], + ]; + + static END_REFERENCE: &[u8] = b"AACTCTAGAGAG"; + static END_QUERY: &[u8] = b"AATTCTAGAGAG"; + static END_ALIGNMENTS: &[&[(usize, AlignmentType)]] = &[ + &[ + (1, AlignmentType::PrimaryMatch), + ( + 1, + AlignmentType::TemplateSwitchEntrance { + first_offset: 10, + equal_cost_range: EqualCostRange::new_invalid(), + primary: TemplateSwitchPrimary::Reference, + secondary: TemplateSwitchSecondary::Query, + direction: TemplateSwitchDirection::Reverse, + }, + ), + (2, AlignmentType::SecondaryMatch), + ( + 1, + AlignmentType::TemplateSwitchExit { + anti_primary_gap: 2, + }, + ), + (6, AlignmentType::PrimaryMatch), + ], + &[ + (1, AlignmentType::PrimaryMatch), + ( + 1, + AlignmentType::TemplateSwitchEntrance { + first_offset: 10, + equal_cost_range: EqualCostRange::new_invalid(), + primary: TemplateSwitchPrimary::Reference, + secondary: TemplateSwitchSecondary::Query, + direction: TemplateSwitchDirection::Reverse, + }, + ), + (3, AlignmentType::SecondaryMatch), + ( + 1, + AlignmentType::TemplateSwitchExit { + anti_primary_gap: 3, + }, + ), + (5, AlignmentType::PrimaryMatch), + ], + &[ + (1, AlignmentType::PrimaryMatch), + ( + 1, + AlignmentType::TemplateSwitchEntrance { + first_offset: 10, + equal_cost_range: EqualCostRange::new_invalid(), + primary: TemplateSwitchPrimary::Reference, + secondary: TemplateSwitchSecondary::Query, + direction: TemplateSwitchDirection::Reverse, + }, + ), + (4, AlignmentType::SecondaryMatch), + ( + 1, + AlignmentType::TemplateSwitchExit { + anti_primary_gap: 4, + }, + ), + (4, AlignmentType::PrimaryMatch), + ], + &[ + (1, AlignmentType::PrimaryMatch), + ( + 1, + AlignmentType::TemplateSwitchEntrance { + first_offset: 10, + equal_cost_range: EqualCostRange::new_invalid(), + primary: TemplateSwitchPrimary::Reference, + secondary: TemplateSwitchSecondary::Query, + direction: TemplateSwitchDirection::Reverse, + }, + ), + (4, AlignmentType::SecondaryMatch), + (1, AlignmentType::SecondarySubstitution), + ( + 1, + AlignmentType::TemplateSwitchExit { + anti_primary_gap: 5, + }, + ), + (3, AlignmentType::PrimaryMatch), + ], + &[ + (1, AlignmentType::PrimaryMatch), + ( + 1, + AlignmentType::TemplateSwitchEntrance { + first_offset: 10, + equal_cost_range: EqualCostRange::new_invalid(), + primary: TemplateSwitchPrimary::Reference, + secondary: TemplateSwitchSecondary::Query, + direction: TemplateSwitchDirection::Reverse, + }, + ), + (4, AlignmentType::SecondaryMatch), + (2, AlignmentType::SecondarySubstitution), + ( + 1, + AlignmentType::TemplateSwitchExit { + anti_primary_gap: 6, + }, + ), + (2, AlignmentType::PrimaryMatch), + ], + ]; + + #[test] + fn move_template_switch_start_backwards() { + let reference = VectorGenome::::from_slice_u8(START_REFERENCE).unwrap(); + let query = VectorGenome::from_slice_u8(START_QUERY).unwrap(); + let mut alignment = Alignment::from(START_ALIGNMENTS[0].to_vec()); + + for expected_alignment in &START_ALIGNMENTS[1..] { + assert!(alignment.move_template_switch_start_backwards( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + 2, + 2, + 1 + )); + assert_eq!(alignment, Alignment::from(expected_alignment.to_vec())); + } + } + + #[test] + fn move_template_switch_start_forwards() { + let reference = VectorGenome::::from_slice_u8(START_REFERENCE).unwrap(); + let query = VectorGenome::from_slice_u8(START_QUERY).unwrap(); + let mut alignment = Alignment::from(START_ALIGNMENTS.last().unwrap().to_vec()); + + for expected_alignment in START_ALIGNMENTS.iter().rev().skip(1) { + assert!(alignment.move_template_switch_start_forwards( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + 2, + 2, + 1 + )); + assert_eq!(alignment, Alignment::from(expected_alignment.to_vec())); + } + } + + #[test] + fn move_template_switch_end_backwards() { + let reference = VectorGenome::::from_slice_u8(END_REFERENCE).unwrap(); + let query = VectorGenome::from_slice_u8(END_QUERY).unwrap(); + let mut alignment = Alignment::from(END_ALIGNMENTS.last().unwrap().to_vec()); + + for expected_alignment in END_ALIGNMENTS.iter().rev().skip(1) { + assert!(alignment.move_template_switch_end_backwards( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + 1, + 1, + 1 + )); + assert_eq!(alignment, Alignment::from(expected_alignment.to_vec())); + } + } + + #[test] + fn move_template_switch_end_forwards() { + let reference = VectorGenome::::from_slice_u8(END_REFERENCE).unwrap(); + let query = VectorGenome::from_slice_u8(END_QUERY).unwrap(); + let mut alignment = Alignment::from(END_ALIGNMENTS[0].to_vec()); + + for expected_alignment in &END_ALIGNMENTS[1..] { + assert!(alignment.move_template_switch_end_forwards( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + 1, + 1, + 1 + )); + assert_eq!(alignment, Alignment::from(expected_alignment.to_vec())); + } + } +} diff --git a/lib_tsalign/src/a_star_aligner/gap_affine_edit_distance.rs b/lib_tsalign/src/a_star_aligner/gap_affine_edit_distance.rs index 721eeefa..13cfc01a 100644 --- a/lib_tsalign/src/a_star_aligner/gap_affine_edit_distance.rs +++ b/lib_tsalign/src/a_star_aligner/gap_affine_edit_distance.rs @@ -374,6 +374,10 @@ impl IAlignmentType for AlignmentType { self == &Self::Root } + fn is_template_switch_entrance(&self) -> bool { + false + } + fn is_template_switch_exit(&self) -> bool { false } diff --git a/lib_tsalign/src/a_star_aligner/template_switch_distance.rs b/lib_tsalign/src/a_star_aligner/template_switch_distance.rs index 4446fd60..400a1981 100644 --- a/lib_tsalign/src/a_star_aligner/template_switch_distance.rs +++ b/lib_tsalign/src/a_star_aligner/template_switch_distance.rs @@ -1,6 +1,5 @@ use std::fmt::Display; -use alignment_type::equal_cost_range::EqualCostRange; use compact_genome::interface::sequence::GenomeSequence; use generic_a_star::{AStarNode, cost::AStarCost}; use identifier::GapType; @@ -18,7 +17,7 @@ mod identifier; pub mod lower_bounds; pub mod strategies; -pub use alignment_type::AlignmentType; +pub use alignment_type::{AlignmentType, equal_cost_range::EqualCostRange}; pub use context::Context; pub use identifier::{ Identifier, TemplateSwitchDirection, TemplateSwitchPrimary, TemplateSwitchSecondary, diff --git a/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type.rs b/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type.rs index e6b3dce9..c55a6379 100644 --- a/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type.rs +++ b/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type.rs @@ -145,6 +145,10 @@ impl IAlignmentType for AlignmentType { ) } + fn is_template_switch_entrance(&self) -> bool { + matches!(self, Self::TemplateSwitchEntrance { .. }) + } + fn is_template_switch_exit(&self) -> bool { matches!(self, Self::TemplateSwitchExit { .. }) } diff --git a/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type/equal_cost_range.rs b/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type/equal_cost_range.rs index 00942cf7..c5a53675 100644 --- a/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type/equal_cost_range.rs +++ b/lib_tsalign/src/a_star_aligner/template_switch_distance/alignment_type/equal_cost_range.rs @@ -26,7 +26,7 @@ pub struct EqualCostRange { } impl EqualCostRange { - pub fn new_invalid() -> Self { + pub const fn new_invalid() -> Self { Self { min_start: 1, max_start: -1, diff --git a/lib_tsalign/src/a_star_aligner/template_switch_distance/display.rs b/lib_tsalign/src/a_star_aligner/template_switch_distance/display.rs index cfbf05b7..6467f3ed 100644 --- a/lib_tsalign/src/a_star_aligner/template_switch_distance/display.rs +++ b/lib_tsalign/src/a_star_aligner/template_switch_distance/display.rs @@ -79,14 +79,17 @@ impl Display for TemplateSwitchDirection { impl Display for EqualCostRange { fn fmt(&self, f: &mut Formatter<'_>) -> Result { - assert!(self.is_valid()); - let Self { - min_start, - max_start, - min_end, - max_end, - } = self; - write!(f, "[{min_start},{max_start}]:[{min_end},{max_end}]") + if self.is_valid() { + let Self { + min_start, + max_start, + min_end, + max_end, + } = self; + write!(f, "[{min_start},{max_start}]:[{min_end},{max_end}]") + } else { + write!(f, "[-]:[-]") + } } } From a0ef04e834e1141d53be85aa2be34cd4c0bb10af Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 14 May 2025 14:47:00 +0300 Subject: [PATCH 07/10] Compute alignment costs. Not supporting flanks. --- .../alignment/template_switch_specifics.rs | 542 +++++++++++++++++- .../template_switch_distance.rs | 51 +- lib_tsalign/src/config.rs | 56 +- 3 files changed, 585 insertions(+), 64 deletions(-) diff --git a/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs index 10dd32e6..87de694b 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs @@ -2,12 +2,16 @@ use compact_genome::interface::{ alphabet::{Alphabet, AlphabetCharacter}, sequence::GenomeSequence, }; +use generic_a_star::cost::AStarCost; -use crate::a_star_aligner::{ - alignment_result::{IAlignmentType, alignment::stream::AlignmentStream}, - template_switch_distance::{ - AlignmentType, TemplateSwitchDirection, TemplateSwitchPrimary, TemplateSwitchSecondary, +use crate::{ + a_star_aligner::{ + alignment_result::{IAlignmentType, alignment::stream::AlignmentStream}, + template_switch_distance::{ + AlignmentType, TemplateSwitchDirection, TemplateSwitchPrimary, TemplateSwitchSecondary, + }, }, + config::TemplateSwitchConfig, }; use super::Alignment; @@ -289,7 +293,6 @@ impl Alignment { _ => unreachable!(), }, ); - println!("Inner secondary length: {inner_secondary_length}"); if let Some((_, AlignmentType::PrimaryMatch | AlignmentType::PrimarySubstitution)) = self.alignment.get(exit_index + 1) @@ -343,14 +346,12 @@ impl Alignment { } // Check if the new inner pair is a match or a substitution. - println!("Primary index: {ts_inner_primary_index}"); let primary_char = primary.get(reference, query)[ts_inner_primary_index].clone(); let secondary_char = match direction { TemplateSwitchDirection::Forward => { secondary.get(reference, query)[ts_inner_secondary_index].clone() } TemplateSwitchDirection::Reverse => { - println!("Secondary index: {}", ts_inner_secondary_index - 1); secondary.get(reference, query)[ts_inner_secondary_index - 1].complement() } }; @@ -484,25 +485,328 @@ impl Alignment { false } } + + /// Compute the cost of an alignment. + /// + /// Flanks are not supported. + pub fn compute_cost< + AlphabetType: Alphabet, + SubsequenceType: GenomeSequence + ?Sized, + Cost: AStarCost, + >( + &mut self, + reference: &SubsequenceType, + query: &SubsequenceType, + reference_offset: usize, + query_offset: usize, + config: &TemplateSwitchConfig, + ) -> Cost { + let mut cost = Cost::zero(); + + let mut last_alignment_type = None; + let mut reference_index = reference_offset; + let mut query_index = query_offset; + let mut primary_index = 0; + let mut secondary_index = 0; + let mut primary = TemplateSwitchPrimary::Reference; + let mut secondary = TemplateSwitchSecondary::Reference; + let mut direction = TemplateSwitchDirection::Forward; + for alignment_type in self.iter_flat_cloned() { + let cost_increment = match alignment_type { + AlignmentType::PrimaryInsertion => { + let cost_increment = if Some(alignment_type) == last_alignment_type { + config + .primary_edit_costs + .gap_extend_cost(query[query_index].clone()) + } else { + config + .primary_edit_costs + .gap_open_cost(query[query_index].clone()) + }; + query_index += 1; + cost_increment + } + AlignmentType::PrimaryDeletion => { + let cost_increment = if Some(alignment_type) == last_alignment_type { + config + .primary_edit_costs + .gap_extend_cost(reference[reference_index].clone()) + } else { + config + .primary_edit_costs + .gap_open_cost(reference[reference_index].clone()) + }; + reference_index += 1; + cost_increment + } + AlignmentType::PrimarySubstitution | AlignmentType::PrimaryMatch => { + let cost_increment = config.primary_edit_costs.match_or_substitution_cost( + reference[reference_index].clone(), + query[query_index].clone(), + ); + reference_index += 1; + query_index += 1; + cost_increment + } + AlignmentType::PrimaryFlankInsertion + | AlignmentType::PrimaryFlankDeletion + | AlignmentType::PrimaryFlankSubstitution + | AlignmentType::PrimaryFlankMatch => { + todo!("Flanks are not yet supported") + } + AlignmentType::SecondaryInsertion => { + let primary_character = match primary { + TemplateSwitchPrimary::Reference => reference[primary_index].clone(), + TemplateSwitchPrimary::Query => query[primary_index].clone(), + }; + let cost_increment = if Some(alignment_type) == last_alignment_type { + config + .secondary_edit_costs(direction) + .gap_extend_cost(primary_character) + } else { + config + .secondary_edit_costs(direction) + .gap_open_cost(primary_character) + }; + primary_index += 1; + cost_increment + } + AlignmentType::SecondaryDeletion => { + let secondary_character = match (secondary, direction) { + (TemplateSwitchSecondary::Reference, TemplateSwitchDirection::Forward) => { + reference[secondary_index].clone() + } + (TemplateSwitchSecondary::Reference, TemplateSwitchDirection::Reverse) => { + reference[secondary_index - 1].complement() + } + (TemplateSwitchSecondary::Query, TemplateSwitchDirection::Forward) => { + query[secondary_index].clone() + } + (TemplateSwitchSecondary::Query, TemplateSwitchDirection::Reverse) => { + query[secondary_index - 1].complement() + } + }; + let cost_increment = if Some(alignment_type) == last_alignment_type { + config + .secondary_edit_costs(direction) + .gap_extend_cost(secondary_character) + } else { + config + .secondary_edit_costs(direction) + .gap_open_cost(secondary_character) + }; + match direction { + TemplateSwitchDirection::Forward => secondary_index += 1, + TemplateSwitchDirection::Reverse => secondary_index -= 1, + } + cost_increment + } + AlignmentType::SecondarySubstitution | AlignmentType::SecondaryMatch => { + let primary_character = match primary { + TemplateSwitchPrimary::Reference => reference[primary_index].clone(), + TemplateSwitchPrimary::Query => query[primary_index].clone(), + }; + let secondary_character = match (secondary, direction) { + (TemplateSwitchSecondary::Reference, TemplateSwitchDirection::Forward) => { + reference[secondary_index].clone() + } + (TemplateSwitchSecondary::Reference, TemplateSwitchDirection::Reverse) => { + reference[secondary_index - 1].complement() + } + (TemplateSwitchSecondary::Query, TemplateSwitchDirection::Forward) => { + query[secondary_index].clone() + } + (TemplateSwitchSecondary::Query, TemplateSwitchDirection::Reverse) => { + query[secondary_index - 1].complement() + } + }; + println!( + "Primary character: {primary_character}; secondary character: {secondary_character}" + ); + let cost_increment = config + .secondary_edit_costs(direction) + .match_or_substitution_cost(primary_character, secondary_character); + primary_index += 1; + match direction { + TemplateSwitchDirection::Forward => secondary_index += 1, + TemplateSwitchDirection::Reverse => secondary_index -= 1, + } + cost_increment + } + AlignmentType::TemplateSwitchEntrance { + first_offset, + primary: ts_primary, + secondary: ts_secondary, + direction: ts_direction, + .. + } => { + assert!(!matches!( + last_alignment_type, + Some(AlignmentType::TemplateSwitchEntrance { .. }) + )); + primary = ts_primary; + secondary = ts_secondary; + direction = ts_direction; + let cost_increment = config.base_cost.get(primary, secondary, direction); + let Some(cost_increment) = + cost_increment.checked_add(&config.offset_costs.evaluate(&first_offset)) + else { + return Cost::max_value(); + }; + primary_index = match primary { + TemplateSwitchPrimary::Reference => reference_index, + TemplateSwitchPrimary::Query => query_index, + }; + secondary_index = usize::try_from( + isize::try_from(match secondary { + TemplateSwitchSecondary::Reference => reference_index, + TemplateSwitchSecondary::Query => query_index, + }) + .unwrap() + .checked_add(first_offset) + .unwrap(), + ) + .unwrap(); + cost_increment + } + AlignmentType::TemplateSwitchExit { anti_primary_gap } => { + assert!(!matches!( + last_alignment_type, + Some(AlignmentType::TemplateSwitchExit { .. }) + )); + let length = match primary { + TemplateSwitchPrimary::Reference => { + let length = primary_index - reference_index; + reference_index = primary_index; + query_index = usize::try_from( + isize::try_from(query_index) + .unwrap() + .checked_add(anti_primary_gap) + .unwrap(), + ) + .unwrap(); + length + } + TemplateSwitchPrimary::Query => { + let length = primary_index - query_index; + query_index = primary_index; + reference_index = usize::try_from( + isize::try_from(reference_index) + .unwrap() + .checked_add(anti_primary_gap) + .unwrap(), + ) + .unwrap(); + length + } + }; + let length_difference = anti_primary_gap - isize::try_from(length).unwrap(); + let cost_increment = config + .anti_primary_gap_costs(direction) + .evaluate(&anti_primary_gap); + let Some(cost_increment) = + cost_increment.checked_add(&config.length_costs.evaluate(&length)) + else { + return Cost::max_value(); + }; + let Some(cost_increment) = cost_increment + .checked_add(&config.length_difference_costs.evaluate(&length_difference)) + else { + return Cost::max_value(); + }; + cost_increment + } + AlignmentType::Root + | AlignmentType::SecondaryRoot + | AlignmentType::PrimaryReentry => { + // Do nothing + Cost::zero() + } + AlignmentType::PrimaryShortcut { .. } => panic!("Not supported"), + }; + + println!("Got cost increment of {cost_increment} for {alignment_type}"); + cost = if let Some(cost) = cost.checked_add(&cost_increment) { + cost + } else { + return Cost::max_value(); + }; + last_alignment_type = Some(alignment_type); + } + + cost + } } #[cfg(test)] mod tests { + use std::sync::LazyLock; + use compact_genome::{ implementation::{alphabets::dna_alphabet::DnaAlphabet, vec_sequence::VectorGenome}, - interface::sequence::{GenomeSequence, OwnedGenomeSequence}, + interface::{ + alphabet::Alphabet, + sequence::{GenomeSequence, OwnedGenomeSequence}, + }, }; + use generic_a_star::cost::U64Cost; - use crate::a_star_aligner::{ - alignment_result::alignment::Alignment, - template_switch_distance::{ - AlignmentType, EqualCostRange, TemplateSwitchDirection, TemplateSwitchPrimary, - TemplateSwitchSecondary, + use crate::{ + a_star_aligner::{ + alignment_result::alignment::Alignment, + template_switch_distance::{ + AlignmentType, EqualCostRange, TemplateSwitchDirection, TemplateSwitchPrimary, + TemplateSwitchSecondary, + }, }, + config::{BaseCost, TemplateSwitchConfig}, + costs::{cost_function::CostFunction, gap_affine::GapAffineAlignmentCostTable}, }; static START_REFERENCE: &[u8] = b"AGAGAGCTCTAA"; static START_QUERY: &[u8] = b"AGAGAGCTTTAA"; + static START_COSTS: LazyLock> = LazyLock::new(|| { + [ + CONFIG.base_cost.rqr + + CONFIG.offset_costs.evaluate(&-6) + + CONFIG.reverse_anti_primary_gap_costs.evaluate(&2) + + CONFIG.length_costs.evaluate(&2) + + CONFIG.length_difference_costs.evaluate(&0), + CONFIG.base_cost.rqr + + CONFIG.offset_costs.evaluate(&-4) + + CONFIG.reverse_anti_primary_gap_costs.evaluate(&3) + + CONFIG.length_costs.evaluate(&3) + + CONFIG.length_difference_costs.evaluate(&0), + CONFIG.base_cost.rqr + + CONFIG.offset_costs.evaluate(&-2) + + CONFIG.reverse_anti_primary_gap_costs.evaluate(&4) + + CONFIG.length_costs.evaluate(&4) + + CONFIG.length_difference_costs.evaluate(&0), + CONFIG.base_cost.rqr + + CONFIG.offset_costs.evaluate(&0) + + CONFIG.secondary_reverse_edit_costs.substitution_cost( + DnaAlphabet::ascii_to_character(b'G').unwrap(), + DnaAlphabet::ascii_to_character(b'T').unwrap(), + ) + + CONFIG.reverse_anti_primary_gap_costs.evaluate(&5) + + CONFIG.length_costs.evaluate(&5) + + CONFIG.length_difference_costs.evaluate(&0), + CONFIG.base_cost.rqr + + CONFIG.offset_costs.evaluate(&2) + + CONFIG.secondary_reverse_edit_costs.substitution_cost( + DnaAlphabet::ascii_to_character(b'G').unwrap(), + DnaAlphabet::ascii_to_character(b'T').unwrap(), + ) + + CONFIG.secondary_reverse_edit_costs.substitution_cost( + DnaAlphabet::ascii_to_character(b'A').unwrap(), + DnaAlphabet::ascii_to_character(b'C').unwrap(), + ) + + CONFIG.reverse_anti_primary_gap_costs.evaluate(&6) + + CONFIG.length_costs.evaluate(&6) + + CONFIG.length_difference_costs.evaluate(&0), + ] + .to_vec() + }); static START_ALIGNMENTS: &[&[(usize, AlignmentType)]] = &[ &[ (6, AlignmentType::PrimaryMatch), @@ -615,6 +919,48 @@ mod tests { static END_REFERENCE: &[u8] = b"AACTCTAGAGAG"; static END_QUERY: &[u8] = b"AATTCTAGAGAG"; + static END_COSTS: LazyLock> = LazyLock::new(|| { + [ + CONFIG.base_cost.rqr + + CONFIG.offset_costs.evaluate(&10) + + CONFIG.reverse_anti_primary_gap_costs.evaluate(&2) + + CONFIG.length_costs.evaluate(&2) + + CONFIG.length_difference_costs.evaluate(&0), + CONFIG.base_cost.rqr + + CONFIG.offset_costs.evaluate(&10) + + CONFIG.reverse_anti_primary_gap_costs.evaluate(&3) + + CONFIG.length_costs.evaluate(&3) + + CONFIG.length_difference_costs.evaluate(&0), + CONFIG.base_cost.rqr + + CONFIG.offset_costs.evaluate(&10) + + CONFIG.reverse_anti_primary_gap_costs.evaluate(&4) + + CONFIG.length_costs.evaluate(&4) + + CONFIG.length_difference_costs.evaluate(&0), + CONFIG.base_cost.rqr + + CONFIG.offset_costs.evaluate(&10) + + CONFIG.secondary_reverse_edit_costs.substitution_cost( + DnaAlphabet::ascii_to_character(b'A').unwrap(), + DnaAlphabet::ascii_to_character(b'C').unwrap(), + ) + + CONFIG.reverse_anti_primary_gap_costs.evaluate(&5) + + CONFIG.length_costs.evaluate(&5) + + CONFIG.length_difference_costs.evaluate(&0), + CONFIG.base_cost.rqr + + CONFIG.offset_costs.evaluate(&10) + + CONFIG.secondary_reverse_edit_costs.substitution_cost( + DnaAlphabet::ascii_to_character(b'A').unwrap(), + DnaAlphabet::ascii_to_character(b'C').unwrap(), + ) + + CONFIG.secondary_reverse_edit_costs.substitution_cost( + DnaAlphabet::ascii_to_character(b'G').unwrap(), + DnaAlphabet::ascii_to_character(b'T').unwrap(), + ) + + CONFIG.reverse_anti_primary_gap_costs.evaluate(&6) + + CONFIG.length_costs.evaluate(&6) + + CONFIG.length_difference_costs.evaluate(&0), + ] + .to_vec() + }); static END_ALIGNMENTS: &[&[(usize, AlignmentType)]] = &[ &[ (1, AlignmentType::PrimaryMatch), @@ -725,13 +1071,98 @@ mod tests { ], ]; + static CONFIG: LazyLock> = + LazyLock::new(|| TemplateSwitchConfig { + left_flank_length: 0, + right_flank_length: 0, + min_length: 3, + base_cost: BaseCost { + rrf: 10u64.into(), + rqf: 100u64.into(), + qrf: 1000u64.into(), + qqf: 10000u64.into(), + rrr: 100000u64.into(), + rqr: 1000000u64.into(), + qrr: 10000000u64.into(), + qqr: 100000000u64.into(), + }, + primary_edit_costs: GapAffineAlignmentCostTable::new( + "", + [0u64, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0] + .map(Into::into) + .to_vec(), + [3u64, 3, 3, 3].map(Into::into).to_vec(), + [1u64, 1, 1, 1].map(Into::into).to_vec(), + ), + secondary_forward_edit_costs: GapAffineAlignmentCostTable::new( + "", + [0u64, 3, 3, 3, 3, 0, 3, 3, 3, 3, 0, 3, 3, 3, 3, 0] + .map(Into::into) + .to_vec(), + [3u64, 6, 6, 6].map(Into::into).to_vec(), + [1u64, 1, 1, 1].map(Into::into).to_vec(), + ), + secondary_reverse_edit_costs: GapAffineAlignmentCostTable::new( + "", + [0u64, 5, 5, 5, 5, 0, 5, 5, 5, 5, 0, 5, 5, 5, 5, 0] + .map(Into::into) + .to_vec(), + [3u64, 7, 7, 7].map(Into::into).to_vec(), + [1u64, 1, 1, 1].map(Into::into).to_vec(), + ), + left_flank_edit_costs: GapAffineAlignmentCostTable::new_zero(), + right_flank_edit_costs: GapAffineAlignmentCostTable::new_zero(), + offset_costs: CostFunction::try_from( + (-20..=20) + .map(|i| (i, U64Cost::from(17 * u64::try_from(i + 21).unwrap()))) + .collect::>(), + ) + .unwrap(), + length_costs: CostFunction::try_from( + (0..=20) + .map(|i| (i, U64Cost::from(19 * u64::try_from(i + 21).unwrap()))) + .collect::>(), + ) + .unwrap(), + length_difference_costs: CostFunction::try_from( + (-20..=20) + .map(|i| (i, U64Cost::from(23 * u64::try_from(i + 21).unwrap()))) + .collect::>(), + ) + .unwrap(), + forward_anti_primary_gap_costs: CostFunction::try_from( + (-20..=20) + .map(|i| (i, U64Cost::from(29 * u64::try_from(i + 21).unwrap()))) + .collect::>(), + ) + .unwrap(), + reverse_anti_primary_gap_costs: CostFunction::try_from( + (-20..=20) + .map(|i| (i, U64Cost::from(31 * u64::try_from(i + 21).unwrap()))) + .collect::>(), + ) + .unwrap(), + }); + #[test] fn move_template_switch_start_backwards() { let reference = VectorGenome::::from_slice_u8(START_REFERENCE).unwrap(); let query = VectorGenome::from_slice_u8(START_QUERY).unwrap(); let mut alignment = Alignment::from(START_ALIGNMENTS[0].to_vec()); + assert_eq!( + alignment.compute_cost( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + 2, + 2, + &CONFIG + ), + START_COSTS[0] + ); - for expected_alignment in &START_ALIGNMENTS[1..] { + for (expected_alignment, expected_cost) in + START_ALIGNMENTS[1..].iter().zip(&START_COSTS[1..]) + { assert!(alignment.move_template_switch_start_backwards( reference.as_genome_subsequence(), query.as_genome_subsequence(), @@ -740,6 +1171,16 @@ mod tests { 1 )); assert_eq!(alignment, Alignment::from(expected_alignment.to_vec())); + assert_eq!( + alignment.compute_cost( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + 2, + 2, + &CONFIG + ), + *expected_cost + ); } } @@ -748,8 +1189,23 @@ mod tests { let reference = VectorGenome::::from_slice_u8(START_REFERENCE).unwrap(); let query = VectorGenome::from_slice_u8(START_QUERY).unwrap(); let mut alignment = Alignment::from(START_ALIGNMENTS.last().unwrap().to_vec()); + assert_eq!( + alignment.compute_cost( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + 2, + 2, + &CONFIG + ), + *START_COSTS.last().unwrap() + ); - for expected_alignment in START_ALIGNMENTS.iter().rev().skip(1) { + for (expected_alignment, expected_cost) in START_ALIGNMENTS + .iter() + .zip(START_COSTS.iter()) + .rev() + .skip(1) + { assert!(alignment.move_template_switch_start_forwards( reference.as_genome_subsequence(), query.as_genome_subsequence(), @@ -758,6 +1214,16 @@ mod tests { 1 )); assert_eq!(alignment, Alignment::from(expected_alignment.to_vec())); + assert_eq!( + alignment.compute_cost( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + 2, + 2, + &CONFIG + ), + *expected_cost + ); } } @@ -766,8 +1232,20 @@ mod tests { let reference = VectorGenome::::from_slice_u8(END_REFERENCE).unwrap(); let query = VectorGenome::from_slice_u8(END_QUERY).unwrap(); let mut alignment = Alignment::from(END_ALIGNMENTS.last().unwrap().to_vec()); + assert_eq!( + alignment.compute_cost( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + 1, + 1, + &CONFIG + ), + *END_COSTS.last().unwrap() + ); - for expected_alignment in END_ALIGNMENTS.iter().rev().skip(1) { + for (expected_alignment, expected_cost) in + END_ALIGNMENTS.iter().zip(END_COSTS.iter()).rev().skip(1) + { assert!(alignment.move_template_switch_end_backwards( reference.as_genome_subsequence(), query.as_genome_subsequence(), @@ -776,6 +1254,16 @@ mod tests { 1 )); assert_eq!(alignment, Alignment::from(expected_alignment.to_vec())); + assert_eq!( + alignment.compute_cost( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + 1, + 1, + &CONFIG + ), + *expected_cost + ); } } @@ -784,8 +1272,18 @@ mod tests { let reference = VectorGenome::::from_slice_u8(END_REFERENCE).unwrap(); let query = VectorGenome::from_slice_u8(END_QUERY).unwrap(); let mut alignment = Alignment::from(END_ALIGNMENTS[0].to_vec()); + assert_eq!( + alignment.compute_cost( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + 1, + 1, + &CONFIG + ), + *END_COSTS.first().unwrap() + ); - for expected_alignment in &END_ALIGNMENTS[1..] { + for (expected_alignment, expected_cost) in END_ALIGNMENTS[1..].iter().zip(&END_COSTS[1..]) { assert!(alignment.move_template_switch_end_forwards( reference.as_genome_subsequence(), query.as_genome_subsequence(), @@ -794,6 +1292,16 @@ mod tests { 1 )); assert_eq!(alignment, Alignment::from(expected_alignment.to_vec())); + assert_eq!( + alignment.compute_cost( + reference.as_genome_subsequence(), + query.as_genome_subsequence(), + 1, + 1, + &CONFIG + ), + *expected_cost + ); } } } diff --git a/lib_tsalign/src/a_star_aligner/template_switch_distance.rs b/lib_tsalign/src/a_star_aligner/template_switch_distance.rs index 400a1981..9d019289 100644 --- a/lib_tsalign/src/a_star_aligner/template_switch_distance.rs +++ b/lib_tsalign/src/a_star_aligner/template_switch_distance.rs @@ -241,52 +241,11 @@ impl Node { unreachable!("This closure is only called on template switch entrances.") }; - let base_cost = match ( - template_switch_primary, - template_switch_secondary, - template_switch_direction, - ) { - ( - TemplateSwitchPrimary::Reference, - TemplateSwitchSecondary::Reference, - TemplateSwitchDirection::Forward, - ) => base_cost.rrf, - ( - TemplateSwitchPrimary::Reference, - TemplateSwitchSecondary::Query, - TemplateSwitchDirection::Forward, - ) => base_cost.rqf, - ( - TemplateSwitchPrimary::Query, - TemplateSwitchSecondary::Reference, - TemplateSwitchDirection::Forward, - ) => base_cost.qrf, - ( - TemplateSwitchPrimary::Query, - TemplateSwitchSecondary::Query, - TemplateSwitchDirection::Forward, - ) => base_cost.qqf, - ( - TemplateSwitchPrimary::Reference, - TemplateSwitchSecondary::Reference, - TemplateSwitchDirection::Reverse, - ) => base_cost.rrr, - ( - TemplateSwitchPrimary::Reference, - TemplateSwitchSecondary::Query, - TemplateSwitchDirection::Reverse, - ) => base_cost.rqr, - ( - TemplateSwitchPrimary::Query, - TemplateSwitchSecondary::Reference, - TemplateSwitchDirection::Reverse, - ) => base_cost.qrr, - ( - TemplateSwitchPrimary::Query, - TemplateSwitchSecondary::Query, - TemplateSwitchDirection::Reverse, - ) => base_cost.qqr, - }; + let base_cost = base_cost.get( + *template_switch_primary, + *template_switch_secondary, + *template_switch_direction, + ); (base_cost != Strategies::Cost::max_value()).then(|| { self.generate_successor( diff --git a/lib_tsalign/src/config.rs b/lib_tsalign/src/config.rs index 4257a189..865b4232 100644 --- a/lib_tsalign/src/config.rs +++ b/lib_tsalign/src/config.rs @@ -2,7 +2,9 @@ use compact_genome::interface::alphabet::Alphabet; use num_traits::{Bounded, bounds::UpperBounded}; use crate::{ - a_star_aligner::template_switch_distance::TemplateSwitchDirection, + a_star_aligner::template_switch_distance::{ + TemplateSwitchDirection, TemplateSwitchPrimary, TemplateSwitchSecondary, + }, costs::{cost_function::CostFunction, gap_affine::GapAffineAlignmentCostTable}, error::{Error, Result}, }; @@ -104,6 +106,58 @@ impl BaseCost { } } +impl BaseCost { + pub fn get( + &self, + primary: TemplateSwitchPrimary, + secondary: TemplateSwitchSecondary, + direction: TemplateSwitchDirection, + ) -> Cost { + match (primary, secondary, direction) { + ( + TemplateSwitchPrimary::Reference, + TemplateSwitchSecondary::Reference, + TemplateSwitchDirection::Forward, + ) => self.rrf.clone(), + ( + TemplateSwitchPrimary::Reference, + TemplateSwitchSecondary::Reference, + TemplateSwitchDirection::Reverse, + ) => self.rrr.clone(), + ( + TemplateSwitchPrimary::Reference, + TemplateSwitchSecondary::Query, + TemplateSwitchDirection::Forward, + ) => self.rqf.clone(), + ( + TemplateSwitchPrimary::Reference, + TemplateSwitchSecondary::Query, + TemplateSwitchDirection::Reverse, + ) => self.rqr.clone(), + ( + TemplateSwitchPrimary::Query, + TemplateSwitchSecondary::Reference, + TemplateSwitchDirection::Forward, + ) => self.qrf.clone(), + ( + TemplateSwitchPrimary::Query, + TemplateSwitchSecondary::Reference, + TemplateSwitchDirection::Reverse, + ) => self.qrr.clone(), + ( + TemplateSwitchPrimary::Query, + TemplateSwitchSecondary::Query, + TemplateSwitchDirection::Forward, + ) => self.qqf.clone(), + ( + TemplateSwitchPrimary::Query, + TemplateSwitchSecondary::Query, + TemplateSwitchDirection::Reverse, + ) => self.qqr.clone(), + } + } +} + impl Clone for TemplateSwitchConfig { fn clone(&self) -> Self { Self { From c9b70f686f77eb68f402242aeef02b273b8d1ac8 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 14 May 2025 14:56:00 +0300 Subject: [PATCH 08/10] Use alignment cost computation in equal ts cost range computation. --- lib_tsalign/src/a_star_aligner.rs | 5 +- .../src/a_star_aligner/alignment_result.rs | 180 +++--------------- 2 files changed, 30 insertions(+), 155 deletions(-) diff --git a/lib_tsalign/src/a_star_aligner.rs b/lib_tsalign/src/a_star_aligner.rs index 12cbab4d..9e547d19 100644 --- a/lib_tsalign/src/a_star_aligner.rs +++ b/lib_tsalign/src/a_star_aligner.rs @@ -159,7 +159,10 @@ pub fn template_switch_distance_a_star_align< cost_limit: Option, memory_limit: Option, template_switch_count_memory: ::Memory, -) -> AlignmentResult { +) -> AlignmentResult +where + Strategies::Cost: From, +{ let memory = Memory { template_switch_min_length: Default::default(), chaining: <::Chaining as ChainingStrategy< diff --git a/lib_tsalign/src/a_star_aligner/alignment_result.rs b/lib_tsalign/src/a_star_aligner/alignment_result.rs index ed09595f..9d27ecf8 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result.rs @@ -2,22 +2,14 @@ use std::fmt::{Display, Formatter, Result, Write}; use a_star_sequences::SequencePair; use alignment::{Alignment, stream::AlignmentStream}; -use compact_genome::interface::{ - alphabet::{Alphabet, AlphabetCharacter}, - sequence::GenomeSequence, -}; +use compact_genome::interface::{alphabet::Alphabet, sequence::GenomeSequence}; use generic_a_star::{AStarResult, cost::AStarCost}; use noisy_float::types::{R64, r64}; use num_traits::{Float, Zero}; use crate::config::TemplateSwitchConfig; -use super::{ - alignment_geometry::AlignmentRange, - template_switch_distance::{ - TemplateSwitchDirection, TemplateSwitchPrimary, TemplateSwitchSecondary, - }, -}; +use super::alignment_geometry::AlignmentRange; pub mod a_star_sequences; pub mod alignment; @@ -239,7 +231,9 @@ impl AlignmentResult AlignmentResult { +impl> + AlignmentResult +{ pub fn compute_ts_equal_cost_ranges< AlphabetType: Alphabet, SubsequenceType: GenomeSequence + ?Sized, @@ -250,10 +244,22 @@ impl AlignmentResult, config: &TemplateSwitchConfig, ) { - let Self::WithTarget { alignment, .. } = self else { + let Self::WithTarget { + alignment, + statistics, + } = self + else { return; }; let mut stream = AlignmentStream::new(); + let reference_offset = range + .as_ref() + .map(|range| range.reference_offset()) + .unwrap_or(0); + let query_offset = range + .as_ref() + .map(|range| range.query_offset()) + .unwrap_or(0); for i in 0..alignment.inner_mut().len() { let multiplicity = alignment.inner_mut()[i].0; @@ -261,10 +267,6 @@ impl AlignmentResult { @@ -277,144 +279,14 @@ impl AlignmentResult preceding_reference_index, - TemplateSwitchPrimary::Query => preceding_query_index, - }; - let mut inner_secondary_index = (match secondary { - TemplateSwitchSecondary::Reference => preceding_reference_index, - TemplateSwitchSecondary::Query => preceding_query_index, - } as isize - + first_offset) - as usize; - - while let Some( - super::template_switch_distance::AlignmentType::PrimaryFlankMatch - | super::template_switch_distance::AlignmentType::PrimaryMatch, - ) = preceding_alignment.next() - { - println!("Entering loop"); - inner_primary_index -= 1; - match direction { - TemplateSwitchDirection::Forward => inner_secondary_index -= 1, - TemplateSwitchDirection::Reverse => inner_secondary_index += 1, - } - preceding_reference_index -= 1; - preceding_query_index -= 1; - - let primary_character = - primary.get(reference, query)[inner_primary_index].clone(); - let secondary_character = secondary.get(reference, query) - [match direction { - TemplateSwitchDirection::Forward => inner_secondary_index, - TemplateSwitchDirection::Reverse => inner_secondary_index - 1, - }] - .complement(); - let reference_character = reference[preceding_reference_index].clone(); - let query_character = query[preceding_query_index].clone(); - - let preceding_cost = if remaining_left_flank > 0 { - remaining_left_flank -= 1; - &config.left_flank_edit_costs - } else { - &config.primary_edit_costs - } - .match_or_substitution_cost(reference_character, query_character); - let inner_cost = config - .secondary_edit_costs(direction) - .match_or_substitution_cost(primary_character, secondary_character); - - if preceding_cost.is_zero() && inner_cost.is_zero() { - equal_cost_range.min_start -= 1; - } else { - println!( - "Breaking loop due to cost: preceding: {preceding_cost}; inner: {inner_cost}" - ); - break; - } - } - } - - // Extend start forwards. - // TODO the following is mostly a copy of above. - /*{ - let mut remaining_right_flank = config.right_flank_length; - let mut preceding_reference_index = stream.head_coordinates().reference() - + range - .clone() - .map(|range| range.reference_offset()) - .unwrap_or(0); - let mut preceding_query_index = stream.head_coordinates().query() - + range.clone().map(|range| range.query_offset()).unwrap_or(0); - let mut preceding_alignment = alignment.iter_flat_cloned().rev(); - let mut inner_primary_index = match primary { - TemplateSwitchPrimary::Reference => preceding_reference_index, - TemplateSwitchPrimary::Query => preceding_query_index, - }; - let mut inner_secondary_index = (match secondary { - TemplateSwitchSecondary::Reference => preceding_reference_index, - TemplateSwitchSecondary::Query => preceding_query_index, - } as isize - + first_offset) - as usize; - - while let Some( - super::template_switch_distance::AlignmentType::PrimaryFlankMatch - | super::template_switch_distance::AlignmentType::PrimaryMatch, - ) = preceding_alignment.next() - { - println!("Entering loop"); - inner_primary_index -= 1; - match direction { - TemplateSwitchDirection::Forward => inner_secondary_index -= 1, - TemplateSwitchDirection::Reverse => inner_secondary_index += 1, - } - preceding_reference_index -= 1; - preceding_query_index -= 1; - - let primary_character = - primary.get(reference, query)[inner_primary_index].clone(); - let secondary_character = secondary.get(reference, query) - [match direction { - TemplateSwitchDirection::Forward => inner_secondary_index, - TemplateSwitchDirection::Reverse => inner_secondary_index - 1, - }] - .complement(); - let reference_character = reference[preceding_reference_index].clone(); - let query_character = query[preceding_query_index].clone(); - - let preceding_cost = if remaining_left_flank > 0 { - remaining_left_flank -= 1; - &config.left_flank_edit_costs - } else { - &config.primary_edit_costs - } - .match_or_substitution_cost(reference_character, query_character); - let inner_cost = config - .secondary_edit_costs(direction) - .match_or_substitution_cost(primary_character, secondary_character); - - if preceding_cost.is_zero() && inner_cost.is_zero() { - equal_cost_range.min_start -= 1; - } else { - println!( - "Breaking loop due to cost: preceding: {preceding_cost}; inner: {inner_cost}" - ); - break; - } - } - }*/ + let initial_cost = alignment.compute_cost( + reference, + query, + reference_offset, + query_offset, + config, + ); + assert_eq!(initial_cost, (statistics.cost.round().raw() as u64).into()); let super::template_switch_distance::AlignmentType::TemplateSwitchEntrance { equal_cost_range: alignment_equal_cost_range, From 87d0eb6957ab985156630a9ec2c9c0a601f246f1 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Thu, 15 May 2025 10:42:19 +0300 Subject: [PATCH 09/10] Implement TS equal cost range finding. --- .../src/a_star_aligner/alignment_result.rs | 138 +++++++++++++++--- .../alignment/template_switch_specifics.rs | 8 +- test_files/config/small/config.tsa | 41 +++++- test_files/twin_small_range.fa | 4 + 4 files changed, 160 insertions(+), 31 deletions(-) create mode 100644 test_files/twin_small_range.fa diff --git a/lib_tsalign/src/a_star_aligner/alignment_result.rs b/lib_tsalign/src/a_star_aligner/alignment_result.rs index 9d27ecf8..7774a2fe 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result.rs @@ -270,32 +270,130 @@ impl> mut equal_cost_range, .. } => { - if config.left_flank_length > 0 || config.right_flank_length > 0 { - continue; - } - - equal_cost_range.min_start = 0; - equal_cost_range.max_start = 0; - equal_cost_range.min_end = 0; - equal_cost_range.max_end = 0; - - let initial_cost = alignment.compute_cost( - reference, - query, - reference_offset, - query_offset, - config, - ); - assert_eq!(initial_cost, (statistics.cost.round().raw() as u64).into()); - - let super::template_switch_distance::AlignmentType::TemplateSwitchEntrance { + if config.left_flank_length == 0 && config.right_flank_length == 0 { + equal_cost_range.min_start = 0; + equal_cost_range.max_start = 0; + equal_cost_range.min_end = 0; + equal_cost_range.max_end = 0; + + let initial_cost = alignment.compute_cost( + reference, + query, + reference_offset, + query_offset, + config, + ); + assert_eq!(initial_cost, (statistics.cost.round().raw() as u64).into()); + + { + let mut min_start_alignment = alignment.clone(); + while min_start_alignment.move_template_switch_end_forwards( + reference, + query, + reference_offset, + query_offset, + i, + ) { + let new_cost = min_start_alignment.compute_cost( + reference, + query, + reference_offset, + query_offset, + config, + ); + if new_cost > initial_cost { + break; + } else { + assert_eq!(new_cost, initial_cost); + equal_cost_range.min_start -= 1; + } + } + } + + { + let mut max_start_alignment = alignment.clone(); + while max_start_alignment.move_template_switch_start_forwards( + reference, + query, + reference_offset, + query_offset, + i, + ) { + let new_cost = max_start_alignment.compute_cost( + reference, + query, + reference_offset, + query_offset, + config, + ); + if new_cost > initial_cost { + break; + } else { + assert_eq!(new_cost, initial_cost); + equal_cost_range.max_start += 1; + } + } + } + + { + let mut min_end_alignment = alignment.clone(); + while min_end_alignment.move_template_switch_end_backwards( + reference, + query, + reference_offset, + query_offset, + i, + ) { + let new_cost = min_end_alignment.compute_cost( + reference, + query, + reference_offset, + query_offset, + config, + ); + if new_cost > initial_cost { + break; + } else { + assert_eq!(new_cost, initial_cost); + equal_cost_range.min_end -= 1; + } + } + } + + { + let mut max_end_alignment = alignment.clone(); + while max_end_alignment.move_template_switch_end_forwards( + reference, + query, + reference_offset, + query_offset, + i, + ) { + let new_cost = max_end_alignment.compute_cost( + reference, + query, + reference_offset, + query_offset, + config, + ); + if new_cost > initial_cost { + break; + } else { + assert_eq!(new_cost, initial_cost); + equal_cost_range.max_end += 1; + } + } + } + + let super::template_switch_distance::AlignmentType::TemplateSwitchEntrance { equal_cost_range: alignment_equal_cost_range, .. } = &mut alignment.inner_mut()[i].1 else { unreachable!() }; - *alignment_equal_cost_range = equal_cost_range; + *alignment_equal_cost_range = equal_cost_range; + } } _ => { /* Do nothing. */ } } diff --git a/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs index 87de694b..173679b9 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs @@ -208,7 +208,7 @@ impl Alignment { self.alignment[compact_index - 1].0 += 1; } else { self.alignment - .insert(compact_index - 1, (1, outer_alignment_type)); + .insert(compact_index, (1, outer_alignment_type)); compact_index += 1; } @@ -620,9 +620,6 @@ impl Alignment { query[secondary_index - 1].complement() } }; - println!( - "Primary character: {primary_character}; secondary character: {secondary_character}" - ); let cost_increment = config .secondary_edit_costs(direction) .match_or_substitution_cost(primary_character, secondary_character); @@ -725,7 +722,6 @@ impl Alignment { AlignmentType::PrimaryShortcut { .. } => panic!("Not supported"), }; - println!("Got cost increment of {cost_increment} for {alignment_type}"); cost = if let Some(cost) = cost.checked_add(&cost_increment) { cost } else { @@ -1163,7 +1159,7 @@ mod tests { for (expected_alignment, expected_cost) in START_ALIGNMENTS[1..].iter().zip(&START_COSTS[1..]) { - assert!(alignment.move_template_switch_start_backwards( + assert!(alignment.move_template_switch_end_forwards( reference.as_genome_subsequence(), query.as_genome_subsequence(), 2, diff --git a/test_files/config/small/config.tsa b/test_files/config/small/config.tsa index f772d360..a0e50127 100644 --- a/test_files/config/small/config.tsa +++ b/test_files/config/small/config.tsa @@ -5,10 +5,14 @@ right_flank_length = 0 # Base Cost -rr_cost = 4 -rq_cost = 4 -qr_cost = 4 -qq_cost = 4 +rrf_cost = 300 +rqf_cost = 200 +qrf_cost = 200 +qqf_cost = 300 +rrr_cost = 4 +rqr_cost = 4 +qrr_cost = 4 +qqr_cost = 4 # Jump Costs @@ -24,6 +28,14 @@ LengthDifference -inf -100 11 inf 0 inf +ForwardAntiPrimaryGap + -inf 1 + 0 inf + +ReverseAntiPrimaryGap + -inf + 0 + # Primary Edit Costs SubstitutionCostTable @@ -43,7 +55,26 @@ GapExtendCostVector A C G T N 1 1 1 1 1 -# Secondary Edit Costs +# Secondary Forward Edit Costs + +SubstitutionCostTable + | A C G T N +--+--------------- +A | 0 8 8 8 0 +C | 8 0 8 8 0 +G | 8 8 0 8 0 +T | 8 8 8 0 0 +N | 0 0 0 0 0 + +GapOpenCostVector + A C G T N + 9 9 9 9 9 + +GapExtendCostVector + A C G T N + 2 2 2 2 2 + +# Secondary Reverse Edit Costs SubstitutionCostTable | A C G T N diff --git a/test_files/twin_small_range.fa b/test_files/twin_small_range.fa new file mode 100644 index 00000000..3a1d3352 --- /dev/null +++ b/test_files/twin_small_range.fa @@ -0,0 +1,4 @@ +>REF +ACAGCGGGCCCACTGT +>QRY +ACAGCGGAAACCACTGT \ No newline at end of file From bc59c6f6cab7101fb9a15ecbcd9e3dc31e8af934 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Thu, 15 May 2025 10:53:00 +0300 Subject: [PATCH 10/10] Fix failing test. --- .../alignment/template_switch_specifics.rs | 39 +++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs index 173679b9..753f7dd2 100644 --- a/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs +++ b/lib_tsalign/src/a_star_aligner/alignment_result/alignment/template_switch_specifics.rs @@ -72,6 +72,18 @@ impl Alignment { .unwrap(), ) .unwrap(); + match direction { + TemplateSwitchDirection::Forward => { + if ts_inner_secondary_index == 0 { + return false; + } + } + TemplateSwitchDirection::Reverse => { + if ts_inner_secondary_index >= secondary.get(reference, query).len() { + return false; + } + } + } // Remove one match or substitution from before the TS. let multiplicity = &mut self.alignment[compact_index - 1].0; @@ -327,12 +339,24 @@ impl Alignment { ) .unwrap(); let ts_inner_secondary_index = match direction { - TemplateSwitchDirection::Forward => ts_inner_secondary_index - .checked_add(inner_secondary_length) - .unwrap(), - TemplateSwitchDirection::Reverse => ts_inner_secondary_index - .checked_sub(inner_secondary_length) - .unwrap(), + TemplateSwitchDirection::Forward => { + let ts_inner_secondary_index = ts_inner_secondary_index + .checked_add(inner_secondary_length) + .unwrap(); + if ts_inner_secondary_index >= secondary.get(reference, query).len() { + return false; + } + ts_inner_secondary_index + } + TemplateSwitchDirection::Reverse => { + let ts_inner_secondary_index = ts_inner_secondary_index + .checked_sub(inner_secondary_length) + .unwrap(); + if ts_inner_secondary_index == 0 { + return false; + } + ts_inner_secondary_index + } }; // Remove one match or substitution from after the TS. @@ -1159,7 +1183,8 @@ mod tests { for (expected_alignment, expected_cost) in START_ALIGNMENTS[1..].iter().zip(&START_COSTS[1..]) { - assert!(alignment.move_template_switch_end_forwards( + println!("{}", alignment.cigar()); + assert!(alignment.move_template_switch_start_backwards( reference.as_genome_subsequence(), query.as_genome_subsequence(), 2,