Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions lib_ts_chainalign/src/alignment/coordinates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,9 @@ impl AlignmentCoordinates {
// Descendant is a, so it is limited by the descendant ordinate.
TsDescendant::Seq1 => *a < end.secondary_ordinate_descendant().unwrap(),
// Descendant is b, so a can go until the end of the sequence.
TsDescendant::Seq2 => *a < sequences.end().primary_ordinate_a().unwrap(),
TsDescendant::Seq2 => {
*a < sequences.primary_end().primary_ordinate_a().unwrap()
}
}
}
AlignmentCoordinates::Secondary { ancestor, .. } => 0 < *ancestor,
Expand Down Expand Up @@ -138,7 +140,9 @@ impl AlignmentCoordinates {
AlignmentCoordinates::Primary { b, .. } => {
match end.ts_kind().unwrap().descendant {
// Descendant is a, so b can go until the end of the sequence.
TsDescendant::Seq1 => *b < sequences.end().primary_ordinate_b().unwrap(),
TsDescendant::Seq1 => {
*b < sequences.primary_end().primary_ordinate_b().unwrap()
}
// Descendant is b, so it is limited by the descendant ordinate.
TsDescendant::Seq2 => *b < end.secondary_ordinate_descendant().unwrap(),
}
Expand Down
24 changes: 20 additions & 4 deletions lib_ts_chainalign/src/alignment/sequences.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::alignment::{
coordinates::AlignmentCoordinates,
ts_kind::{TsAncestor, TsDescendant},
ts_kind::{TsAncestor, TsDescendant, TsKind},
};

pub struct AlignmentSequences {
Expand Down Expand Up @@ -53,12 +53,28 @@ impl AlignmentSequences {
}
}

pub fn start(&self) -> AlignmentCoordinates {
pub fn primary_start(&self) -> AlignmentCoordinates {
AlignmentCoordinates::new_primary(0, 0)
}

pub fn end(&self) -> AlignmentCoordinates {
AlignmentCoordinates::new_primary(self.seq1.len(), self.seq2.len())
pub fn primary_end(&self) -> AlignmentCoordinates {
self.end(None)
}

pub fn secondary_end(&self, ts_kind: TsKind) -> AlignmentCoordinates {
self.end(Some(ts_kind))
}

pub fn end(&self, ts_kind: Option<TsKind>) -> AlignmentCoordinates {
match ts_kind {
None => AlignmentCoordinates::new_primary(self.seq1.len(), self.seq2.len()),
Some(ts_kind @ (TsKind::TS11 | TsKind::TS21)) => {
AlignmentCoordinates::new_secondary(0, self.seq1.len(), ts_kind)
}
Some(ts_kind @ (TsKind::TS12 | TsKind::TS22)) => {
AlignmentCoordinates::new_secondary(0, self.seq2.len(), ts_kind)
}
}
}

pub fn seq1(&self) -> &[u8] {
Expand Down
126 changes: 104 additions & 22 deletions lib_ts_chainalign/src/anchors/reverse_lookup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@ use crate::{
anchors::{Anchors, index::AnchorIndex, primary::PrimaryAnchor, secondary::SecondaryAnchor},
};

struct PrimaryAnchorToIndexIter<CoordinateIter: Iterator, AnchorIter: Iterator> {
struct PrimaryAnchorToIndexIter<
Anchor,
CoordinateIter: Iterator<Item = Anchor>,
AnchorIter: Iterator,
> {
coordinate_iter: Peekable<CoordinateIter>,
anchor_iter: Peekable<AnchorIter>,
}
Expand All @@ -15,25 +19,38 @@ struct SecondaryAnchorToIndexIter<CoordinateIter: Iterator, AnchorIter: Iterator
anchor_iter: Peekable<AnchorIter>,
}

pub trait PartialIntoAnchorIndex {
type IntoPartSource;
type IntoTarget;

fn source_part(&self) -> &Self::IntoPartSource;

fn partial_into(self, target: AnchorIndex) -> Self::IntoTarget;
}

impl<
CoordinateIter: Iterator<Item = PrimaryAnchor>,
Anchor: PartialIntoAnchorIndex<IntoPartSource = PrimaryAnchor>,
CoordinateIter: Iterator<Item = Anchor>,
AnchorIter: Iterator<Item = (AnchorIndex, PrimaryAnchor)>,
> Iterator for PrimaryAnchorToIndexIter<CoordinateIter, AnchorIter>
> Iterator for PrimaryAnchorToIndexIter<Anchor, CoordinateIter, AnchorIter>
{
type Item = Option<AnchorIndex>;
type Item = Anchor::IntoTarget;

fn next(&mut self) -> Option<Self::Item> {
while let (Some(coordinate_anchor), Some((anchor_index, anchor))) =
(self.coordinate_iter.peek(), self.anchor_iter.peek())
{
match coordinate_anchor.cmp(anchor) {
match coordinate_anchor.source_part().cmp(anchor) {
Ordering::Less => {
self.coordinate_iter.next().unwrap();
return Some(None);
}
Ordering::Equal => {
let result = Some(Some(*anchor_index));
self.coordinate_iter.next().unwrap();
let result = Some(
self.coordinate_iter
.next()
.unwrap()
.partial_into(*anchor_index),
);
self.anchor_iter.next().unwrap();
return result;
}
Expand All @@ -43,29 +60,33 @@ impl<
}
}

self.coordinate_iter.next().map(|_| None)
None
}
}

impl<
CoordinateIter: Iterator<Item = SecondaryAnchor>,
Anchor: PartialIntoAnchorIndex<IntoPartSource = SecondaryAnchor>,
CoordinateIter: Iterator<Item = Anchor>,
AnchorIter: Iterator<Item = (AnchorIndex, SecondaryAnchor)>,
> Iterator for SecondaryAnchorToIndexIter<CoordinateIter, AnchorIter>
{
type Item = Option<AnchorIndex>;
type Item = Anchor::IntoTarget;

fn next(&mut self) -> Option<Self::Item> {
while let (Some(coordinate_anchor), Some((anchor_index, anchor))) =
(self.coordinate_iter.peek(), self.anchor_iter.peek())
{
match coordinate_anchor.cmp(anchor) {
match coordinate_anchor.source_part().cmp(anchor) {
Ordering::Less => {
self.coordinate_iter.next().unwrap();
return Some(None);
}
Ordering::Equal => {
let result = Some(Some(*anchor_index));
self.coordinate_iter.next().unwrap();
let result = Some(
self.coordinate_iter
.next()
.unwrap()
.partial_into(*anchor_index),
);
self.anchor_iter.next().unwrap();
return result;
}
Expand All @@ -75,34 +96,95 @@ impl<
}
}

self.coordinate_iter.next().map(|_| None)
None
}
}

impl Anchors {
/// Returns an iterator over the primary anchor indices that correspond to the given primary anchors.
///
/// If a primary anchor does not exist, then the iterator returns `Some(None)`.
pub fn primary_anchor_to_index_iter(
pub fn primary_anchor_to_index_iter<
Anchor: PartialIntoAnchorIndex<IntoPartSource = PrimaryAnchor>,
>(
&self,
iter: impl IntoIterator<Item = PrimaryAnchor>,
) -> impl Iterator<Item = Option<AnchorIndex>> {
iter: impl IntoIterator<Item = Anchor>,
) -> impl Iterator<Item = Anchor::IntoTarget> {
PrimaryAnchorToIndexIter {
coordinate_iter: iter.into_iter().peekable(),
anchor_iter: self.enumerate_primaries().peekable(),
}
}

/// Returns an iterator over the secondary anchor indices that correspond to the given secondary anchors.
///
/// If a secondary anchor does not exist, then the iterator returns `Some(None)`.
pub fn secondary_anchor_to_index_iter(
pub fn secondary_anchor_to_index_iter<
Anchor: PartialIntoAnchorIndex<IntoPartSource = SecondaryAnchor>,
>(
&self,
iter: impl IntoIterator<Item = SecondaryAnchor>,
iter: impl IntoIterator<Item = Anchor>,
ts_kind: TsKind,
) -> impl Iterator<Item = Option<AnchorIndex>> {
) -> impl Iterator<Item = Anchor::IntoTarget> {
SecondaryAnchorToIndexIter {
coordinate_iter: iter.into_iter().peekable(),
anchor_iter: self.enumerate_secondaries(ts_kind).peekable(),
}
}
}

impl PartialIntoAnchorIndex for PrimaryAnchor {
type IntoPartSource = PrimaryAnchor;

type IntoTarget = AnchorIndex;

fn source_part(&self) -> &Self::IntoPartSource {
self
}

fn partial_into(self, target: AnchorIndex) -> Self::IntoTarget {
target
}
}

impl PartialIntoAnchorIndex for SecondaryAnchor {
type IntoPartSource = SecondaryAnchor;

type IntoTarget = AnchorIndex;

fn source_part(&self) -> &Self::IntoPartSource {
self
}

fn partial_into(self, target: AnchorIndex) -> Self::IntoTarget {
target
}
}

impl<T> PartialIntoAnchorIndex for (PrimaryAnchor, T) {
type IntoPartSource = PrimaryAnchor;

type IntoTarget = (AnchorIndex, T);

fn source_part(&self) -> &Self::IntoPartSource {
&self.0
}

fn partial_into(self, target: AnchorIndex) -> Self::IntoTarget {
(target, self.1)
}
}

impl<T> PartialIntoAnchorIndex for (SecondaryAnchor, T) {
type IntoPartSource = SecondaryAnchor;

type IntoTarget = (AnchorIndex, T);

fn source_part(&self) -> &Self::IntoPartSource {
&self.0
}

fn partial_into(self, target: AnchorIndex) -> Self::IntoTarget {
(target, self.1)
}
}
58 changes: 54 additions & 4 deletions lib_ts_chainalign/src/chain_align.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use generic_a_star::{
open_lists::{AStarOpenList, linear_heap::LinearHeap},
};
use indicatif::ProgressBar;
use itertools::Itertools;
use lib_tsalign::a_star_aligner::{
alignment_result::AlignmentResult,
template_switch_distance::{EqualCostRange, TemplateSwitchDirection},
Expand All @@ -23,6 +24,7 @@ use log::{debug, trace};
use rustc_hash::FxHashMapSeed;
use std::{
fmt::Write,
iter,
time::{Duration, Instant},
};

Expand All @@ -40,12 +42,17 @@ use crate::{
mod chainer;
mod evaluation;

pub struct AlignmentParameters {
pub struct AlignmentParameters<Cost> {
/// The step width for generating successors during chaining.
///
/// At most `max_successors` will be generated at a time, but at least all with minimum chaining cost.
pub max_successors: usize,

/// The cost until which the cost function is initialised exactly.
///
/// Anchor chainings with a higher exact cost are initialised based on the lower bound.
pub max_exact_cost_function_cost: Cost,

/// The closed list type to use for chaining.
pub closed_list: ChainingClosedList,

Expand Down Expand Up @@ -76,7 +83,7 @@ pub fn align<AlphabetType: Alphabet, Cost: AStarCost>(
sequences: &AlignmentSequences,
start: AlignmentCoordinates,
end: AlignmentCoordinates,
parameters: &AlignmentParameters,
parameters: &AlignmentParameters<Cost>,
alignment_costs: &AlignmentCosts<Cost>,
rc_fn: &dyn Fn(u8) -> u8,
max_match_run: u32,
Expand Down Expand Up @@ -120,7 +127,7 @@ pub fn choose_closed_list<
sequences: &AlignmentSequences,
start: AlignmentCoordinates,
end: AlignmentCoordinates,
parameters: &AlignmentParameters,
parameters: &AlignmentParameters<Cost>,
alignment_costs: &AlignmentCosts<Cost>,
rc_fn: &dyn Fn(u8) -> u8,
max_match_run: u32,
Expand Down Expand Up @@ -167,7 +174,7 @@ fn actually_align<
sequences: &AlignmentSequences,
start: AlignmentCoordinates,
end: AlignmentCoordinates,
parameters: &AlignmentParameters,
parameters: &AlignmentParameters<Cost>,
alignment_costs: &AlignmentCosts<Cost>,
rc_fn: &dyn Fn(u8) -> u8,
max_match_run: u32,
Expand Down Expand Up @@ -324,6 +331,25 @@ fn actually_align<
};

progress_bar.finish_and_clear();
let total_gap_fill_alignments: u32 =
chain_evaluator.gap_fill_alignments_per_chain().iter().sum();
let chains_with_at_most_exp2x_gap_alignments_amount: Vec<_> = iter::once(
chain_evaluator
.gap_fill_alignments_per_chain()
.iter()
.filter(|a| **a == 0)
.count(),
)
.chain((0..u32::BITS).map(|e| {
chain_evaluator
.gap_fill_alignments_per_chain()
.iter()
.filter(|a| **a <= 1 << e)
.count()
}))
.take_while_inclusive(|amount| *amount < chaining_execution_count)
.collect();

debug!("Computed {chaining_execution_count} chains");
debug!("Chaining took {:.1}s", chaining_duration.as_secs_f64());
debug!("Evaluation took {:.1}s", evaluation_duration.as_secs_f64());
Expand All @@ -335,6 +361,30 @@ fn actually_align<
);
debug!("Chaining closed nodes: {total_chaining_closed_nodes}");
debug!("Total chain gaps: {}", chain_evaluator.total_gaps());
debug!(
"Total chain gap alignments: {} ({:.0}% of total gaps)",
total_gap_fill_alignments,
total_gap_fill_alignments as f64 / chain_evaluator.total_gaps() as f64 * 1e2
);
debug!(
"Fraction of chains with [0, 0] gap alignments: {:.0}%",
*chains_with_at_most_exp2x_gap_alignments_amount
.first()
.unwrap() as f64
/ chaining_execution_count as f64
* 1e2,
);
for (e, amount) in chains_with_at_most_exp2x_gap_alignments_amount
.windows(2)
.enumerate()
{
debug!(
"Fraction of chains with [{}, {}] gap alignments: {:.0}%",
if e > 0 { (1u32 << (e - 1)) + 1 } else { 1 },
1 << e,
(amount[1] - amount[0]) as f64 / chaining_execution_count as f64 * 1e2,
);
}
debug!(
"Total chain gap fillings: {} ({:.2}x total gaps, {:.0}% redundant)",
chain_evaluator.total_gap_fillings(),
Expand Down
Loading