From 22ec43d7d3b19fa73214d9a5884f9663cb40ff15 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Fri, 19 Dec 2025 15:08:24 +0200 Subject: [PATCH 1/3] Compute 12-jump exact cost function only if anchors can be reached according to lower bound. --- .../src/chaining_cost_function.rs | 134 +++++++++++++----- .../src/exact_chaining/ts_12_jump.rs | 10 +- 2 files changed, 107 insertions(+), 37 deletions(-) diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index ea620367..45399819 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -1,8 +1,8 @@ -use std::time::Instant; +use std::time::{Duration, Instant}; use generic_a_star::cost::AStarCost; use itertools::Itertools; -use log::{debug, info}; +use log::{debug, info, trace}; use num_traits::Zero; use crate::{ @@ -72,7 +72,8 @@ impl ChainingCostFunction { let mut additional_primary_targets_output = Vec::new(); let mut additional_secondary_targets_output = Vec::new(); - // Initialise primary with infinitiy. + // Initialise primary with infinity. + trace!("Initialise primary with infinity"); let mut primary = ChainingCostArray::new_from_cost( [primary_anchor_amount, primary_anchor_amount], Cost::max_value(), @@ -80,6 +81,7 @@ impl ChainingCostFunction { ); // Fill primary from start with exact values. + trace!("Fill primary"); additional_primary_targets_output.clear(); primary_aligner.align_until_cost_limit( sequences.primary_start(), @@ -188,6 +190,7 @@ impl ChainingCostFunction { } // Initialise secondaries with infinity. + trace!("Initialise secondaries with infinity"); let mut secondaries = TsKind::iter() .map(|ts_kind| { ChainingCostArray::new_from_cost( @@ -201,7 +204,9 @@ impl ChainingCostFunction { }) .collect_array() .unwrap(); + trace!("Fill secondaries"); for (ts_kind, secondary) in TsKind::iter().zip(&mut secondaries) { + trace!("Fill secondaries S{}", ts_kind.digits()); for (from_index, from_anchor) in anchors.enumerate_secondaries(ts_kind) { // Fill secondary from from_index with exact values. additional_secondary_targets_output.clear(); @@ -248,6 +253,7 @@ impl ChainingCostFunction { } // Initialise 12-jumps with infinity. + trace!("Initialise 12-jumps with infinity"); let mut jump_12s = TsKind::iter() .map(|ts_kind| { ChainingCostArray::new_from_cost( @@ -258,50 +264,107 @@ impl ChainingCostFunction { }) .collect_array() .unwrap(); + + let mut total_12_jump_exact_align_time = Duration::default(); + let mut total_12_jump_exact_evaluation_time = Duration::default(); + let mut total_12_jump_exact_evaluations = 0; + let mut total_12_jump_exact_evaluation_opened_nodes = 0; for (ts_kind, jump_12) in TsKind::iter().zip(&mut jump_12s) { + trace!("Filling 12-jumps to S{}", ts_kind.digits()); for (from_index, from_anchor) in anchors.enumerate_primaries() { let from_index = from_index + 1; - // Fill 12-jumps from from_index with exact values. - additional_secondary_targets_output.clear(); - ts_12_jump_aligner.align_until_cost_limit( - anchors.primary(from_index - 1).end(k), - ts_kind, - max_exact_cost_function_cost - + chaining_lower_bounds.alignment_costs().ts_base_cost, - &mut additional_secondary_targets_output, - ); - additional_secondary_targets_output.sort_unstable(); - for (to_index, cost) in anchors.secondary_anchor_to_index_iter( - additional_secondary_targets_output.iter().copied(), - ts_kind, - ) { - jump_12[[from_index, to_index]] = cost; - if cost - <= max_exact_cost_function_cost - + chaining_lower_bounds.alignment_costs().ts_base_cost - { - jump_12.set_exact(from_index, to_index); + // Fill 12-jumps with lower bound. + let mut eligible_anchors = Vec::new(); + for (to_index, to_anchor) in anchors.enumerate_secondaries(ts_kind) { + if let Some(gap) = from_anchor.chaining_jump_gap(&to_anchor, ts_kind, k) { + let lower_bound = chaining_lower_bounds.jump_12_lower_bound(gap); + jump_12[[from_index, to_index]] = lower_bound; + if lower_bound + <= max_exact_cost_function_cost + + chaining_lower_bounds.alignment_costs().ts_base_cost + { + eligible_anchors.push(to_anchor); + } } } - // Fill remaining 12-jumps with lower bound. - for (to_index, to_anchor) in anchors.enumerate_secondaries(ts_kind) { - if let Some(gap) = from_anchor.chaining_jump_gap(&to_anchor, ts_kind, k) { - jump_12[[from_index, to_index]] = chaining_lower_bounds - .jump_12_lower_bound(gap) - .max( - max_exact_cost_function_cost - + chaining_lower_bounds.alignment_costs().ts_base_cost - + Cost::from_usize(1), - ) - .min(jump_12[[from_index, to_index]]); + if !eligible_anchors.is_empty() { + total_12_jump_exact_evaluations += 1; + eligible_anchors.sort_unstable_by_key(|anchor| { + anchor.start(ts_kind).secondary_ordinate_ancestor().unwrap() + }); + // TODO: use eligible anchors for much more detailed filtering or for an A* lower bound in alignment. + let eligible_anchors = eligible_anchors; + let min_eligible_ancestor = eligible_anchors + .first() + .unwrap() + .start(ts_kind) + .secondary_ordinate_ancestor() + .unwrap(); + let align_end = AlignmentCoordinates::new_secondary( + min_eligible_ancestor, + sequences + .secondary_end(ts_kind) + .secondary_ordinate_descendant() + .unwrap(), + ts_kind, + ); + + // Correct 12-jumps from from_index to exact values. + additional_secondary_targets_output.clear(); + // FIXME: this finds all positions in the ancestor, but almost none of them belong to any anchor. + let start = Instant::now(); + total_12_jump_exact_evaluation_opened_nodes += ts_12_jump_aligner + .align_until_cost_limit( + anchors.primary(from_index - 1).end(k), + align_end, + ts_kind, + max_exact_cost_function_cost + + chaining_lower_bounds.alignment_costs().ts_base_cost, + &mut additional_secondary_targets_output, + ); + let after_align = Instant::now(); + additional_secondary_targets_output.sort_unstable(); + let mut additional_secondary_anchor_count = 0; + for (to_index, cost) in anchors.secondary_anchor_to_index_iter( + additional_secondary_targets_output.iter().copied(), + ts_kind, + ) { + additional_secondary_anchor_count += 1; + jump_12[[from_index, to_index]] = cost; + if cost + <= max_exact_cost_function_cost + + chaining_lower_bounds.alignment_costs().ts_base_cost + { + jump_12.set_exact(from_index, to_index); + } } + let end = Instant::now(); + total_12_jump_exact_align_time += after_align - start; + total_12_jump_exact_evaluation_time += end - after_align; + trace!( + "Found {additional_secondary_anchor_count}/{} additional anchors from P[{}] to S{}", + additional_secondary_targets_output.len(), + from_index - 1, + ts_kind.digits(), + ); } } } + debug!( + "Exact cost evaluation for 12-jumps took {:.0}s to align and {:.0}s to evaluate", + total_12_jump_exact_align_time.as_secs_f64(), + total_12_jump_exact_evaluation_time.as_secs_f64(), + ); + debug!( + "Exact cost evaluation for 12-jumps opened on average {} nodes (without skipped evaluations)", + total_12_jump_exact_evaluation_opened_nodes / total_12_jump_exact_evaluations, + ); + // Initialise 34-jumps with infinity. + trace!("Initialise 34-jumps with infinity"); let mut jump_34s = TsKind::iter() .map(|ts_kind| { ChainingCostArray::new_from_cost( @@ -313,6 +376,7 @@ impl ChainingCostFunction { .collect_array() .unwrap(); for (ts_kind, jump_34) in TsKind::iter().zip(&mut jump_34s) { + trace!("Filling 34-jumps from S{}", ts_kind.digits()); for (from_index, from_anchor) in anchors.enumerate_secondaries(ts_kind) { // Fill 34-jumps from from_index with exact values. additional_primary_targets_output.clear(); @@ -356,6 +420,7 @@ impl ChainingCostFunction { } } + trace!("Fill jumps from start and to end"); for (ts_kind, (jump_12, jump_34)) in TsKind::iter().zip(jump_12s.iter_mut().zip(&mut jump_34s)) { @@ -363,6 +428,7 @@ impl ChainingCostFunction { additional_secondary_targets_output.clear(); ts_12_jump_aligner.align_until_cost_limit( start, + sequences.secondary_end(ts_kind), ts_kind, max_exact_cost_function_cost + chaining_lower_bounds.alignment_costs().ts_base_cost, &mut additional_secondary_targets_output, diff --git a/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs b/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs index 788b93d3..1534ddb9 100644 --- a/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs +++ b/lib_ts_chainalign/src/exact_chaining/ts_12_jump.rs @@ -97,12 +97,13 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> pub fn align_until_cost_limit( &mut self, start: AlignmentCoordinates, + end: AlignmentCoordinates, ts_kind: TsKind, cost_limit: Cost, additional_secondary_targets_output: &mut impl Extend<(SecondaryAnchor, Cost)>, - ) { + ) -> usize { assert!(start.is_primary()); - let end = self.sequences.secondary_end(ts_kind); + assert!(end.is_secondary()); let context = Context::new( self.alignment_costs, @@ -117,7 +118,7 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> a_star.initialise(); a_star.search_until_with_target_policy(|_, node| node.cost > cost_limit, true); - let descendant_start = match end.ts_kind().unwrap().descendant { + let descendant_start = match ts_kind.descendant { TsDescendant::Seq1 => start.primary_ordinate_a(), TsDescendant::Seq2 => start.primary_ordinate_b(), } @@ -127,7 +128,10 @@ impl<'sequences, 'alignment_costs, 'rc_fn, Cost: AStarCost> descendant_start, additional_secondary_targets_output, ); + + let opened_node_amount = a_star.performance_counters().opened_nodes; self.a_star_buffers = Some(a_star.into_buffers()); + opened_node_amount } fn fill_additional_targets( From b2bca14ab6ca2e080dae30480aac747217da1a97 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Fri, 19 Dec 2025 15:11:43 +0200 Subject: [PATCH 2/3] Handle crash when all 12-jump exact evaluations are skipped. --- lib_ts_chainalign/src/chaining_cost_function.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index 45399819..f095c240 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -268,7 +268,7 @@ impl ChainingCostFunction { let mut total_12_jump_exact_align_time = Duration::default(); let mut total_12_jump_exact_evaluation_time = Duration::default(); let mut total_12_jump_exact_evaluations = 0; - let mut total_12_jump_exact_evaluation_opened_nodes = 0; + let mut total_12_jump_exact_evaluation_opened_nodes = 0usize; for (ts_kind, jump_12) in TsKind::iter().zip(&mut jump_12s) { trace!("Filling 12-jumps to S{}", ts_kind.digits()); for (from_index, from_anchor) in anchors.enumerate_primaries() { @@ -360,7 +360,9 @@ impl ChainingCostFunction { ); debug!( "Exact cost evaluation for 12-jumps opened on average {} nodes (without skipped evaluations)", - total_12_jump_exact_evaluation_opened_nodes / total_12_jump_exact_evaluations, + total_12_jump_exact_evaluation_opened_nodes + .checked_div(total_12_jump_exact_evaluations) + .unwrap_or(0), ); // Initialise 34-jumps with infinity. From 5be3b32742ab86e786601b46b038b5b2a10cfab5 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Fri, 19 Dec 2025 15:21:02 +0200 Subject: [PATCH 3/3] Use max_chain_cf_cost as lower bound for 12-jump again. --- lib_ts_chainalign/src/chaining_cost_function.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index f095c240..70d73008 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -279,7 +279,11 @@ impl ChainingCostFunction { for (to_index, to_anchor) in anchors.enumerate_secondaries(ts_kind) { if let Some(gap) = from_anchor.chaining_jump_gap(&to_anchor, ts_kind, k) { let lower_bound = chaining_lower_bounds.jump_12_lower_bound(gap); - jump_12[[from_index, to_index]] = lower_bound; + jump_12[[from_index, to_index]] = lower_bound.max( + max_exact_cost_function_cost + + chaining_lower_bounds.alignment_costs().ts_base_cost + + Cost::from_usize(1), + ); if lower_bound <= max_exact_cost_function_cost + chaining_lower_bounds.alignment_costs().ts_base_cost