From c18e262f174ebcf3ef32bf2509baf31a8ac81407 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 8 Dec 2025 14:41:52 +0200 Subject: [PATCH 1/2] Track first secondary anchor index of TS. --- lib_ts_chainalign/src/chain_align.rs | 19 +++++++++---- lib_ts_chainalign/src/chain_align/chainer.rs | 28 +++++++++++++++++--- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/lib_ts_chainalign/src/chain_align.rs b/lib_ts_chainalign/src/chain_align.rs index e7666fe6..9fccb8a5 100644 --- a/lib_ts_chainalign/src/chain_align.rs +++ b/lib_ts_chainalign/src/chain_align.rs @@ -101,11 +101,16 @@ pub fn align( Identifier::Primary { index } => { write!(s, "P{}", anchors.primary[*index]).unwrap() } - Identifier::Secondary { index, ts_kind } => write!( + Identifier::Secondary { + index, + ts_kind, + first_secondary_index, + } => write!( s, - "S{}{}", + "S{}{}->{}", ts_kind.digits(), - anchors.secondary(*ts_kind)[*index] + anchors.secondary(*ts_kind)[*first_secondary_index], + anchors.secondary(*ts_kind)[*index], ) .unwrap(), Identifier::End => write!(s, "end").unwrap(), @@ -344,7 +349,7 @@ fn evaluate_chain( } current_upper_bound += chaining_cost_function.primary_from_start(index); } - (Identifier::Start, Identifier::Secondary { index, ts_kind }) => { + (Identifier::Start, Identifier::Secondary { index, ts_kind, .. }) => { let end = anchors.secondary(ts_kind)[index].start(ts_kind); if final_evaluation || !chaining_cost_function.is_jump_12_from_start_exact(index, ts_kind) @@ -399,7 +404,7 @@ fn evaluate_chain( } current_upper_bound += chaining_cost_function.primary_to_end(index); } - (Identifier::Secondary { index, ts_kind }, Identifier::End) => { + (Identifier::Secondary { index, ts_kind, .. }, Identifier::End) => { let start = anchors.secondary(ts_kind)[index].end(ts_kind, k); if final_evaluation || !chaining_cost_function.is_jump_34_to_end_exact(index, ts_kind) @@ -478,6 +483,7 @@ fn evaluate_chain( Identifier::Secondary { index: to_index, ts_kind, + .. 
}, ) => { let start = anchors.primary[from_index].end(k); @@ -519,10 +525,12 @@ fn evaluate_chain( Identifier::Secondary { index: from_index, ts_kind, + .. }, Identifier::Secondary { index: to_index, ts_kind: to_ts_kind, + .. }, ) => { assert_eq!(ts_kind, to_ts_kind); @@ -572,6 +580,7 @@ fn evaluate_chain( Identifier::Secondary { index: from_index, ts_kind, + .. }, Identifier::Primary { index: to_index }, ) => { diff --git a/lib_ts_chainalign/src/chain_align/chainer.rs b/lib_ts_chainalign/src/chain_align/chainer.rs index 156bb20d..0d09d850 100644 --- a/lib_ts_chainalign/src/chain_align/chainer.rs +++ b/lib_ts_chainalign/src/chain_align/chainer.rs @@ -23,8 +23,17 @@ pub struct Node { #[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash)] pub enum Identifier { Start, - Primary { index: usize }, - Secondary { index: usize, ts_kind: TsKind }, + Primary { + index: usize, + }, + Secondary { + index: usize, + ts_kind: TsKind, + /// The first secondary anchor that is part of the current template switch. + /// + /// Used to estimate the length of the resulting template switch. 
+ first_secondary_index: usize, + }, End, } @@ -120,6 +129,7 @@ impl AStarContext for Context<'_, '_, Cost> { identifier: Identifier::Secondary { index: successor_index, ts_kind, + first_secondary_index: successor_index, }, predecessor, cost, @@ -199,6 +209,7 @@ impl AStarContext for Context<'_, '_, Cost> { identifier: Identifier::Secondary { index: successor_index, ts_kind, + first_secondary_index: successor_index, }, predecessor, cost, @@ -221,7 +232,11 @@ impl AStarContext for Context<'_, '_, Cost> { } })), ), - Identifier::Secondary { index, ts_kind } => output.extend( + Identifier::Secondary { + index, + ts_kind, + first_secondary_index, + } => output.extend( (0..self.anchors.secondary(ts_kind).len()) .flat_map(|successor_index| { if DEBUG_CHAINER { @@ -245,6 +260,7 @@ impl AStarContext for Context<'_, '_, Cost> { identifier: Identifier::Secondary { index: successor_index, ts_kind, + first_secondary_index, }, predecessor, cost, @@ -372,7 +388,11 @@ impl Display for Identifier { match self { Identifier::Start => write!(f, "start"), Identifier::Primary { index } => write!(f, "P-{index}"), - Identifier::Secondary { index, ts_kind } => write!(f, "S{}-{index}", ts_kind.digits()), + Identifier::Secondary { + index, + ts_kind, + first_secondary_index, + } => write!(f, "S{}-{first_secondary_index}-{index}", ts_kind.digits()), Identifier::End => write!(f, "end"), } } From 0a39d0e218433138929bcb0ee8ec7bb3c20b2ff2 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 8 Dec 2025 15:01:27 +0200 Subject: [PATCH 2/2] Restrict TS length. 
--- lib_ts_chainalign/src/anchors.rs | 20 +++ lib_ts_chainalign/src/chain_align.rs | 8 +- lib_ts_chainalign/src/chain_align/chainer.rs | 131 +++++++++++------- .../src/chaining_cost_function.rs | 8 +- test_files/config/chainalignn/config.tsa | 2 +- 5 files changed, 114 insertions(+), 55 deletions(-) diff --git a/lib_ts_chainalign/src/anchors.rs b/lib_ts_chainalign/src/anchors.rs index 9c033cd6..65078085 100644 --- a/lib_ts_chainalign/src/anchors.rs +++ b/lib_ts_chainalign/src/anchors.rs @@ -383,6 +383,26 @@ impl SecondaryAnchor { pub fn is_direct_predecessor_of(&self, successor: &Self) -> bool { self.ancestor - 1 == successor.ancestor && self.descendant + 1 == successor.descendant } + + /// Returns the length of the 2-3 alignment of a TS that starts in `self` and ends in `until`. + /// + /// The length is the maximum of the distances covered in the ancestor and in the descendant sequence. + pub fn ts_length_until(&self, until: &Self, ts_kind: TsKind, k: usize) -> usize { + let start = self.start(ts_kind); + let end = until.end(ts_kind, k); + + (start + .secondary_ordinate_ancestor() + .unwrap() + .checked_sub(end.secondary_ordinate_ancestor().unwrap()) + .unwrap()) + .max( + end.secondary_ordinate_descendant() + .unwrap() + .checked_sub(start.secondary_ordinate_descendant().unwrap()) + .unwrap(), + ) + } } impl Display for Anchors { diff --git a/lib_ts_chainalign/src/chain_align.rs b/lib_ts_chainalign/src/chain_align.rs index 9fccb8a5..4e6666a9 100644 --- a/lib_ts_chainalign/src/chain_align.rs +++ b/lib_ts_chainalign/src/chain_align.rs @@ -52,7 +52,13 @@ pub fn align( let mut chaining_duration = Duration::default(); let mut evaluation_duration = Duration::default(); - let context = Context::new(anchors, chaining_cost_function); + let k = usize::try_from(max_match_run + 1).unwrap(); + let context = Context::new( + anchors, + chaining_cost_function, + &alignment_costs.ts_limits, + k, + ); let mut astar = AStar::new(context); let mut chaining_execution_count = 0; let mut current_lower_bound = 
Cost::zero(); diff --git a/lib_ts_chainalign/src/chain_align/chainer.rs b/lib_ts_chainalign/src/chain_align/chainer.rs index 0d09d850..2aec206f 100644 --- a/lib_ts_chainalign/src/chain_align/chainer.rs +++ b/lib_ts_chainalign/src/chain_align/chainer.rs @@ -4,13 +4,16 @@ use generic_a_star::{AStarContext, AStarNode, cost::AStarCost, reset::Reset}; use crate::{ alignment::ts_kind::TsKind, anchors::Anchors, chaining_cost_function::ChainingCostFunction, + costs::TsLimits, }; const DEBUG_CHAINER: bool = false; -pub struct Context<'anchors, 'chaining_cost_function, Cost> { +pub struct Context<'anchors, 'chaining_cost_function, 'ts_limits, Cost> { pub anchors: &'anchors Anchors, pub chaining_cost_function: &'chaining_cost_function mut ChainingCostFunction, + pub ts_limits: &'ts_limits TsLimits, + pub k: usize, } #[derive(Debug, Clone, Copy, Eq, PartialEq)] @@ -37,19 +40,25 @@ pub enum Identifier { End, } -impl<'anchors, 'chaining_cost_function, Cost> Context<'anchors, 'chaining_cost_function, Cost> { +impl<'anchors, 'chaining_cost_function, 'ts_limits, Cost> + Context<'anchors, 'chaining_cost_function, 'ts_limits, Cost> +{ pub fn new( anchors: &'anchors Anchors, chaining_cost_function: &'chaining_cost_function mut ChainingCostFunction, + ts_limits: &'ts_limits TsLimits, + k: usize, ) -> Self { Self { anchors, chaining_cost_function, + ts_limits, + k, } } } -impl AStarContext for Context<'_, '_, Cost> { +impl AStarContext for Context<'_, '_, '_, Cost> { type Node = Node; fn create_root(&self) -> Self::Node { @@ -236,9 +245,9 @@ impl AStarContext for Context<'_, '_, Cost> { index, ts_kind, first_secondary_index, - } => output.extend( - (0..self.anchors.secondary(ts_kind).len()) - .flat_map(|successor_index| { + } => { + output.extend((0..self.anchors.secondary(ts_kind).len()).flat_map( + |successor_index| { if DEBUG_CHAINER { println!( "Checking anchor S{}-{successor_index}: {}", @@ -265,50 +274,74 @@ impl AStarContext for Context<'_, '_, Cost> { predecessor, cost, }) - }) 
- .chain((0..self.anchors.primary.len()).flat_map(|successor_index| { - if DEBUG_CHAINER { - println!( - "Checking anchor P-{successor_index}: {}", - self.anchors.primary[successor_index] - ); - } + }, + )); + + let first_anchor = &self.anchors.secondary(ts_kind)[first_secondary_index]; + let ts_length = first_anchor.ts_length_until( + &self.anchors.secondary(ts_kind)[index], + ts_kind, + self.k, + ); - let cost = predecessor_cost.checked_add( - &self - .chaining_cost_function - .jump_34(index, successor_index, ts_kind), - )?; - if DEBUG_CHAINER { - println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); - } + if self.ts_limits.length_23.contains(&ts_length) { + output.extend( + (0..self.anchors.primary.len()) + .flat_map(|successor_index| { + if DEBUG_CHAINER { + println!( + "Checking anchor P-{successor_index}: {}", + self.anchors.primary[successor_index] + ); + } - (cost != Cost::max_value()).then_some(Node { - identifier: Identifier::Primary { - index: successor_index, - }, - predecessor, - cost, - }) - })) - .chain(iter::once({ - let cost = predecessor_cost - .checked_add( - &self.chaining_cost_function.jump_34_to_end(index, ts_kind), - ) - .unwrap(); - if DEBUG_CHAINER { - println!("Checking anchor end"); - println!("Cost: {}+{}", predecessor_cost, cost - predecessor_cost); - } - debug_assert_ne!(cost, Cost::max_value()); - Node { - identifier: Identifier::End, - predecessor, - cost, - } - })), - ), + let cost = predecessor_cost.checked_add( + &self.chaining_cost_function.jump_34( + index, + successor_index, + ts_kind, + ), + )?; + if DEBUG_CHAINER { + println!( + "Cost: {}+{}", + predecessor_cost, + cost - predecessor_cost + ); + } + + (cost != Cost::max_value()).then_some(Node { + identifier: Identifier::Primary { + index: successor_index, + }, + predecessor, + cost, + }) + }) + .chain(iter::once({ + let cost = predecessor_cost + .checked_add( + &self.chaining_cost_function.jump_34_to_end(index, ts_kind), + ) + .unwrap(); + if DEBUG_CHAINER { + 
println!("Checking anchor end"); + println!( + "Cost: {}+{}", + predecessor_cost, + cost - predecessor_cost + ); + } + debug_assert_ne!(cost, Cost::max_value()); + Node { + identifier: Identifier::End, + predecessor, + cost, + } + })), + ); + } + } Identifier::End => { /* Has no successors */ } } @@ -327,7 +360,7 @@ impl AStarContext for Context<'_, '_, Cost> { } } -impl Reset for Context<'_, '_, Cost> { +impl Reset for Context<'_, '_, '_, Cost> { fn reset(&mut self) { // Nothing to do. } diff --git a/lib_ts_chainalign/src/chaining_cost_function.rs b/lib_ts_chainalign/src/chaining_cost_function.rs index 72029242..a27f45e3 100644 --- a/lib_ts_chainalign/src/chaining_cost_function.rs +++ b/lib_ts_chainalign/src/chaining_cost_function.rs @@ -765,28 +765,28 @@ impl ChainingCostFunction { let target = match (ts_kind.ancestor, ts_kind.descendant) { (TsAncestor::Seq1, TsDescendant::Seq1) => { if is_exact { - self.jump_12_to_11 + self.secondary_11 .set_exact(from_secondary_index, to_secondary_index); } &mut self.secondary_11[[from_secondary_index, to_secondary_index]] } (TsAncestor::Seq1, TsDescendant::Seq2) => { if is_exact { - self.jump_12_to_12 + self.secondary_12 .set_exact(from_secondary_index, to_secondary_index); } &mut self.secondary_12[[from_secondary_index, to_secondary_index]] } (TsAncestor::Seq2, TsDescendant::Seq1) => { if is_exact { - self.jump_12_to_21 + self.secondary_21 .set_exact(from_secondary_index, to_secondary_index); } &mut self.secondary_21[[from_secondary_index, to_secondary_index]] } (TsAncestor::Seq2, TsDescendant::Seq2) => { if is_exact { - self.jump_12_to_22 + self.secondary_22 .set_exact(from_secondary_index, to_secondary_index); } &mut self.secondary_22[[from_secondary_index, to_secondary_index]] diff --git a/test_files/config/chainalignn/config.tsa b/test_files/config/chainalignn/config.tsa index c9d2a9d4..5d321d32 100644 --- a/test_files/config/chainalignn/config.tsa +++ b/test_files/config/chainalignn/config.tsa @@ -21,7 +21,7 @@ Offset 
inf 0 inf Length - 0 10 200 + 0 4 200 inf 0 inf LengthDifference