From 93dc297e6e6d7c2ad82cae7747efaf6ded28ad6e Mon Sep 17 00:00:00 2001 From: Avishai Weissberg Date: Fri, 21 Nov 2025 11:07:47 +0200 Subject: [PATCH 01/18] feat: add DisjointIntervalSequence API --- docs-src/diseq.rst | 80 +++++++- genome_kit/diseq.py | 228 +++++++++++++++++++++++ tests/test_diseq.py | 435 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 741 insertions(+), 2 deletions(-) diff --git a/docs-src/diseq.rst b/docs-src/diseq.rst index 0390381..daad9b7 100644 --- a/docs-src/diseq.rst +++ b/docs-src/diseq.rst @@ -32,7 +32,6 @@ is conceptually distinct in several ways: but intervals within the DIS are described relative to the coordinate space rather than in absolute genomic terms. - Overview ======== @@ -286,6 +285,7 @@ be overridden:: >>> dis = DisjointIntervalSequence.from_transcript( ... transcript, coord_name="my_coord", interval_name="my_interval") + From Intervals ~~~~~~~~~~~~~~ @@ -383,7 +383,6 @@ When ``on_coordinate_strand`` is ``False``, the mapping reverses: Opposite Strand :: - >>> opp = dis.as_opposite_strand() >>> opp.end5_index # same as end when off coordinate strand 7 @@ -400,3 +399,80 @@ available as properties:: >>> dis.end3 # 0-length DIS at the interval's 3' boundary >>> dis.coord_end5 # 0-length DIS at the coordinate space's 5' boundary >>> dis.coord_end3 # 0-length DIS at the coordinate space's 3' boundary + + +Strand Methods +============== + +A DIS interval can sit on either strand independently of the coordinate +intervals. 
The ``on_coordinate_strand`` property indicates whether the +interval is on the same strand as the coordinate intervals:: + On Coordinate Strand: True + Start Index: 1 + End Index: 6 + DIS Coordinates: 0 1 2 3 4 5 6 7 + DNA Sequence (+): A T C C G A C + |<------------->| + ----------------------------------------------------- + DNA Sequence (-): T A G G C T G + DIS Coordinates: 0 1 2 3 4 5 6 7 + Opposite Strand + + >>> dis.on_coordinate_strand + True + >>> dis.is_positive_strand() + True + +``as_opposite_strand()`` creates a new DIS with the interval on the other +strand. The ``start`` and ``end`` indices are preserved — only +``on_coordinate_strand`` is flipped:: + + Before (on_coordinate_strand=False): + Start Index: 1 + End Index: 6 + DIS Coordinates: 0 1 2 3 4 5 6 7 + DNA Sequence (+): T A A C C C T + ----------------------------------------------------- + DNA Sequence (-): A T T G G G A + DIS Coordinates: 0 1 2 3 4 5 6 7 + |<------------->| + Opposite Strand + + After as_opposite_strand() (on_coordinate_strand=True): + Start Index: 1 + End Index: 6 + DIS Coordinates: 0 1 2 3 4 5 6 7 + DNA Sequence (+): T A A C C C T + |<------------->| + ----------------------------------------------------- + DNA Sequence (-): A T T G G G A + DIS Coordinates: 0 1 2 3 4 5 6 7 + Opposite Strand + +In code:: + >>> dis.on_coordinate_strand + False + >>> dis.is_positive_strand() + False + >>> opposite = dis.as_opposite_strand() + >>> opposite.on_coordinate_strand + True + >>> opposite.is_positive_strand() + True + >>> opposite.start == dis.start # start/end unchanged + True + >>> opposite.end == dis.end + True + >>> opposite.coordinate_intervals == dis.coordinate_intervals + True + +The ``as_positive_strand()`` and ``as_negative_strand()`` methods return +``self`` if the interval is already on the requested strand:: + + >>> dis.as_positive_strand() is dis + True + +.. note:: + + Strand methods only affect the interval layer. The coordinate + intervals always remain unchanged. 
diff --git a/genome_kit/diseq.py b/genome_kit/diseq.py index bff791d..e9c18a1 100644 --- a/genome_kit/diseq.py +++ b/genome_kit/diseq.py @@ -408,6 +408,48 @@ def length(self) -> int: """Length of the interval on the coordinate space.""" return self.end - self.start + def _set_end5(self, end5: int) -> "DisjointIntervalSequence": + """Convenience method to update start/end based on a new end5 index.""" + if end5 == self.end5_index: + return self # No change + new_start, new_end = self._start, self._end + end5_difference = end5 - self.end5_index + is_moved_upstream = end5_difference * self._upstream_index_step() > 0 + if is_moved_upstream and self._upstream_index_step() == -1: + new_start = new_start - abs(end5_difference) + elif is_moved_upstream and self._upstream_index_step() == 1: + new_end = new_end + abs(end5_difference) + elif not is_moved_upstream and self._upstream_index_step() == -1: + new_start = new_start + abs(end5_difference) + elif not is_moved_upstream and self._upstream_index_step() == 1: + new_end = new_end - abs(end5_difference) + if new_start > new_end: + raise ValueError( + f"Invalid end5 update: end5 index {end5} would be downstream of end3 index {self.end3_index}" + ) + return self._from_end_indices(new_start, new_end) + + def _set_end3(self, end3: int) -> "DisjointIntervalSequence": + """Convenience method to update start/end based on a new end3 index.""" + if end3 == self.end3_index: + return self # No change + new_start, new_end = self._start, self._end + end3_difference = end3 - self.end3_index + is_moved_upstream = end3_difference * self._upstream_index_step() > 0 + if is_moved_upstream and self._upstream_index_step() == -1: + new_end = new_end - abs(end3_difference) + elif is_moved_upstream and self._upstream_index_step() == 1: + new_start = new_start + abs(end3_difference) + elif not is_moved_upstream and self._upstream_index_step() == -1: + new_end = new_end + abs(end3_difference) + elif not is_moved_upstream and 
self._upstream_index_step() == 1: + new_start = new_start - abs(end3_difference) + if new_start > new_end: + raise ValueError( + f"Invalid end3 update: end3 index {end3} would be upstream of end5 index {self.end5_index}" + ) + return self._from_end_indices(new_start, new_end) + def _upstream_index_step(self, on_coordinate_strand: bool | None = None) -> int: """Return +1 or -1 indicating the upstream direction in index space. @@ -422,6 +464,192 @@ def _upstream_index_step(self, on_coordinate_strand: bool | None = None) -> int: # POSITIVE_STRAND_LEFT_TO_RIGHT: effective strand determines direction return -1 if self.strand == "+" else 1 + def _validate_same_coordinate_space( + self, other: "DisjointIntervalSequence" + ) -> None: + """Raise if other does not share the same coordinate space.""" + if not isinstance(other, DisjointIntervalSequence): + raise TypeError( + f"Expected DisjointIntervalSequence, got {type(other).__name__}" + ) + if self._coordinate_intervals != other._coordinate_intervals: + raise ValueError("DIS objects must share the same coordinate intervals") + + def _from_end_indices(self, end5: int, end3: int) -> "DisjointIntervalSequence": + """Return a new DIS with the same coordinate space but different interval indices.""" + # Validate end5 is upstream of or equal to end3 + if self._upstream_index_step() == -1: + if end5 > end3: + raise ValueError( + f"Invalid indices: end5 index {end5} is downstream of end3 index {end3}" + ) + if self._upstream_index_step() == 1: + if end5 < end3: + raise ValueError( + f"Invalid indices: end5 index {end5} is downstream of end3 index {end3}" + ) + return DisjointIntervalSequence( + self._coordinate_intervals, + coord_id=self._coord_metadata.id, + interval_id=self._interval_metadata.id, + on_coordinate_strand=self.on_coordinate_strand, + start=min(end5, end3), + end=max(end5, end3), + ) + + def shift(self, amount: int) -> "DisjointIntervalSequence": + """Shift the interval downstream by amount (negative shifts 
upstream). + + The coordinate space is unchanged. Only the interval indices move. + """ + downstream_step = -self._upstream_index_step() + delta = amount * downstream_step + return self._from_end_indices( + self.end5_index + delta, + self.end3_index + delta, + ) + + def expand( + self, upstream: int, dnstream: int | None = None + ) -> "DisjointIntervalSequence": + """Expand the interval upstream and/or downstream. + + Negative values contract the interval. Raises ValueError if contraction + would result in end5 being downstream of end3. + + Args: + upstream: Bases to expand (or contract if negative) toward the 5' end. + dnstream: Bases to expand (or contract if negative) toward the 3' end. + Defaults to upstream (symmetric). + """ + if dnstream is None: + dnstream = upstream + up_step = self._upstream_index_step() + down_step = -up_step + new_end5 = self.end5_index + (upstream * up_step) + new_end3 = self.end3_index + (dnstream * down_step) + # Validate end5 is still upstream of or equal to end3 + if (new_end5 - new_end3) * up_step < 0: + raise ValueError( + "Invalid expansion: end5 would be downstream of end3 " + f"(end5={new_end5}, end3={new_end3})" + ) + return self._from_end_indices(new_end5, new_end3) + + def upstream_of(self, other: "DisjointIntervalSequence") -> bool: + """True if self is strictly upstream of other (no overlap). + + Requires the same coordinate space and same on_coordinate_strand. + """ + self._validate_same_coordinate_space(other) + if self.on_coordinate_strand != other.on_coordinate_strand: + raise ValueError("Cannot compare: intervals are on different strands") + if self.length == 0 and other.length == 0 and self.start == other.start: + return False + if self._upstream_index_step() == -1: + return self._end <= other.start + return self._start >= other.end + + def dnstream_of(self, other: "DisjointIntervalSequence") -> bool: + """True if self is strictly downstream of other (no overlap). 
+ + Requires the same coordinate space and same on_coordinate_strand. + """ + self._validate_same_coordinate_space(other) + if self.on_coordinate_strand != other.on_coordinate_strand: + raise ValueError("Cannot compare: intervals are on different strands") + if self.length == 0 and other.length == 0 and self.start == other.start: + return False + if self._upstream_index_step() == -1: + return self._start >= other.end + return self._end <= other.start + + def within(self, other: "DisjointIntervalSequence") -> bool: + """True if self's interval is contained within other's interval. + + Requires the same coordinate space and same on_coordinate_strand. + """ + self._validate_same_coordinate_space(other) + if self.on_coordinate_strand != other.on_coordinate_strand: + raise ValueError("Cannot compare: intervals are on different strands") + return self._start >= other.start and self._end <= other.end + + def is_positive_strand(self) -> bool: + """If the interval is on the positive strand. + + Returns + ------- + :py:class:`bool` + """ + if self.transcript_strand == "+": + return True + return False + + def as_positive_strand(self) -> "DisjointIntervalSequence": + """Return a DIS with the interval on the positive strand. + + Returns ``self`` if already on the positive strand. The coordinate + intervals are unchanged; only the interval strand is affected. + + Returns + ------- + :py:class:`DisjointIntervalSequence` + """ + if self.is_positive_strand(): + return self + return self.as_opposite_strand() + + def as_negative_strand(self) -> "DisjointIntervalSequence": + """Return a DIS with the interval on the negative strand. + + Returns ``self`` if already on the negative strand. The coordinate + intervals are unchanged; only the interval strand is affected. 
+ + Returns + ------- + :py:class:`DisjointIntervalSequence` + """ + if not self.is_positive_strand(): + return self + return self.as_opposite_strand() + + def as_opposite_strand(self) -> "DisjointIntervalSequence": + """Return a new DIS with the interval on the opposite strand. + + The coordinate intervals are unchanged. The interval's + ``on_coordinate_strand`` is flipped. + + Returns + ------- + :py:class:`DisjointIntervalSequence` + """ + return DisjointIntervalSequence( + self._coordinate_intervals, + coord_id=self._coord_metadata.id, + interval_id=self._interval_metadata.id, + on_coordinate_strand=not self.on_coordinate_strand, + start=self._start, + end=self._end, + ) + + def genomic_span(self) -> Interval: + """Smallest single Interval spanning all coordinate intervals. + + Returns + ------- + :py:class:`~genome_kit.Interval` + An interval from the minimum ``start`` to the maximum ``end`` + across all coordinate intervals. + """ + ivs = self._coordinate_intervals + return Interval( + ivs[0].chromosome, + ivs[0].strand, + min(iv.start for iv in ivs), + max(iv.end for iv in ivs), + ivs[0].reference_genome, + ) + def __len__(self) -> int: """Return the length of the interval.""" return self.length diff --git a/tests/test_diseq.py b/tests/test_diseq.py index 322866f..143baed 100644 --- a/tests/test_diseq.py +++ b/tests/test_diseq.py @@ -312,6 +312,106 @@ def test_length_zero(self): self.assertEqual(dis.length, 0) +class TestStrandMethods(unittest.TestCase): + + def test_is_positive_strand_plus_on_coord(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True) + self.assertTrue(dis.is_positive_strand()) + + def test_is_positive_strand_minus_off_coord(self): + ivs = _make_intervals([("chr1", "-", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=False) + self.assertTrue(dis.is_positive_strand()) + + def test_is_positive_strand_false_plus_off_coord(self): + ivs = 
_make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=False) + self.assertFalse(dis.is_positive_strand()) + + def test_is_positive_strand_false_minus_on_coord(self): + ivs = _make_intervals([("chr1", "-", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True) + self.assertFalse(dis.is_positive_strand()) + + def test_as_positive_strand_already_positive(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True) + result = dis.as_positive_strand() + self.assertIs(result, dis) + + def test_as_positive_strand_flips(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence( + ivs, on_coordinate_strand=False, start=10, end=80 + ) + expected = DisjointIntervalSequence( + ivs, on_coordinate_strand=True, start=10, end=80 + ) + result = dis.as_positive_strand() + self.assertTrue(result.is_positive_strand()) + self.assertTrue(result.on_coordinate_strand) + self.assertEqual(result.start, 10) + self.assertEqual(result.end, 80) + self.assertEqual(result, expected) + + def test_as_negative_strand_already_negative(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=False) + result = dis.as_negative_strand() + self.assertIs(result, dis) + + def test_as_negative_strand_flips(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True, start=10, end=80) + expected = DisjointIntervalSequence( + ivs, on_coordinate_strand=False, start=10, end=80 + ) + result = dis.as_negative_strand() + self.assertFalse(result.is_positive_strand()) + self.assertFalse(result.on_coordinate_strand) + self.assertEqual(result.start, 10) + self.assertEqual(result.end, 80) + self.assertEqual(result, expected) + + def test_as_opposite_strand(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = 
DisjointIntervalSequence(ivs, on_coordinate_strand=True) + opp = dis.as_opposite_strand() + self.assertFalse(opp.is_positive_strand()) + opp2 = opp.as_opposite_strand() + self.assertTrue(opp2.is_positive_strand()) + + def test_strand_flip_preserves_coordinate_intervals(self): + ivs = _make_intervals([("chr1", "+", 100, 200), ("chr1", "+", 300, 400)]) + dis = DisjointIntervalSequence(ivs) + flipped = dis.as_opposite_strand() + self.assertEqual(flipped.coordinate_intervals, dis.coordinate_intervals) + + def test_idempotency(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True) + self.assertIs(dis.as_positive_strand().as_positive_strand(), dis) + + def test_as_opposite_strand_preserves_start_end(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, start=10, end=80) + opp = dis.as_opposite_strand() + self.assertEqual(opp.start, 10) + self.assertEqual(opp.end, 80) + + def test_end5_end3_swap_on_opposite_strand(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, start=10, end=80) + # On coordinate strand: end5 at start, end3 at end + self.assertEqual(dis.end5_index, 10) + self.assertEqual(dis.end3_index, 80) + # Off coordinate strand: end5 at end, end3 at start + opp = dis.as_opposite_strand() + self.assertEqual(opp.end5_index, 80) + self.assertEqual(opp.end3_index, 10) + + class TestEndProperties(unittest.TestCase): def test_end5_default(self): @@ -519,5 +619,340 @@ def test_eq_non_dis(self): self.assertNotEqual(dis, 42) +# Helper for shift/expand/relational tests +# 2 exons on chr1+: [100,200) and [300,400), coordinate_length=200 +_COORD_IVS = _make_intervals([("chr1", "+", 100, 200), ("chr1", "+", 300, 400)]) + + +def _dis( + start=0, end=200, on_coordinate_strand=True, coord_id="c", interval_id="i", ivs=None +): + """Quick DIS factory for tests.""" + return DisjointIntervalSequence( + ivs or _COORD_IVS, + 
coord_id=coord_id, + interval_id=interval_id, + on_coordinate_strand=on_coordinate_strand, + start=start, + end=end, + ) + + +class TestShift(unittest.TestCase): + + def test_shift_positive(self): + dis = _dis(start=30, end=150) + shifted = dis.shift(10) + self.assertEqual(shifted.start, 40) + self.assertEqual(shifted.end, 160) + + def test_shift_negative(self): + dis = _dis(start=30, end=150) + shifted = dis.shift(-10) + self.assertEqual(shifted.start, 20) + self.assertEqual(shifted.end, 140) + + def test_shift_zero(self): + dis = _dis(start=30, end=150) + shifted = dis.shift(0) + self.assertEqual(shifted, dis) + + def test_shift_beyond_coordinate(self): + dis = _dis(start=30, end=150) + shifted = dis.shift(60) + self.assertEqual(shifted.start, 90) + self.assertEqual(shifted.end, 210) + + def test_shift_negative_beyond(self): + dis = _dis(start=30, end=150) + shifted = dis.shift(-40) + self.assertEqual(shifted.start, -10) + self.assertEqual(shifted.end, 110) + + def test_shift_zero_length(self): + dis = _dis(start=50, end=50) + shifted = dis.shift(5) + self.assertEqual(shifted.start, 55) + self.assertEqual(shifted.end, 55) + self.assertEqual(shifted.length, 0) + + def test_shift_opposite_strand(self): + # on_coordinate_strand=False: upstream_step=+1, downstream=-1 + # shift(10) downstream → subtract 10 from both + dis = _dis(start=30, end=150, on_coordinate_strand=False) + shifted = dis.shift(10) + self.assertEqual(shifted.start, 20) + self.assertEqual(shifted.end, 140) + + def test_shift_preserves_metadata(self): + dis = _dis(start=30, end=150, coord_id="mycoord", interval_id="myiv") + shifted = dis.shift(10) + self.assertEqual(shifted.coord_id, "mycoord") + self.assertEqual(shifted.id, "myiv") + self.assertTrue(shifted.on_coordinate_strand) + + def test_shift_preserves_coordinate_intervals(self): + dis = _dis(start=30, end=150) + shifted = dis.shift(10) + self.assertEqual(shifted.coordinate_intervals, dis.coordinate_intervals) + + +class 
TestExpand(unittest.TestCase): + + def test_expand_symmetric(self): + dis = _dis(start=30, end=150) + expanded = dis.expand(5) + self.assertEqual(expanded.start, 25) + self.assertEqual(expanded.end, 155) + + def test_expand_asymmetric(self): + dis = _dis(start=30, end=150) + expanded = dis.expand(5, 10) + self.assertEqual(expanded.start, 25) + self.assertEqual(expanded.end, 160) + + def test_expand_upstream_only(self): + dis = _dis(start=30, end=150) + expanded = dis.expand(5, 0) + self.assertEqual(expanded.start, 25) + self.assertEqual(expanded.end, 150) + + def test_expand_downstream_only(self): + dis = _dis(start=30, end=150) + expanded = dis.expand(0, 10) + self.assertEqual(expanded.start, 30) + self.assertEqual(expanded.end, 160) + + def test_expand_zero(self): + dis = _dis(start=30, end=150) + expanded = dis.expand(0) + self.assertEqual(expanded, dis) + + def test_expand_negative_contracts(self): + dis = _dis(start=30, end=150) + contracted = dis.expand(-5, -10) + self.assertEqual(contracted.start, 35) + self.assertEqual(contracted.end, 140) + + def test_expand_contract_to_zero_length(self): + dis = _dis(start=30, end=150) # length=120 + contracted = dis.expand(-60, -60) + self.assertEqual(contracted.start, 90) + self.assertEqual(contracted.end, 90) + self.assertEqual(contracted.length, 0) + + def test_expand_over_contraction_raises(self): + dis = _dis(start=30, end=150) # length=120 + with self.assertRaises(ValueError): + dis.expand(-70, -70) + + def test_expand_opposite_strand(self): + # on_coordinate_strand=False: upstream_step=+1 + # end5=150, end3=30. 
expand(5): end5 moves to 155, end3 moves to 25 + # start=min(155,25)=25, end=max(155,25)=155 + dis = _dis(start=30, end=150, on_coordinate_strand=False) + expanded = dis.expand(5) + self.assertEqual(expanded.start, 25) + self.assertEqual(expanded.end, 155) + + def test_expand_zero_length_interval(self): + dis = _dis(start=50, end=50) + expanded = dis.expand(5) + self.assertEqual(expanded.start, 45) + self.assertEqual(expanded.end, 55) + self.assertEqual(expanded.length, 10) + + def test_expand_beyond_coordinate(self): + dis = _dis(start=30, end=150) + expanded = dis.expand(50, 0) + self.assertEqual(expanded.start, -20) + + def test_expand_preserves_metadata(self): + dis = _dis(start=30, end=150, coord_id="c", interval_id="i") + expanded = dis.expand(5) + self.assertEqual(expanded.coord_id, "c") + self.assertEqual(expanded.id, "i") + self.assertTrue(expanded.on_coordinate_strand) + + def test_expand_preserves_coordinate_intervals(self): + dis = _dis(start=30, end=150) + expanded = dis.expand(5) + self.assertEqual(expanded.coordinate_intervals, dis.coordinate_intervals) + + +class TestUpstreamOf(unittest.TestCase): + + def test_upstream_of_true(self): + a = _dis(start=10, end=30) + b = _dis(start=50, end=80) + self.assertTrue(a.upstream_of(b)) + + def test_upstream_of_false_overlap(self): + a = _dis(start=10, end=60) + b = _dis(start=50, end=80) + self.assertFalse(a.upstream_of(b)) + + def test_upstream_of_adjacent(self): + a = _dis(start=10, end=50) + b = _dis(start=50, end=80) + self.assertTrue(a.upstream_of(b)) + + def test_upstream_of_same_false(self): + a = _dis(start=30, end=50) + self.assertFalse(a.upstream_of(a)) + + def test_upstream_of_zero_length(self): + a = _dis(start=30, end=30) + b = _dis(start=50, end=80) + self.assertTrue(a.upstream_of(b)) + + def test_upstream_of_zero_length_same_pos(self): + a = _dis(start=50, end=50) + b = _dis(start=50, end=80) + self.assertTrue(a.upstream_of(b)) + + def test_upstream_of_both_zero_length_same_pos(self): + a = 
_dis(start=50, end=50) + b = _dis(start=50, end=50) + self.assertFalse(a.upstream_of(b)) + + def test_upstream_of_opposite_strand(self): + # on_coordinate_strand=False: upstream_step=+1, upstream = higher indices + # a.start(100) >= b.end(80) → True + a = _dis(start=100, end=150, on_coordinate_strand=False) + b = _dis(start=50, end=80, on_coordinate_strand=False) + self.assertTrue(a.upstream_of(b)) + + def test_different_coord_space_raises(self): + a = _dis(start=10, end=30) + other_ivs = _make_intervals([("chr1", "+", 100, 200), ("chr1", "+", 500, 600)]) + b = _dis(start=10, end=30, ivs=other_ivs) + with self.assertRaises(ValueError): + a.upstream_of(b) + + def test_different_coord_id_allowed(self): + a = _dis(start=10, end=30, coord_id="a") + b = _dis(start=50, end=80, coord_id="b") + self.assertTrue(a.upstream_of(b)) + + def test_different_on_coord_strand_raises(self): + a = _dis(start=10, end=30, on_coordinate_strand=True) + b = _dis(start=50, end=80, on_coordinate_strand=False) + with self.assertRaises(ValueError): + a.upstream_of(b) + + def test_non_dis_raises(self): + a = _dis(start=10, end=30) + with self.assertRaises(TypeError): + a.upstream_of("not a DIS") + + +class TestDnstreamOf(unittest.TestCase): + + def test_dnstream_of_true(self): + a = _dis(start=50, end=80) + b = _dis(start=10, end=30) + self.assertTrue(a.dnstream_of(b)) + + def test_dnstream_of_false(self): + a = _dis(start=10, end=30) + b = _dis(start=50, end=80) + self.assertFalse(a.dnstream_of(b)) + + def test_dnstream_of_adjacent(self): + a = _dis(start=50, end=80) + b = _dis(start=10, end=50) + self.assertTrue(a.dnstream_of(b)) + + def test_dnstream_of_same_false(self): + a = _dis(start=30, end=50) + self.assertFalse(a.dnstream_of(a)) + + def test_dnstream_of_both_zero_length_same_pos(self): + a = _dis(start=50, end=50) + b = _dis(start=50, end=50) + self.assertFalse(a.dnstream_of(b)) + + def test_dnstream_of_opposite_strand(self): + # on_coordinate_strand=False: upstream_step=+1 + # 
downstream = lower indices. a.end(80) <= b.start(100) → True + a = _dis(start=50, end=80, on_coordinate_strand=False) + b = _dis(start=100, end=150, on_coordinate_strand=False) + self.assertTrue(a.dnstream_of(b)) + + def test_different_coord_space_raises(self): + a = _dis(start=50, end=80) + other_ivs = _make_intervals([("chr1", "+", 100, 200), ("chr1", "+", 500, 600)]) + b = _dis(start=10, end=30, ivs=other_ivs) + with self.assertRaises(ValueError): + a.dnstream_of(b) + + def test_different_on_coord_strand_raises(self): + a = _dis(start=50, end=80, on_coordinate_strand=True) + b = _dis(start=30, end=60, on_coordinate_strand=False) + with self.assertRaises(ValueError): + a.dnstream_of(b) + + +class TestWithin(unittest.TestCase): + + def test_within_true(self): + a = _dis(start=30, end=50) + b = _dis(start=10, end=80) + self.assertTrue(a.within(b)) + + def test_within_false(self): + a = _dis(start=10, end=80) + b = _dis(start=30, end=50) + self.assertFalse(a.within(b)) + + def test_within_self(self): + a = _dis(start=30, end=50) + self.assertTrue(a.within(a)) + + def test_within_zero_length(self): + a = _dis(start=50, end=50) + b = _dis(start=10, end=80) + self.assertTrue(a.within(b)) + + def test_within_at_boundary(self): + a = _dis(start=10, end=80) + b = _dis(start=10, end=80) + self.assertTrue(a.within(b)) + + def test_within_zero_length_at_boundary(self): + a = _dis(start=10, end=10) + b = _dis(start=10, end=80) + self.assertTrue(a.within(b)) + + def test_within_zero_length_outside(self): + a = _dis(start=5, end=5) + b = _dis(start=10, end=80) + self.assertFalse(a.within(b)) + + def test_within_opposite_strand(self): + a = _dis(start=80, end=120, on_coordinate_strand=False) + b = _dis(start=50, end=150, on_coordinate_strand=False) + self.assertTrue(a.within(b)) + + def test_different_coord_space_raises(self): + a = _dis(start=30, end=50) + other_ivs = _make_intervals([("chr1", "+", 100, 200), ("chr1", "+", 500, 600)]) + b = _dis(start=10, end=80, ivs=other_ivs) 
+ with self.assertRaises(ValueError): + a.within(b) + + def test_different_on_coord_strand_raises(self): + a = _dis(start=30, end=50, on_coordinate_strand=True) + b = _dis(start=10, end=80, on_coordinate_strand=False) + with self.assertRaises(ValueError): + a.within(b) + + def test_non_dis_raises(self): + a = _dis(start=30, end=50) + with self.assertRaises(TypeError): + a.within("not a DIS") + + if __name__ == "__main__": unittest.main() From 07ec83aaf3917991046d67c604edded67d06d7d2 Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Tue, 31 Mar 2026 22:02:51 -0400 Subject: [PATCH 02/18] docs: shift, expand, upstream, downstream --- docs-src/diseq.rst | 184 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) diff --git a/docs-src/diseq.rst b/docs-src/diseq.rst index daad9b7..2ebe806 100644 --- a/docs-src/diseq.rst +++ b/docs-src/diseq.rst @@ -476,3 +476,187 @@ The ``as_positive_strand()`` and ``as_negative_strand()`` methods return Strand methods only affect the interval layer. The coordinate intervals always remain unchanged. + +Shifting and Expanding +====================== + +Both ``shift`` and ``expand`` return a **new** DIS with modified interval +indices. The coordinate space is always unchanged. + +shift +~~~~~ + +``shift(amount)`` moves the interval downstream by ``amount`` bases. +A negative value shifts upstream. The interval length is preserved. 
+ +On the coordinate strand, downstream means increasing indices:: + + Before shift(2): + DIS Coordinates: 0 1 2 3 4 5 6 7 + |<--------->| + end5 end3 + + After shift(2): + DIS Coordinates: 0 1 2 3 4 5 6 7 + |<--------->| + end5 end3 + +On the opposite strand, "downstream" is the reverse direction in index +space, so ``shift(2)`` moves the interval toward *lower* indices:: + + Before shift(2) (on_coordinate_strand=False): + DIS Coordinates: 0 1 2 3 4 5 6 7 + |<--------->| + end3 end5 + + After shift(2): + DIS Coordinates: 0 1 2 3 4 5 6 7 + |<--------->| + end3 end5 + +In code:: + + >>> dis.start, dis.end + (30, 150) + >>> shifted = dis.shift(10) + >>> shifted.start, shifted.end + (40, 160) + >>> shifted.coordinate_intervals == dis.coordinate_intervals + True + + >>> # Negative values shift upstream + >>> dis.shift(-10).start, dis.shift(-10).end + (20, 140) + + >>> # On the opposite strand, downstream reverses in index space + >>> opp = dis.as_opposite_strand() + >>> shifted_opp = opp.shift(10) + >>> shifted_opp.start, shifted_opp.end + (20, 140) + +.. note:: + + ``shift`` can move the interval beyond the coordinate space bounds + (``start < 0`` or ``end > coordinate_length``). + +expand +~~~~~~ + +``expand(upstream, dnstream)`` grows (or shrinks) the interval toward +its 5' and 3' ends. 
When ``dnstream`` is omitted the expansion is +symmetric:: + + Before expand(1): + DIS Coordinates: 0 1 2 3 4 5 6 7 + |<--------->| + end5 end3 + + After expand(1): + DIS Coordinates: 0 1 2 3 4 5 6 7 + |<----------------->| + end5 end3 + +Negative values contract the interval:: + + Before expand(-1, -1): + DIS Coordinates: 0 1 2 3 4 5 6 7 + |<----------------->| + end5 end3 + + After expand(-1, -1): + DIS Coordinates: 0 1 2 3 4 5 6 7 + |<--------->| + end5 end3 + +In code:: + + >>> dis.start, dis.end + (30, 150) + + >>> # Symmetric expansion + >>> dis.expand(5).start, dis.expand(5).end + (25, 155) + + >>> # Asymmetric expansion + >>> dis.expand(5, 10).start, dis.expand(5, 10).end + (25, 160) + + >>> # Upstream-only expansion + >>> dis.expand(5, 0).start, dis.expand(5, 0).end + (25, 150) + + >>> # Contraction with negative values + >>> dis.expand(-10, -20).start, dis.expand(-10, -20).end + (40, 130) + +.. note:: + + Contracting to exactly zero length is valid, but contracting past + zero raises ``ValueError``. + +Positional Comparisons +====================== + +``upstream_of`` and ``dnstream_of`` compare two DIS intervals that share +the same coordinate space and the same ``on_coordinate_strand``. Both +methods require strict separation — any overlap returns ``False``. + +upstream_of +~~~~~~~~~~~ + +``upstream_of(other)`` returns ``True`` if ``self`` is strictly 5' of +``other`` with no overlap. 
Adjacent intervals (``self.end`` equals ``other.start`` on the coordinate strand; +``self.start`` equals ``other.end`` off it) count as upstream:: + + DIS Coordinates: 0 1 2 3 4 5 6 7 8 9 + |<->| |<->| + a b + a.upstream_of(b) is True (no overlap) + + DIS Coordinates: 0 1 2 3 4 5 6 7 8 9 + |<----->| + a |<----->| + b + a.upstream_of(b) is True (adjacent: a.end == b.start) + + DIS Coordinates: 0 1 2 3 4 5 6 7 8 9 + |<--------->| + a |<----->| + b + a.upstream_of(b) is False (overlap) + +In code:: + + >>> a = DisjointIntervalSequence(coord_ivs, start=10, end=30) + >>> b = DisjointIntervalSequence(coord_ivs, start=50, end=80) + >>> a.upstream_of(b) + True + >>> b.upstream_of(a) + False + + >>> # Adjacent intervals count as upstream + >>> a2 = DisjointIntervalSequence(coord_ivs, start=10, end=50) + >>> a2.upstream_of(b) + True + +.. note:: + + Both intervals must share the same ``coordinate_intervals`` and the + same ``on_coordinate_strand``, otherwise ``ValueError`` is raised. + Two zero-length intervals at the same position are neither upstream + nor downstream of each other. + +dnstream_of +~~~~~~~~~~~ + +``dnstream_of(other)`` is the mirror of ``upstream_of``: it returns +``True`` if ``self`` is strictly 3' of ``other`` with no overlap. +Adjacent intervals count as downstream.
The same requirements on shared +coordinate space and strand apply:: + + >>> a = DisjointIntervalSequence(coord_ivs, start=50, end=80) + >>> b = DisjointIntervalSequence(coord_ivs, start=10, end=30) + >>> a.dnstream_of(b) + True + >>> b.dnstream_of(a) + False From 0ed249d0096550fcb6c7dc3dc271f9b39b715df7 Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Wed, 15 Apr 2026 00:17:53 -0400 Subject: [PATCH 03/18] update tests with changes in pt1 pr --- genome_kit/diseq.py | 10 +++++----- tests/test_diseq.py | 24 ++++++++++++------------ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/genome_kit/diseq.py b/genome_kit/diseq.py index e9c18a1..06bc8b3 100644 --- a/genome_kit/diseq.py +++ b/genome_kit/diseq.py @@ -490,8 +490,8 @@ def _from_end_indices(self, end5: int, end3: int) -> "DisjointIntervalSequence": ) return DisjointIntervalSequence( self._coordinate_intervals, - coord_id=self._coord_metadata.id, - interval_id=self._interval_metadata.id, + coord_name=self._coord_metadata.name, + interval_name=self._interval_metadata.name, on_coordinate_strand=self.on_coordinate_strand, start=min(end5, end3), end=max(end5, end3), @@ -581,7 +581,7 @@ def is_positive_strand(self) -> bool: ------- :py:class:`bool` """ - if self.transcript_strand == "+": + if self.strand == "+": return True return False @@ -625,8 +625,8 @@ def as_opposite_strand(self) -> "DisjointIntervalSequence": """ return DisjointIntervalSequence( self._coordinate_intervals, - coord_id=self._coord_metadata.id, - interval_id=self._interval_metadata.id, + coord_name=self._coord_metadata.name, + interval_name=self._interval_metadata.name, on_coordinate_strand=not self.on_coordinate_strand, start=self._start, end=self._end, diff --git a/tests/test_diseq.py b/tests/test_diseq.py index 143baed..68fe147 100644 --- a/tests/test_diseq.py +++ b/tests/test_diseq.py @@ -625,13 +625,13 @@ def test_eq_non_dis(self): def _dis( - start=0, end=200, on_coordinate_strand=True, coord_id="c", interval_id="i", ivs=None 
+ start=0, end=200, on_coordinate_strand=True, coord_name="c", interval_name="i", ivs=None ): """Quick DIS factory for tests.""" return DisjointIntervalSequence( ivs or _COORD_IVS, - coord_id=coord_id, - interval_id=interval_id, + coord_name=coord_name, + interval_name=interval_name, on_coordinate_strand=on_coordinate_strand, start=start, end=end, @@ -685,10 +685,10 @@ def test_shift_opposite_strand(self): self.assertEqual(shifted.end, 140) def test_shift_preserves_metadata(self): - dis = _dis(start=30, end=150, coord_id="mycoord", interval_id="myiv") + dis = _dis(start=30, end=150, coord_name="mycoord", interval_name="myiv") shifted = dis.shift(10) - self.assertEqual(shifted.coord_id, "mycoord") - self.assertEqual(shifted.id, "myiv") + self.assertEqual(shifted.coord_name, "mycoord") + self.assertEqual(shifted.name, "myiv") self.assertTrue(shifted.on_coordinate_strand) def test_shift_preserves_coordinate_intervals(self): @@ -768,10 +768,10 @@ def test_expand_beyond_coordinate(self): self.assertEqual(expanded.start, -20) def test_expand_preserves_metadata(self): - dis = _dis(start=30, end=150, coord_id="c", interval_id="i") + dis = _dis(start=30, end=150, coord_name="c", interval_name="i") expanded = dis.expand(5) - self.assertEqual(expanded.coord_id, "c") - self.assertEqual(expanded.id, "i") + self.assertEqual(expanded.coord_name, "c") + self.assertEqual(expanded.name, "i") self.assertTrue(expanded.on_coordinate_strand) def test_expand_preserves_coordinate_intervals(self): @@ -830,9 +830,9 @@ def test_different_coord_space_raises(self): with self.assertRaises(ValueError): a.upstream_of(b) - def test_different_coord_id_allowed(self): - a = _dis(start=10, end=30, coord_id="a") - b = _dis(start=50, end=80, coord_id="b") + def test_different_coord_name_allowed(self): + a = _dis(start=10, end=30, coord_name="a") + b = _dis(start=50, end=80, coord_name="b") self.assertTrue(a.upstream_of(b)) def test_different_on_coord_strand_raises(self): From 
1926c437b2c4254094a0872f1a2d5c9dc9247606 Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Wed, 15 Apr 2026 01:02:18 -0400 Subject: [PATCH 04/18] add missing test cases --- tests/test_diseq.py | 156 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 154 insertions(+), 2 deletions(-) diff --git a/tests/test_diseq.py b/tests/test_diseq.py index 68fe147..1ed6d9d 100644 --- a/tests/test_diseq.py +++ b/tests/test_diseq.py @@ -192,6 +192,7 @@ class TestFromTranscript(unittest.TestCase): def setUp(self): self.genome = MiniGenome("gencode.v41") self.transcript = self.genome.transcripts["ENST00000233331.12"] + self.neg_transcript = self.genome.transcripts["ENST00000448666.7"] def test_exons_region(self): dis = DisjointIntervalSequence.from_transcript(self.transcript, region="exons") @@ -232,6 +233,23 @@ def test_custom_id_overrides(self): self.assertEqual(dis.coord_name, "custom_coord") self.assertEqual(dis.name, "custom_iv") + def test_negative_strand_exons(self): + dis = DisjointIntervalSequence.from_transcript(self.neg_transcript, region="exons") + expected = tuple(e.interval for e in self.neg_transcript.exons) + self.assertEqual(dis.coordinate_intervals, expected) + self.assertEqual(dis.coord_strand, "-") + + def test_negative_strand_cds(self): + dis = DisjointIntervalSequence.from_transcript(self.neg_transcript, region="cds") + expected = tuple(c.interval for c in self.neg_transcript.cdss) + self.assertEqual(dis.coordinate_intervals, expected) + + def test_negative_strand_metadata(self): + dis = DisjointIntervalSequence.from_transcript(self.neg_transcript) + self.assertEqual(dis.coord_name, self.neg_transcript.id) + self.assertEqual(dis.coord_strand, "-") + self.assertEqual(dis.chromosome, self.neg_transcript.chromosome) + class TestProperties(unittest.TestCase): @@ -400,6 +418,27 @@ def test_as_opposite_strand_preserves_start_end(self): self.assertEqual(opp.start, 10) self.assertEqual(opp.end, 80) + def test_as_opposite_strand_preserves_metadata(self): + 
ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, coord_name="c", interval_name="i") + opp = dis.as_opposite_strand() + self.assertEqual(opp.coord_name, "c") + self.assertEqual(opp.name, "i") + + def test_as_positive_strand_preserves_start_end(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=False, start=10, end=80) + result = dis.as_positive_strand() + self.assertEqual(result.start, 10) + self.assertEqual(result.end, 80) + + def test_as_negative_strand_preserves_start_end(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True, start=10, end=80) + result = dis.as_negative_strand() + self.assertEqual(result.start, 10) + self.assertEqual(result.end, 80) + def test_end5_end3_swap_on_opposite_strand(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) dis = DisjointIntervalSequence(ivs, start=10, end=80) @@ -562,13 +601,13 @@ def test_eq_same(self): b = DisjointIntervalSequence(ivs, coord_name="x", interval_name="i") self.assertEqual(a, b) - def test_eq_different_coord_id(self): + def test_eq_different_coord_name(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) a = DisjointIntervalSequence(ivs, coord_name="x") b = DisjointIntervalSequence(ivs, coord_name="y") self.assertNotEqual(a, b) - def test_eq_different_interval_id(self): + def test_eq_different_interval_name(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) a = DisjointIntervalSequence(ivs, interval_name="x") b = DisjointIntervalSequence(ivs, interval_name="y") @@ -622,6 +661,7 @@ def test_eq_non_dis(self): # Helper for shift/expand/relational tests # 2 exons on chr1+: [100,200) and [300,400), coordinate_length=200 _COORD_IVS = _make_intervals([("chr1", "+", 100, 200), ("chr1", "+", 300, 400)]) +_NEG_COORD_IVS = _make_intervals([("chr1", "-", 100, 200), ("chr1", "-", 300, 400)]) def _dis( @@ -638,6 +678,18 @@ def _dis( ) +def 
_neg_dis(start=0, end=200, on_coordinate_strand=True): + """Quick DIS factory for negative-strand coordinate interval tests.""" + return DisjointIntervalSequence( + _NEG_COORD_IVS, + coord_name="c", + interval_name="i", + on_coordinate_strand=on_coordinate_strand, + start=start, + end=end, + ) + + class TestShift(unittest.TestCase): def test_shift_positive(self): @@ -684,6 +736,14 @@ def test_shift_opposite_strand(self): self.assertEqual(shifted.start, 20) self.assertEqual(shifted.end, 140) + def test_shift_opposite_strand_negative_shift(self): + # on_coordinate_strand=False: upstream_step=+1, downstream=-1 + # shift 10 upstream → add 10 to both + dis = _dis(start=30, end=150, on_coordinate_strand=False) + shifted = dis.shift(-10) + self.assertEqual(shifted.start, 40) + self.assertEqual(shifted.end, 160) + def test_shift_preserves_metadata(self): dis = _dis(start=30, end=150, coord_name="mycoord", interval_name="myiv") shifted = dis.shift(10) @@ -691,11 +751,33 @@ def test_shift_preserves_metadata(self): self.assertEqual(shifted.name, "myiv") self.assertTrue(shifted.on_coordinate_strand) + def test_shift_preserves_metadata_opposite_strand(self): + dis = _dis( + start=30, end=150, coord_name="mycoord", interval_name="myiv", + on_coordinate_strand=False, + ) + shifted = dis.shift(10) + self.assertEqual(shifted.coord_name, "mycoord") + self.assertEqual(shifted.name, "myiv") + self.assertFalse(shifted.on_coordinate_strand) + def test_shift_preserves_coordinate_intervals(self): dis = _dis(start=30, end=150) shifted = dis.shift(10) self.assertEqual(shifted.coordinate_intervals, dis.coordinate_intervals) + def test_shift_negative_strand_coords(self): + dis = _neg_dis(start=30, end=150) + shifted = dis.shift(10) + self.assertEqual(shifted.start, 40) + self.assertEqual(shifted.end, 160) + + def test_shift_negative_strand_coords_opposite(self): + dis = _neg_dis(start=30, end=150, on_coordinate_strand=False) + shifted = dis.shift(10) + self.assertEqual(shifted.start, 20) + 
self.assertEqual(shifted.end, 140) + class TestExpand(unittest.TestCase): @@ -755,6 +837,22 @@ def test_expand_opposite_strand(self): self.assertEqual(expanded.start, 25) self.assertEqual(expanded.end, 155) + def test_expand_opposite_strand_upstream_only(self): + # on_coordinate_strand=False: upstream_step=+1 + # end5=150, end3=30. expand(5, 0): end5 moves to 155, end3 stays at 30 + dis = _dis(start=30, end=150, on_coordinate_strand=False) + expanded = dis.expand(5, 0) + self.assertEqual(expanded.start, 30) + self.assertEqual(expanded.end, 155) + + def test_expand_opposite_strand_downstream_only(self): + # on_coordinate_strand=False: upstream_step=+1 + # end5=150, end3=30. expand(0, 5): end5 stays at 150, end3 moves to 25 + dis = _dis(start=30, end=150, on_coordinate_strand=False) + expanded = dis.expand(0, 5) + self.assertEqual(expanded.start, 25) + self.assertEqual(expanded.end, 150) + def test_expand_zero_length_interval(self): dis = _dis(start=50, end=50) expanded = dis.expand(5) @@ -774,11 +872,33 @@ def test_expand_preserves_metadata(self): self.assertEqual(expanded.name, "i") self.assertTrue(expanded.on_coordinate_strand) + def test_expand_preserves_metadata_opposite_strand(self): + dis = _dis( + start=30, end=150, coord_name="c", interval_name="i", + on_coordinate_strand=False, + ) + expanded = dis.expand(5) + self.assertEqual(expanded.coord_name, "c") + self.assertEqual(expanded.name, "i") + self.assertFalse(expanded.on_coordinate_strand) + def test_expand_preserves_coordinate_intervals(self): dis = _dis(start=30, end=150) expanded = dis.expand(5) self.assertEqual(expanded.coordinate_intervals, dis.coordinate_intervals) + def test_expand_negative_strand_coords(self): + dis = _neg_dis(start=30, end=150) + expanded = dis.expand(5, 10) + self.assertEqual(expanded.start, 25) + self.assertEqual(expanded.end, 160) + + def test_expand_negative_strand_coords_opposite(self): + dis = _neg_dis(start=30, end=150, on_coordinate_strand=False) + expanded = dis.expand(5) 
+ self.assertEqual(expanded.start, 25) + self.assertEqual(expanded.end, 155) + class TestUpstreamOf(unittest.TestCase): @@ -846,6 +966,16 @@ def test_non_dis_raises(self): with self.assertRaises(TypeError): a.upstream_of("not a DIS") + def test_upstream_of_negative_strand_coords(self): + a = _neg_dis(start=10, end=30) + b = _neg_dis(start=50, end=80) + self.assertTrue(a.upstream_of(b)) + + def test_upstream_of_negative_strand_coords_false(self): + a = _neg_dis(start=50, end=80) + b = _neg_dis(start=10, end=30) + self.assertFalse(a.upstream_of(b)) + class TestDnstreamOf(unittest.TestCase): @@ -893,6 +1023,16 @@ def test_different_on_coord_strand_raises(self): with self.assertRaises(ValueError): a.dnstream_of(b) + def test_dnstream_of_negative_strand_coords(self): + a = _neg_dis(start=50, end=80) + b = _neg_dis(start=10, end=30) + self.assertTrue(a.dnstream_of(b)) + + def test_dnstream_of_negative_strand_coords_false(self): + a = _neg_dis(start=10, end=30) + b = _neg_dis(start=50, end=80) + self.assertFalse(a.dnstream_of(b)) + class TestWithin(unittest.TestCase): @@ -923,7 +1063,9 @@ def test_within_at_boundary(self): def test_within_zero_length_at_boundary(self): a = _dis(start=10, end=10) b = _dis(start=10, end=80) + c = _dis(start=80, end=80) self.assertTrue(a.within(b)) + self.assertTrue(c.within(b)) def test_within_zero_length_outside(self): a = _dis(start=5, end=5) @@ -953,6 +1095,16 @@ def test_non_dis_raises(self): with self.assertRaises(TypeError): a.within("not a DIS") + def test_within_negative_strand_coords(self): + a = _neg_dis(start=30, end=50) + b = _neg_dis(start=10, end=80) + self.assertTrue(a.within(b)) + + def test_within_negative_strand_coords_false(self): + a = _neg_dis(start=10, end=80) + b = _neg_dis(start=30, end=50) + self.assertFalse(a.within(b)) + if __name__ == "__main__": unittest.main() From 794ca8867381b739e2e9cc3f9582ee23651cc921 Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Wed, 15 Apr 2026 01:50:09 -0400 Subject: [PATCH 05/18] 
add flip_strand method --- docs-src/diseq.rst | 57 ++++++++++++++++++----------- genome_kit/diseq.py | 34 ++++++++++++++++-- tests/test_diseq.py | 88 ++++++++++++++++++++++++++++++++------------- 3 files changed, 130 insertions(+), 49 deletions(-) diff --git a/docs-src/diseq.rst b/docs-src/diseq.rst index 2ebe806..2c5be20 100644 --- a/docs-src/diseq.rst +++ b/docs-src/diseq.rst @@ -178,7 +178,7 @@ Consider a transcript on the negative strand: DNA Sequence (-): | G T C A G T C A G T C A G T | Genomic Coordinates: 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 Negative Strand -Taking just the exons: +Extracting only the exons: :: 3' Exon3 Exon2 Exon1 5' | |<------->| |<--->| |<--->| | @@ -274,7 +274,7 @@ The most common way to create a DIS is from a >>> dis = DisjointIntervalSequence.from_transcript(transcript) By default, the coordinate space is built from the transcript's exons. -You can also specify a region to use CDS or UTR intervals:: +It's also possible to specify a region to use CDS or UTR intervals:: >>> dis_cds = DisjointIntervalSequence.from_transcript(transcript, region="cds") >>> dis_utr5 = DisjointIntervalSequence.from_transcript(transcript, region="utr5") @@ -289,7 +289,7 @@ be overridden:: From Intervals ~~~~~~~~~~~~~~ -You can also construct a DIS from any sequence of +A DIS can be constructed from any sequence of :py:class:`~genome_kit.Interval` objects:: >>> from genome_kit import Interval @@ -383,6 +383,7 @@ When ``on_coordinate_strand`` is ``False``, the mapping reverses: Opposite Strand :: + >>> opp = dis.as_opposite_strand() >>> opp.end5_index # same as end when off coordinate strand 7 @@ -423,47 +424,61 @@ interval is on the same strand as the coordinate intervals:: >>> dis.is_positive_strand() True -``as_opposite_strand()`` creates a new DIS with the interval on the other -strand. The ``start`` and ``end`` indices are preserved — only -``on_coordinate_strand`` is flipped:: +Three methods change the interval's strand. 
All preserve ``start``, +``end``, and the coordinate intervals. + +``as_opposite_strand()`` sets ``on_coordinate_strand`` to ``False``, +returning ``self`` if already on the opposite strand:: - Before (on_coordinate_strand=False): + Before as_opposite_strand() (on_coordinate_strand=True): Start Index: 1 End Index: 6 DIS Coordinates: 0 1 2 3 4 5 6 7 DNA Sequence (+): T A A C C C T + |<------------->| ----------------------------------------------------- DNA Sequence (-): A T T G G G A DIS Coordinates: 0 1 2 3 4 5 6 7 - |<------------->| Opposite Strand - After as_opposite_strand() (on_coordinate_strand=True): + After as_opposite_strand() (on_coordinate_strand=False): Start Index: 1 End Index: 6 DIS Coordinates: 0 1 2 3 4 5 6 7 DNA Sequence (+): T A A C C C T - |<------------->| ----------------------------------------------------- DNA Sequence (-): A T T G G G A DIS Coordinates: 0 1 2 3 4 5 6 7 + |<------------->| Opposite Strand -In code:: +:: + >>> dis.on_coordinate_strand - False - >>> dis.is_positive_strand() - False + True >>> opposite = dis.as_opposite_strand() >>> opposite.on_coordinate_strand + False + >>> opposite.start == dis.start # start/end unchanged True - >>> opposite.is_positive_strand() + +``as_same_strand()`` sets ``on_coordinate_strand`` to ``True``, +returning ``self`` if already on the coordinate strand:: + + >>> dis.on_coordinate_strand True - >>> opposite.start == dis.start # start/end unchanged + >>> dis.as_same_strand() is dis True - >>> opposite.end == dis.end + +``flip_strand()`` toggles ``on_coordinate_strand`` (always returns a +new DIS):: + + >>> dis.on_coordinate_strand True - >>> opposite.coordinate_intervals == dis.coordinate_intervals + >>> flipped = dis.flip_strand() + >>> flipped.on_coordinate_strand + False + >>> flipped.flip_strand().on_coordinate_strand True The ``as_positive_strand()`` and ``as_negative_strand()`` methods return @@ -514,7 +529,7 @@ space, so ``shift(2)`` moves the interval toward *lower* indices:: 
|<--------->| end3 end5 -In code:: +:: >>> dis.start, dis.end (30, 150) @@ -568,7 +583,7 @@ Negative values contract the interval:: |<--------->| end5 end3 -In code:: +:: >>> dis.start, dis.end (30, 150) @@ -625,7 +640,7 @@ upstream_of b a.upstream_of(b) is False (overlap) -In code:: +:: >>> a = DisjointIntervalSequence(coord_ivs, start=10, end=30) >>> b = DisjointIntervalSequence(coord_ivs, start=50, end=80) diff --git a/genome_kit/diseq.py b/genome_kit/diseq.py index 06bc8b3..0422890 100644 --- a/genome_kit/diseq.py +++ b/genome_kit/diseq.py @@ -597,7 +597,7 @@ def as_positive_strand(self) -> "DisjointIntervalSequence": """ if self.is_positive_strand(): return self - return self.as_opposite_strand() + return self.flip_strand() def as_negative_strand(self) -> "DisjointIntervalSequence": """Return a DIS with the interval on the negative strand. @@ -611,10 +611,38 @@ def as_negative_strand(self) -> "DisjointIntervalSequence": """ if not self.is_positive_strand(): return self - return self.as_opposite_strand() + return self.flip_strand() def as_opposite_strand(self) -> "DisjointIntervalSequence": - """Return a new DIS with the interval on the opposite strand. + """Return a DIS with the interval on the opposite strand. + + Returns ``self`` if already on the opposite strand. The coordinate + intervals are unchanged; only the interval strand is affected. + + Returns + ------- + :py:class:`DisjointIntervalSequence` + """ + if not self.on_coordinate_strand: + return self + return self.flip_strand() + + def as_same_strand(self) -> "DisjointIntervalSequence": + """Return a DIS with the interval on the coordinate strand. + + Returns ``self`` if already on the coordinate strand. The coordinate + intervals are unchanged; only the interval strand is affected. 
+ + Returns + ------- + :py:class:`DisjointIntervalSequence` + """ + if self.on_coordinate_strand: + return self + return self.flip_strand() + + def flip_strand(self) -> "DisjointIntervalSequence": + """Return a new DIS with ``on_coordinate_strand`` toggled. The coordinate intervals are unchanged. The interval's ``on_coordinate_strand`` is flipped. diff --git a/tests/test_diseq.py b/tests/test_diseq.py index 1ed6d9d..4b5bfb8 100644 --- a/tests/test_diseq.py +++ b/tests/test_diseq.py @@ -392,31 +392,58 @@ def test_as_negative_strand_flips(self): self.assertEqual(result.end, 80) self.assertEqual(result, expected) - def test_as_opposite_strand(self): + def test_flip_strand(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True) - opp = dis.as_opposite_strand() - self.assertFalse(opp.is_positive_strand()) - opp2 = opp.as_opposite_strand() - self.assertTrue(opp2.is_positive_strand()) + flipped = dis.flip_strand() + self.assertFalse(flipped.on_coordinate_strand) + flipped2 = flipped.flip_strand() + self.assertTrue(flipped2.on_coordinate_strand) - def test_strand_flip_preserves_coordinate_intervals(self): + def test_flip_strand_preserves_coordinate_intervals(self): ivs = _make_intervals([("chr1", "+", 100, 200), ("chr1", "+", 300, 400)]) dis = DisjointIntervalSequence(ivs) - flipped = dis.as_opposite_strand() + flipped = dis.flip_strand() self.assertEqual(flipped.coordinate_intervals, dis.coordinate_intervals) - def test_idempotency(self): + def test_flip_strand_preserves_start_end(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) - dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True) - self.assertIs(dis.as_positive_strand().as_positive_strand(), dis) + dis = DisjointIntervalSequence(ivs, start=10, end=80) + flipped = dis.flip_strand() + self.assertEqual(flipped.start, 10) + self.assertEqual(flipped.end, 80) - def test_as_opposite_strand_preserves_start_end(self): + def 
test_flip_strand_preserves_metadata(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, coord_name="c", interval_name="i") + flipped = dis.flip_strand() + self.assertEqual(flipped.coord_name, "c") + self.assertEqual(flipped.name, "i") + + def test_end5_end3_swap_on_flip_strand(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) dis = DisjointIntervalSequence(ivs, start=10, end=80) - opp = dis.as_opposite_strand() - self.assertEqual(opp.start, 10) - self.assertEqual(opp.end, 80) + # On coordinate strand: end5 at start, end3 at end + self.assertEqual(dis.end5_index, 10) + self.assertEqual(dis.end3_index, 80) + # Flipped: end5 at end, end3 at start + flipped = dis.flip_strand() + self.assertEqual(flipped.end5_index, 80) + self.assertEqual(flipped.end3_index, 10) + + def test_as_opposite_strand_already_opposite(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=False) + result = dis.as_opposite_strand() + self.assertIs(result, dis) + + def test_as_opposite_strand_from_same(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True, start=10, end=80) + result = dis.as_opposite_strand() + self.assertFalse(result.on_coordinate_strand) + self.assertEqual(result.start, 10) + self.assertEqual(result.end, 80) def test_as_opposite_strand_preserves_metadata(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) @@ -425,6 +452,28 @@ def test_as_opposite_strand_preserves_metadata(self): self.assertEqual(opp.coord_name, "c") self.assertEqual(opp.name, "i") + def test_as_same_strand_already_same(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True) + result = dis.as_same_strand() + self.assertIs(result, dis) + + def test_as_same_strand_flips(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, 
on_coordinate_strand=False, start=10, end=80) + result = dis.as_same_strand() + self.assertTrue(result.on_coordinate_strand) + self.assertEqual(result.start, 10) + self.assertEqual(result.end, 80) + + def test_idempotency(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis_pos = DisjointIntervalSequence(ivs, on_coordinate_strand=True) + self.assertIs(dis_pos.as_positive_strand().as_positive_strand(), dis_pos) + self.assertIs(dis_pos.as_same_strand().as_same_strand(), dis_pos) + dis_opp = DisjointIntervalSequence(ivs, on_coordinate_strand=False) + self.assertIs(dis_opp.as_opposite_strand().as_opposite_strand(), dis_opp) + def test_as_positive_strand_preserves_start_end(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) dis = DisjointIntervalSequence(ivs, on_coordinate_strand=False, start=10, end=80) @@ -439,17 +488,6 @@ def test_as_negative_strand_preserves_start_end(self): self.assertEqual(result.start, 10) self.assertEqual(result.end, 80) - def test_end5_end3_swap_on_opposite_strand(self): - ivs = _make_intervals([("chr1", "+", 100, 200)]) - dis = DisjointIntervalSequence(ivs, start=10, end=80) - # On coordinate strand: end5 at start, end3 at end - self.assertEqual(dis.end5_index, 10) - self.assertEqual(dis.end3_index, 80) - # Off coordinate strand: end5 at end, end3 at start - opp = dis.as_opposite_strand() - self.assertEqual(opp.end5_index, 80) - self.assertEqual(opp.end3_index, 10) - class TestEndProperties(unittest.TestCase): From 4e1238d0e005e53dce5d7dd5b188c7b9e21e1829 Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Wed, 15 Apr 2026 01:53:41 -0400 Subject: [PATCH 06/18] remove genomic_span --- genome_kit/diseq.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/genome_kit/diseq.py b/genome_kit/diseq.py index 0422890..4d58865 100644 --- a/genome_kit/diseq.py +++ b/genome_kit/diseq.py @@ -660,24 +660,6 @@ def flip_strand(self) -> "DisjointIntervalSequence": end=self._end, ) - def genomic_span(self) -> Interval: - 
"""Smallest single Interval spanning all coordinate intervals. - - Returns - ------- - :py:class:`~genome_kit.Interval` - An interval from the minimum ``start`` to the maximum ``end`` - across all coordinate intervals. - """ - ivs = self._coordinate_intervals - return Interval( - ivs[0].chromosome, - ivs[0].strand, - min(iv.start for iv in ivs), - max(iv.end for iv in ivs), - ivs[0].reference_genome, - ) - def __len__(self) -> int: """Return the length of the interval.""" return self.length From 6e0858ab40a836056c7b2312d1a4391a9c3da6ab Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Wed, 15 Apr 2026 13:55:14 -0400 Subject: [PATCH 07/18] remove unused private function set_end3/5 --- genome_kit/diseq.py | 42 ------------------------------------------ 1 file changed, 42 deletions(-) diff --git a/genome_kit/diseq.py b/genome_kit/diseq.py index 4d58865..28a0a61 100644 --- a/genome_kit/diseq.py +++ b/genome_kit/diseq.py @@ -408,48 +408,6 @@ def length(self) -> int: """Length of the interval on the coordinate space.""" return self.end - self.start - def _set_end5(self, end5: int) -> "DisjointIntervalSequence": - """Convenience method to update start/end based on a new end5 index.""" - if end5 == self.end5_index: - return self # No change - new_start, new_end = self._start, self._end - end5_difference = end5 - self.end5_index - is_moved_upstream = end5_difference * self._upstream_index_step() > 0 - if is_moved_upstream and self._upstream_index_step() == -1: - new_start = new_start - abs(end5_difference) - elif is_moved_upstream and self._upstream_index_step() == 1: - new_end = new_end + abs(end5_difference) - elif not is_moved_upstream and self._upstream_index_step() == -1: - new_start = new_start + abs(end5_difference) - elif not is_moved_upstream and self._upstream_index_step() == 1: - new_end = new_end - abs(end5_difference) - if new_start > new_end: - raise ValueError( - f"Invalid end5 update: end5 index {end5} would be downstream of end3 index {self.end3_index}" 
- ) - return self._from_end_indices(new_start, new_end) - - def _set_end3(self, end3: int) -> "DisjointIntervalSequence": - """Convenience method to update start/end based on a new end3 index.""" - if end3 == self.end3_index: - return self # No change - new_start, new_end = self._start, self._end - end3_difference = end3 - self.end3_index - is_moved_upstream = end3_difference * self._upstream_index_step() > 0 - if is_moved_upstream and self._upstream_index_step() == -1: - new_end = new_end - abs(end3_difference) - elif is_moved_upstream and self._upstream_index_step() == 1: - new_start = new_start + abs(end3_difference) - elif not is_moved_upstream and self._upstream_index_step() == -1: - new_end = new_end + abs(end3_difference) - elif not is_moved_upstream and self._upstream_index_step() == 1: - new_start = new_start - abs(end3_difference) - if new_start > new_end: - raise ValueError( - f"Invalid end3 update: end3 index {end3} would be upstream of end5 index {self.end5_index}" - ) - return self._from_end_indices(new_start, new_end) - def _upstream_index_step(self, on_coordinate_strand: bool | None = None) -> int: """Return +1 or -1 indicating the upstream direction in index space. From aae2f740aadaedfba88464ca03d7e69e83452ab6 Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Wed, 15 Apr 2026 14:44:54 -0400 Subject: [PATCH 08/18] add is_same_strand and is_opposite_strand methods --- docs-src/diseq.rst | 11 ++++++++++- genome_kit/diseq.py | 18 ++++++++++++++++++ tests/test_diseq.py | 27 +++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/docs-src/diseq.rst b/docs-src/diseq.rst index 2c5be20..26a11a0 100644 --- a/docs-src/diseq.rst +++ b/docs-src/diseq.rst @@ -405,7 +405,7 @@ available as properties:: Strand Methods ============== -A DIS interval can sit on either strand independently of the coordinate +A DIS interval can sit on either 'virtual' strand independently of the coordinate intervals. 
The ``on_coordinate_strand`` property indicates whether the interval is on the same strand as the coordinate intervals:: On Coordinate Strand: True @@ -421,9 +421,18 @@ interval is on the same strand as the coordinate intervals:: >>> dis.on_coordinate_strand True + >>> dis.is_same_strand() + True + >>> dis.is_opposite_strand() + False >>> dis.is_positive_strand() True +``is_same_strand()`` and ``is_opposite_strand()`` test whether the interval +is on the coordinate strand or its complement. ``is_positive_strand()`` +tests the effective genomic strand (accounting for both ``coord_strand`` +and ``on_coordinate_strand``). + Three methods change the interval's strand. All preserve ``start``, ``end``, and the coordinate intervals. diff --git a/genome_kit/diseq.py b/genome_kit/diseq.py index 28a0a61..7404e2f 100644 --- a/genome_kit/diseq.py +++ b/genome_kit/diseq.py @@ -532,6 +532,24 @@ def within(self, other: "DisjointIntervalSequence") -> bool: raise ValueError("Cannot compare: intervals are on different strands") return self._start >= other.start and self._end <= other.end + def is_same_strand(self) -> bool: + """True if the interval is on the same strand as the coordinate intervals. + + Returns + ------- + :py:class:`bool` + """ + return self.on_coordinate_strand + + def is_opposite_strand(self) -> bool: + """True if the interval is on the opposite strand from the coordinate intervals. + + Returns + ------- + :py:class:`bool` + """ + return not self.on_coordinate_strand + def is_positive_strand(self) -> bool: """If the interval is on the positive strand. 
diff --git a/tests/test_diseq.py b/tests/test_diseq.py index 4b5bfb8..e846836 100644 --- a/tests/test_diseq.py +++ b/tests/test_diseq.py @@ -332,6 +332,33 @@ def test_length_zero(self): class TestStrandMethods(unittest.TestCase): + def test_is_same_strand_true(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True) + self.assertTrue(dis.is_same_strand()) + + def test_is_same_strand_false(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=False) + self.assertFalse(dis.is_same_strand()) + + def test_is_opposite_strand_true(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=False) + self.assertTrue(dis.is_opposite_strand()) + + def test_is_opposite_strand_false(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True) + self.assertFalse(dis.is_opposite_strand()) + + def test_is_same_opposite_strand_mutually_exclusive(self): + ivs = _make_intervals([("chr1", "+", 100, 200)]) + dis_same = DisjointIntervalSequence(ivs, on_coordinate_strand=True) + self.assertNotEqual(dis_same.is_same_strand(), dis_same.is_opposite_strand()) + dis_opp = DisjointIntervalSequence(ivs, on_coordinate_strand=False) + self.assertNotEqual(dis_opp.is_same_strand(), dis_opp.is_opposite_strand()) + def test_is_positive_strand_plus_on_coord(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True) From d9fc540cf0bb25059af4b2d380b6bb3a32fd5505 Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Wed, 15 Apr 2026 14:48:28 -0400 Subject: [PATCH 09/18] remove is_opposite_strand --- docs-src/diseq.rst | 9 +++------ genome_kit/diseq.py | 9 --------- tests/test_diseq.py | 17 ----------------- 3 files changed, 3 insertions(+), 32 deletions(-) diff --git a/docs-src/diseq.rst 
b/docs-src/diseq.rst index 26a11a0..5e823dc 100644 --- a/docs-src/diseq.rst +++ b/docs-src/diseq.rst @@ -423,15 +423,12 @@ interval is on the same strand as the coordinate intervals:: True >>> dis.is_same_strand() True - >>> dis.is_opposite_strand() - False >>> dis.is_positive_strand() True -``is_same_strand()`` and ``is_opposite_strand()`` test whether the interval -is on the coordinate strand or its complement. ``is_positive_strand()`` -tests the effective genomic strand (accounting for both ``coord_strand`` -and ``on_coordinate_strand``). +``is_same_strand()`` tests whether the interval is on the coordinate +strand. ``is_positive_strand()`` tests the effective genomic strand +(accounting for both ``coord_strand`` and ``on_coordinate_strand``). Three methods change the interval's strand. All preserve ``start``, ``end``, and the coordinate intervals. diff --git a/genome_kit/diseq.py b/genome_kit/diseq.py index 7404e2f..a9b5957 100644 --- a/genome_kit/diseq.py +++ b/genome_kit/diseq.py @@ -541,15 +541,6 @@ def is_same_strand(self) -> bool: """ return self.on_coordinate_strand - def is_opposite_strand(self) -> bool: - """True if the interval is on the opposite strand from the coordinate intervals. - - Returns - ------- - :py:class:`bool` - """ - return not self.on_coordinate_strand - def is_positive_strand(self) -> bool: """If the interval is on the positive strand. 
diff --git a/tests/test_diseq.py b/tests/test_diseq.py index e846836..a1ee79d 100644 --- a/tests/test_diseq.py +++ b/tests/test_diseq.py @@ -342,23 +342,6 @@ def test_is_same_strand_false(self): dis = DisjointIntervalSequence(ivs, on_coordinate_strand=False) self.assertFalse(dis.is_same_strand()) - def test_is_opposite_strand_true(self): - ivs = _make_intervals([("chr1", "+", 100, 200)]) - dis = DisjointIntervalSequence(ivs, on_coordinate_strand=False) - self.assertTrue(dis.is_opposite_strand()) - - def test_is_opposite_strand_false(self): - ivs = _make_intervals([("chr1", "+", 100, 200)]) - dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True) - self.assertFalse(dis.is_opposite_strand()) - - def test_is_same_opposite_strand_mutually_exclusive(self): - ivs = _make_intervals([("chr1", "+", 100, 200)]) - dis_same = DisjointIntervalSequence(ivs, on_coordinate_strand=True) - self.assertNotEqual(dis_same.is_same_strand(), dis_same.is_opposite_strand()) - dis_opp = DisjointIntervalSequence(ivs, on_coordinate_strand=False) - self.assertNotEqual(dis_opp.is_same_strand(), dis_opp.is_opposite_strand()) - def test_is_positive_strand_plus_on_coord(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) dis = DisjointIntervalSequence(ivs, on_coordinate_strand=True) From aef41f451e3f5999900c68358aa4faa0589f9f3d Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Wed, 15 Apr 2026 15:17:53 -0400 Subject: [PATCH 10/18] update docs with strand methods --- docs-src/diseq.rst | 51 ++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/docs-src/diseq.rst b/docs-src/diseq.rst index 5e823dc..8eae127 100644 --- a/docs-src/diseq.rst +++ b/docs-src/diseq.rst @@ -430,11 +430,11 @@ interval is on the same strand as the coordinate intervals:: strand. ``is_positive_strand()`` tests the effective genomic strand (accounting for both ``coord_strand`` and ``on_coordinate_strand``). -Three methods change the interval's strand. 
All preserve ``start``, -``end``, and the coordinate intervals. +Five methods change the interval's strand. All preserve ``start``, +``end``, and the coordinate intervals. These methods return a DIS on the requested +strand, instead of modifying the existing DIS in-place. -``as_opposite_strand()`` sets ``on_coordinate_strand`` to ``False``, -returning ``self`` if already on the opposite strand:: +``as_opposite_strand()`` sets ``on_coordinate_strand`` to ``False``:: Before as_opposite_strand() (on_coordinate_strand=True): Start Index: 1 @@ -468,16 +468,17 @@ returning ``self`` if already on the opposite strand:: >>> opposite.start == dis.start # start/end unchanged True -``as_same_strand()`` sets ``on_coordinate_strand`` to ``True``, -returning ``self`` if already on the coordinate strand:: +``as_same_strand()`` sets ``on_coordinate_strand`` to ``True``:: >>> dis.on_coordinate_strand - True - >>> dis.as_same_strand() is dis + False + >>>> dis.is_same_strand() + False + >>> same_strand_dis = dis.as_same_strand() + >>>> same_strand_dis.is_same_strand() True -``flip_strand()`` toggles ``on_coordinate_strand`` (always returns a -new DIS):: +``flip_strand()`` toggles ``on_coordinate_strand``:: >>> dis.on_coordinate_strand True @@ -487,10 +488,22 @@ new DIS):: >>> flipped.flip_strand().on_coordinate_strand True -The ``as_positive_strand()`` and ``as_negative_strand()`` methods return -``self`` if the interval is already on the requested strand:: +The ``as_positive_strand()`` and ``as_negative_strand()`` methods return a DIS with +the interval on the effective genomic strand:: - >>> dis.as_positive_strand() is dis + >>> dis.coord_strand + '+' + >>> dis.on_coordinate_strand + True + >>> dis.strand + '+' + >>> neg_dis = dis.as_negative_strand() + >>> neg_dis.strand + '-' + >>> pos_dis = neg_dis.as_positive_strand() + >>> pos_dis.strand + '+' + >>> pos_dis.coord_strand == dis.coor_strand == '+' True .. note:: @@ -512,25 +525,25 @@ A negative value shifts upstream. 
The interval length is preserved. On the coordinate strand, downstream means increasing indices:: - Before shift(2): + Before shift(1): DIS Coordinates: 0 1 2 3 4 5 6 7 |<--------->| end5 end3 - After shift(2): + After shift(1): DIS Coordinates: 0 1 2 3 4 5 6 7 |<--------->| end5 end3 On the opposite strand, "downstream" is the reverse direction in index -space, so ``shift(2)`` moves the interval toward *lower* indices:: +space, so ``shift(1)`` moves the interval toward *lower* indices:: - Before shift(2) (on_coordinate_strand=False): + Before shift(1) (on_coordinate_strand=False): DIS Coordinates: 0 1 2 3 4 5 6 7 |<--------->| end3 end5 - After shift(2): + After shift(1): DIS Coordinates: 0 1 2 3 4 5 6 7 |<--------->| end3 end5 @@ -551,6 +564,8 @@ space, so ``shift(2)`` moves the interval toward *lower* indices:: >>> # On the opposite strand, downstream reverses in index space >>> opp = dis.as_opposite_strand() + >>> opp.start, opp.end + (30, 150) >>> shifted_opp = opp.shift(10) >>> shifted_opp.start, shifted_opp.end (20, 140) From e9f7cd43927dd8b96eede0add2ef47a8ac330665 Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Wed, 15 Apr 2026 15:22:52 -0400 Subject: [PATCH 11/18] add docs for within method --- docs-src/diseq.rst | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/docs-src/diseq.rst b/docs-src/diseq.rst index 8eae127..603b8ca 100644 --- a/docs-src/diseq.rst +++ b/docs-src/diseq.rst @@ -696,3 +696,44 @@ coordinate space and strand apply:: True >>> b.dnstream_of(a) False + +within +~~~~~~ + +``within(other)`` returns ``True`` if ``self``'s interval is fully +contained within ``other``'s interval. Boundary-inclusive: an interval +is within another if it shares the same start and/or end. An interval +is always within itself. 
The same requirements on shared coordinate +space and strand apply:: + + DIS Coordinates: 0 1 2 3 4 5 6 7 8 9 + |<->| + a + |<------------->| + b + a.within(b) is True + + DIS Coordinates: 0 1 2 3 4 5 6 7 8 9 + |<------------->| + a + |<->| + b + a.within(b) is False + +:: + + >>> a = DisjointIntervalSequence(coord_ivs, start=30, end=50) + >>> b = DisjointIntervalSequence(coord_ivs, start=10, end=80) + >>> a.within(b) + True + >>> b.within(a) + False + + >>> # An interval is within itself + >>> a.within(a) + True + + >>> # Zero-length intervals are within any enclosing interval + >>> z = DisjointIntervalSequence(coord_ivs, start=50, end=50) + >>> z.within(a) + True From b2d56f0b7aed07dd0254e50f184eb3a64727cd3d Mon Sep 17 00:00:00 2001 From: Sophia Perzan <156710899+SophiaPerzan-DG@users.noreply.github.com> Date: Wed, 15 Apr 2026 15:43:28 -0400 Subject: [PATCH 12/18] Update docs-src/diseq.rst Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs-src/diseq.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs-src/diseq.rst b/docs-src/diseq.rst index 603b8ca..c8f2175 100644 --- a/docs-src/diseq.rst +++ b/docs-src/diseq.rst @@ -472,10 +472,10 @@ strand, instead of modifying the existing DIS in-place. 
>>> dis.on_coordinate_strand False - >>>> dis.is_same_strand() + >>> dis.is_same_strand() False >>> same_strand_dis = dis.as_same_strand() - >>>> same_strand_dis.is_same_strand() + >>> same_strand_dis.is_same_strand() True ``flip_strand()`` toggles ``on_coordinate_strand``:: From 712bf317c0e86e6dbc3a753ad5e627c5d30719ad Mon Sep 17 00:00:00 2001 From: Sophia Perzan <156710899+SophiaPerzan-DG@users.noreply.github.com> Date: Wed, 15 Apr 2026 15:45:04 -0400 Subject: [PATCH 13/18] Update docs-src/diseq.rst Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs-src/diseq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs-src/diseq.rst b/docs-src/diseq.rst index c8f2175..26cb1ae 100644 --- a/docs-src/diseq.rst +++ b/docs-src/diseq.rst @@ -503,7 +503,7 @@ the interval on the effective genomic strand:: >>> pos_dis = neg_dis.as_positive_strand() >>> pos_dis.strand '+' - >>> pos_dis.coord_strand == dis.coor_strand == '+' + >>> pos_dis.coord_strand == dis.coord_strand == '+' True .. note:: From 1d290fe0ada15db38bcae07d42eb93a7c74c2efa Mon Sep 17 00:00:00 2001 From: Sophia Perzan <156710899+SophiaPerzan-DG@users.noreply.github.com> Date: Wed, 15 Apr 2026 15:45:55 -0400 Subject: [PATCH 14/18] Update genome_kit/diseq.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- genome_kit/diseq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genome_kit/diseq.py b/genome_kit/diseq.py index a9b5957..46a3cfa 100644 --- a/genome_kit/diseq.py +++ b/genome_kit/diseq.py @@ -542,7 +542,7 @@ def is_same_strand(self) -> bool: return self.on_coordinate_strand def is_positive_strand(self) -> bool: - """If the interval is on the positive strand. + """True if the interval is on the positive strand. 
Returns ------- From a540ca4b1c8b3bab7631d913bebcba0a22ff3fcb Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Wed, 15 Apr 2026 15:54:40 -0400 Subject: [PATCH 15/18] update error message when DISes share coordinate intervals, but are on different strands --- genome_kit/diseq.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/genome_kit/diseq.py b/genome_kit/diseq.py index 46a3cfa..cd9caac 100644 --- a/genome_kit/diseq.py +++ b/genome_kit/diseq.py @@ -501,7 +501,12 @@ def upstream_of(self, other: "DisjointIntervalSequence") -> bool: """ self._validate_same_coordinate_space(other) if self.on_coordinate_strand != other.on_coordinate_strand: - raise ValueError("Cannot compare: intervals are on different strands") + raise ValueError( + f"Cannot compare: self is on " + f"{'same' if self.on_coordinate_strand else 'opposite'} " + f"strand but other is on " + f"{'same' if other.on_coordinate_strand else 'opposite'} strand" + ) if self.length == 0 and other.length == 0 and self.start == other.start: return False if self._upstream_index_step() == -1: @@ -515,7 +520,12 @@ def dnstream_of(self, other: "DisjointIntervalSequence") -> bool: """ self._validate_same_coordinate_space(other) if self.on_coordinate_strand != other.on_coordinate_strand: - raise ValueError("Cannot compare: intervals are on different strands") + raise ValueError( + f"Cannot compare: self is on " + f"{'same' if self.on_coordinate_strand else 'opposite'} " + f"strand but other is on " + f"{'same' if other.on_coordinate_strand else 'opposite'} strand" + ) if self.length == 0 and other.length == 0 and self.start == other.start: return False if self._upstream_index_step() == -1: @@ -529,7 +539,12 @@ def within(self, other: "DisjointIntervalSequence") -> bool: """ self._validate_same_coordinate_space(other) if self.on_coordinate_strand != other.on_coordinate_strand: - raise ValueError("Cannot compare: intervals are on different strands") + raise ValueError( + f"Cannot 
compare: self is on " + f"{'same' if self.on_coordinate_strand else 'opposite'} " + f"strand but other is on " + f"{'same' if other.on_coordinate_strand else 'opposite'} strand" + ) return self._start >= other.start and self._end <= other.end def is_same_strand(self) -> bool: From cc2d56ac67d983f2823c93f3da286d470aee86e4 Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Wed, 22 Apr 2026 01:24:42 -0400 Subject: [PATCH 16/18] remove unneeded docs on strand methods --- docs-src/diseq.rst | 85 ++-------------------------------------------- 1 file changed, 2 insertions(+), 83 deletions(-) diff --git a/docs-src/diseq.rst b/docs-src/diseq.rst index 26cb1ae..ce93ed0 100644 --- a/docs-src/diseq.rst +++ b/docs-src/diseq.rst @@ -426,90 +426,9 @@ interval is on the same strand as the coordinate intervals:: >>> dis.is_positive_strand() True -``is_same_strand()`` tests whether the interval is on the coordinate -strand. ``is_positive_strand()`` tests the effective genomic strand -(accounting for both ``coord_strand`` and ``on_coordinate_strand``). +Strand methods (`is_same_strand()`, `flip_strand()`, etc.) only affect the +interval layer, not the coordinate intervals. -Five methods change the interval's strand. All preserve ``start``, -``end``, and the coordinate intervals. These methods return a DIS on the requested -strand, instead of modifying the existing DIS in-place. 
- -``as_opposite_strand()`` sets ``on_coordinate_strand`` to ``False``:: - - Before as_opposite_strand() (on_coordinate_strand=True): - Start Index: 1 - End Index: 6 - DIS Coordinates: 0 1 2 3 4 5 6 7 - DNA Sequence (+): T A A C C C T - |<------------->| - ----------------------------------------------------- - DNA Sequence (-): A T T G G G A - DIS Coordinates: 0 1 2 3 4 5 6 7 - Opposite Strand - - After as_opposite_strand() (on_coordinate_strand=False): - Start Index: 1 - End Index: 6 - DIS Coordinates: 0 1 2 3 4 5 6 7 - DNA Sequence (+): T A A C C C T - ----------------------------------------------------- - DNA Sequence (-): A T T G G G A - DIS Coordinates: 0 1 2 3 4 5 6 7 - |<------------->| - Opposite Strand - -:: - - >>> dis.on_coordinate_strand - True - >>> opposite = dis.as_opposite_strand() - >>> opposite.on_coordinate_strand - False - >>> opposite.start == dis.start # start/end unchanged - True - -``as_same_strand()`` sets ``on_coordinate_strand`` to ``True``:: - - >>> dis.on_coordinate_strand - False - >>> dis.is_same_strand() - False - >>> same_strand_dis = dis.as_same_strand() - >>> same_strand_dis.is_same_strand() - True - -``flip_strand()`` toggles ``on_coordinate_strand``:: - - >>> dis.on_coordinate_strand - True - >>> flipped = dis.flip_strand() - >>> flipped.on_coordinate_strand - False - >>> flipped.flip_strand().on_coordinate_strand - True - -The ``as_positive_strand()`` and ``as_negative_strand()`` methods return a DIS with -the interval on the effective genomic strand:: - - >>> dis.coord_strand - '+' - >>> dis.on_coordinate_strand - True - >>> dis.strand - '+' - >>> neg_dis = dis.as_negative_strand() - >>> neg_dis.strand - '-' - >>> pos_dis = neg_dis.as_positive_strand() - >>> pos_dis.strand - '+' - >>> pos_dis.coord_strand == dis.coord_strand == '+' - True - -.. note:: - - Strand methods only affect the interval layer. The coordinate - intervals always remain unchanged. 
Shifting and Expanding ====================== From df0d9c4496be55cfd9629ac5e30ca35cff9f7a24 Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Fri, 24 Apr 2026 16:49:41 -0400 Subject: [PATCH 17/18] fail on anchored interval --- genome_kit/diseq.py | 5 +++++ tests/test_diseq.py | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/genome_kit/diseq.py b/genome_kit/diseq.py index cd9caac..268f6da 100644 --- a/genome_kit/diseq.py +++ b/genome_kit/diseq.py @@ -128,6 +128,11 @@ def __init__( raise TypeError( f"coordinate_intervals[{i}] is {type(iv).__name__}, expected Interval" ) + if iv.anchor is not None: + raise ValueError( + f"coordinate_intervals[{i}] has an anchor set; " + f"anchored Intervals are not supported" + ) # Consistent chromosome, strand, reference_genome iv0 = coordinate_intervals[0] diff --git a/tests/test_diseq.py b/tests/test_diseq.py index a1ee79d..0d34de7 100644 --- a/tests/test_diseq.py +++ b/tests/test_diseq.py @@ -56,6 +56,14 @@ def test_overlapping_intervals_negative_strand_raises(self): with self.assertRaises(ValueError): DisjointIntervalSequence(ivs) + def test_anchored_interval_raises(self): + plain = Interval("chr1", "+", 100, 200, REFG) + anchored = Interval("chr1", "+", 300, 400, REFG, anchor=350) + with self.assertRaises(ValueError): + DisjointIntervalSequence([plain, anchored]) + with self.assertRaises(ValueError): + DisjointIntervalSequence([anchored]) + def test_adjacent_intervals_ok(self): ivs = _make_intervals([("chr1", "+", 100, 200), ("chr1", "+", 200, 300)]) dis = DisjointIntervalSequence(ivs) From 28c5f523f6f68975ef04735402c3f2835bd8627c Mon Sep 17 00:00:00 2001 From: SophiaPerzan-DG Date: Fri, 24 Apr 2026 17:12:37 -0400 Subject: [PATCH 18/18] use segment instead of interval for clarity --- docs-src/diseq.rst | 98 ++++++++++++++++++------------------ genome_kit/diseq.py | 118 ++++++++++++++++++++++---------------------- tests/test_diseq.py | 40 +++++++-------- 3 files changed, 128 insertions(+), 128 deletions(-) diff 
--git a/docs-src/diseq.rst b/docs-src/diseq.rst index ce93ed0..ccf8122 100644 --- a/docs-src/diseq.rst +++ b/docs-src/diseq.rst @@ -29,7 +29,7 @@ is conceptually distinct in several ways: RNA rather than raw DNA, the concept of ``+``/``-`` strand is replaced by ``on_coordinate_strand`` (same strand as the transcript) versus opposite strand. The underlying genomic strand is accessible via ``coord_strand``, - but intervals within the DIS are described relative to the coordinate + but segments within the DIS are described relative to the coordinate space rather than in absolute genomic terms. Overview @@ -45,10 +45,10 @@ A DIS has two aspects: - A **coordinate space**: the underlying genomic :py:class:`~genome_kit.Interval` objects (e.g. exons) that define the flattened index system. These intervals are sorted 5'→3' and must not overlap. -- An **interval**: a sub-range within that coordinate space, defined by +- A **segment**: a sub-range within that coordinate space, defined by a start and end index, where start <= end. -The following examples illustrate how the coordinate space and interval interact, +The following examples illustrate how the coordinate space and segment interact, using both diagrams and code. 
Consider a transcript on the + strand with the following genomic layout: @@ -73,7 +73,7 @@ These exon intervals can be represented as :py:class:`~genome_kit.Interval` obje >>> exon2 = Interval("chr1", "+", 159, 161, "hg38") >>> exon3 = Interval("chr1", "+", 165, 167, "hg38") -To define an interval spanning the full exonic sequence (from the start of Exon1 to +To define a segment spanning the full exonic sequence (from the start of Exon1 to the end of Exon3), the exon intervals are first converted into a DIS coordinate space :: DIS Coordinates: 0 1 2 3 4 5 6 7 @@ -81,16 +81,16 @@ the end of Exon3), the exon intervals are first converted into a DIS coordinate | |<------->| |<--->| |<--->| | 5' Exon1 Exon2 Exon3 3' -The default interval spans the entire coordinate space +The default segment spans the entire coordinate space :: DIS Coordinates: 0 1 2 3 4 5 6 7 DNA Sequence: A T G C A G C |<--------------------->| - end5 Interval end3 + end5 Segment end3 Start Index: 0 End Index: 7 -The interval spans the full length of the coordinate space, with a start index of 0 +The segment spans the full length of the coordinate space, with a start index of 0 and an end index of 7:: >>> dis = DisjointIntervalSequence.from_intervals( @@ -104,11 +104,11 @@ and an end index of 7:: True .. note:: - The disjoint interval follows the convention of - :py:class:`~genome_kit.Interval` where intervals are half-open + The DIS segment follows the convention of + :py:class:`~genome_kit.Interval` where ranges are half-open (the end index is exclusive). -A DIS can also represent an interval on the strand opposite the coordinate space. +A DIS can also represent a segment on the strand opposite the coordinate space. This is useful for modeling the complementary sequence or a binding partner. Starting from the coordinate space defined above @@ -129,8 +129,8 @@ The opposite strand shares the same DIS coordinate indices 3' Negative Strand 5' The DIS coordinate indices are identical on both strands. 
To obtain the complement -of a given interval, the same start and end indices apply; only the -``on_coordinate_strand`` flag changes. The following shows the full-length interval +of a given segment, the same start and end indices apply; only the +``on_coordinate_strand`` flag changes. The following shows the full-length segment on the opposite strand :: 5' Coordinate Strand 3' @@ -141,13 +141,13 @@ on the opposite strand DIS Coordinates: 0 1 2 3 4 5 6 7 Opposite Strand |<--------------------->| - end3 Interval end5 + end3 Segment end5 Start Index: 0 End Index: 7 On Coordinate Strand: False The ``on_coordinate_strand`` flag distinguishes same-strand from opposite-strand -intervals, since the start and end indices alone do not encode strand information:: +segments, since the start and end indices alone do not encode strand information:: >>> dis_opp = DisjointIntervalSequence( ... [exon1, exon2, exon3], @@ -210,17 +210,17 @@ largest index to the 3' end:: >>> dis_neg.coordinate_length 7 -A full-length interval on the coordinate strand +A full-length segment on the coordinate strand :: DIS Coordinates: 0 1 2 3 4 5 6 7 DNA Sequence: T G A C C T G |<--------------------->| - end5 Interval end3 + end5 Segment end3 Start Index: 0 End Index: 7 On Coordinate Strand: True -Despite creating the DIS from the negative strand, the full-length interval on the +Despite creating the DIS from the negative strand, the full-length segment on the coordinate strand is identical to the + strand example. 
When working with DIS objects, strand is expressed only as "same strand" or "opposite strand":: @@ -231,7 +231,7 @@ objects, strand is expressed only as "same strand" or "opposite strand":: >>> dis_neg.on_coordinate_strand True -The same coordinate space with an opposite-strand interval +The same coordinate space with an opposite-strand segment :: DIS Coordinates: 0 1 2 3 4 5 6 7 DNA Sequence (-): T G A C C T G @@ -239,7 +239,7 @@ The same coordinate space with an opposite-strand interval DNA Sequence (+): A C T G G A C DIS Coordinates: 0 1 2 3 4 5 6 7 |<--------------------->| - end3 Interval end5 + end3 Segment end5 Start Index: 0 End Index: 7 On Coordinate Strand: False @@ -321,10 +321,10 @@ Metadata about the coordinate space is available through properties:: >>> dis.coord_name 'ENST00000...' -Interval Start and End -====================== +Segment Start and End +===================== -The interval within the coordinate space is defined by ``start`` and ``end`` +The segment within the coordinate space is defined by ``start`` and ``end`` indices, following the same half-open convention as :py:class:`~genome_kit.Interval` (``start <= end`` always):: @@ -337,7 +337,7 @@ indices, following the same half-open convention as :py:class:`~genome_kit.Inter >>> len(dis) 250 -By default, the interval spans the full coordinate space (``start=0``, +By default, the segment spans the full coordinate space (``start=0``, ``end=coordinate_length``). Indices can extend beyond ``[0, coordinate_length]``, but the DNA sequence returned by ``genome.dna()`` will be N-padded. @@ -345,8 +345,8 @@ End5 and End3 ~~~~~~~~~~~~~ The ``end5_index`` and ``end3_index`` properties give the 5' and 3' positions -of the interval. These are derived from ``start`` and ``end`` based on the -interval's strand. +of the segment. These are derived from ``start`` and ``end`` based on the +segment's strand. 
When ``on_coordinate_strand`` is ``True``, ``end5_index`` equals ``start`` and ``end3_index`` equals ``end``:: @@ -393,11 +393,11 @@ When ``on_coordinate_strand`` is ``False``, the mapping reverses: Boundary Properties ~~~~~~~~~~~~~~~~~~~ -Zero-length DIS objects at the interval and coordinate boundaries are +Zero-length DIS objects at the segment and coordinate boundaries are available as properties:: - >>> dis.end5 # 0-length DIS at the interval's 5' boundary - >>> dis.end3 # 0-length DIS at the interval's 3' boundary + >>> dis.end5 # 0-length DIS at the segment's 5' boundary + >>> dis.end3 # 0-length DIS at the segment's 3' boundary >>> dis.coord_end5 # 0-length DIS at the coordinate space's 5' boundary >>> dis.coord_end3 # 0-length DIS at the coordinate space's 3' boundary @@ -405,9 +405,9 @@ available as properties:: Strand Methods ============== -A DIS interval can sit on either 'virtual' strand independently of the coordinate +A DIS segment can sit on either 'virtual' strand independently of the coordinate intervals. The ``on_coordinate_strand`` property indicates whether the -interval is on the same strand as the coordinate intervals:: +segment is on the same strand as the coordinate intervals:: On Coordinate Strand: True Start Index: 1 End Index: 6 @@ -427,20 +427,20 @@ interval is on the same strand as the coordinate intervals:: True Strand methods (`is_same_strand()`, `flip_strand()`, etc.) only affect the -interval layer, not the coordinate intervals. +segment layer, not the coordinate intervals. Shifting and Expanding ====================== -Both ``shift`` and ``expand`` return a **new** DIS with modified interval +Both ``shift`` and ``expand`` return a **new** DIS with modified segment indices. The coordinate space is always unchanged. shift ~~~~~ -``shift(amount)`` moves the interval downstream by ``amount`` bases. -A negative value shifts upstream. The interval length is preserved. +``shift(amount)`` moves the segment downstream by ``amount`` bases. 
+A negative value shifts upstream. The segment length is preserved. On the coordinate strand, downstream means increasing indices:: @@ -455,7 +455,7 @@ On the coordinate strand, downstream means increasing indices:: end5 end3 On the opposite strand, "downstream" is the reverse direction in index -space, so ``shift(1)`` moves the interval toward *lower* indices:: +space, so ``shift(1)`` moves the segment toward *lower* indices:: Before shift(1) (on_coordinate_strand=False): DIS Coordinates: 0 1 2 3 4 5 6 7 @@ -491,13 +491,13 @@ space, so ``shift(1)`` moves the interval toward *lower* indices:: .. note:: - ``shift`` can move the interval beyond the coordinate space bounds + ``shift`` can move the segment beyond the coordinate space bounds (``start < 0`` or ``end > coordinate_length``). expand ~~~~~~ -``expand(upstream, dnstream)`` grows (or shrinks) the interval toward +``expand(upstream, dnstream)`` grows (or shrinks) the segment toward its 5' and 3' ends. When ``dnstream`` is omitted the expansion is symmetric:: @@ -511,7 +511,7 @@ symmetric:: |<----------------->| end5 end3 -Negative values contract the interval:: +Negative values contract the segment:: Before expand(-1, -1): DIS Coordinates: 0 1 2 3 4 5 6 7 @@ -552,7 +552,7 @@ Negative values contract the interval:: Positional Comparisons ====================== -``upstream_of`` and ``dnstream_of`` compare two DIS intervals that share +``upstream_of`` and ``dnstream_of`` compare two DIS segments that share the same coordinate space and the same ``on_coordinate_strand``. Both methods require strict separation — any overlap returns ``False``. @@ -560,7 +560,7 @@ upstream_of ~~~~~~~~~~~ ``upstream_of(other)`` returns ``True`` if ``self`` is strictly 5' of -``other`` with no overlap. Adjacent intervals (where ``self.end`` equals +``other`` with no overlap. 
Adjacent segments (where ``self.end`` equals ``other.start``) count as upstream:: DIS Coordinates: 0 1 2 3 4 5 6 7 8 9 @@ -589,16 +589,16 @@ upstream_of >>> b.upstream_of(a) False - >>> # Adjacent intervals count as upstream + >>> # Adjacent segments count as upstream >>> a2 = DisjointIntervalSequence(coord_ivs, start=10, end=50) >>> a2.upstream_of(b) True .. note:: - Both intervals must share the same ``coordinate_intervals`` and the + Both DIS objects must share the same ``coordinate_intervals`` and the same ``on_coordinate_strand``, otherwise ``ValueError`` is raised. - Two zero-length intervals at the same position are neither upstream + Two zero-length segments at the same position are neither upstream nor downstream of each other. dnstream_of @@ -606,7 +606,7 @@ dnstream_of ``dnstream_of(other)`` is the mirror of ``upstream_of``: it returns ``True`` if ``self`` is strictly 3' of ``other`` with no overlap. -Adjacent intervals count as downstream. The same requirements on shared +Adjacent segments count as downstream. The same requirements on shared coordinate space and strand apply:: >>> a = DisjointIntervalSequence(coord_ivs, start=50, end=80) @@ -619,9 +619,9 @@ coordinate space and strand apply:: within ~~~~~~ -``within(other)`` returns ``True`` if ``self``'s interval is fully -contained within ``other``'s interval. Boundary-inclusive: an interval -is within another if it shares the same start and/or end. An interval +``within(other)`` returns ``True`` if ``self``'s segment is fully +contained within ``other``'s segment. Boundary-inclusive: a segment +is within another if it shares the same start and/or end. A segment is always within itself. 
The same requirements on shared coordinate space and strand apply:: @@ -648,11 +648,11 @@ space and strand apply:: >>> b.within(a) False - >>> # An interval is within itself + >>> # A segment is within itself >>> a.within(a) True - >>> # Zero-length intervals are within any enclosing interval + >>> # Zero-length segments are within any enclosing segment >>> z = DisjointIntervalSequence(coord_ivs, start=50, end=50) >>> z.within(a) True diff --git a/genome_kit/diseq.py b/genome_kit/diseq.py index 268f6da..109f6d8 100644 --- a/genome_kit/diseq.py +++ b/genome_kit/diseq.py @@ -33,7 +33,7 @@ class _CoordinateMetadata: @dataclass(frozen=True) -class _IntervalMetadata: +class _SegmentMetadata: name: str | None on_coordinate_strand: bool @@ -48,8 +48,8 @@ class DisjointIntervalSequence: which are flattened into a contiguous 0-based index space. Indices for the coordinate space are assigned according to the current :py:class:`IndexDirection` value. - - An **interval** within that coordinate space, defined by a 5' and 3' index. - The interval may lie on the same, or opposite, strand as the coordinate space. + - A **segment** within that coordinate space, defined by a 5' and 3' index. + The segment may lie on the same, or opposite, strand as the coordinate space. Use :py:meth:`from_transcript` or :py:meth:`from_intervals` to construct instances rather than calling the constructor directly. @@ -83,7 +83,7 @@ def __init__( coordinate_intervals: Sequence[Interval], *, coord_name: str | None = None, - interval_name: str | None = None, + segment_name: str | None = None, on_coordinate_strand: bool = True, start: int | None = None, end: int | None = None, @@ -100,22 +100,22 @@ def __init__( chromosome, strand, and reference genome. coord_name : :py:class:`str` or None Optional name for the coordinate space. - interval_name : :py:class:`str` or None - Optional name for the interval. + segment_name : :py:class:`str` or None + Optional name for the segment. 
on_coordinate_strand : :py:class:`bool` - Whether the interval is on the same strand as the coordinate + Whether the segment is on the same strand as the coordinate intervals. Defaults to True. Can be used to represent a sequence that binds to the transcript if set to False. start : :py:class:`int` or None - start index of the interval in the coordinate space. Defaults to 0 + start index of the segment in the coordinate space. Defaults to 0 end : :py:class:`int` or None - end index of the interval in the coordinate space. Defaults to the length + end index of the segment in the coordinate space. Defaults to the length of the coordinate space. Raises ------ ValueError - If intervals are empty, inconsistent, overlapping, or if start + If coordinate intervals are empty, inconsistent, overlapping, or if start is greater than end. TypeError If any element is not an Interval. @@ -179,12 +179,12 @@ def __init__( chromosome=iv0.chromosome, transcript_strand=iv0.strand, ) - self._interval_metadata = _IntervalMetadata( - name=interval_name, + self._segment_metadata = _SegmentMetadata( + name=segment_name, on_coordinate_strand=on_coordinate_strand, ) - # Default interval start/end to span the full coordinate + # Default segment start/end to span the full coordinate if start is None: start = 0 if end is None: @@ -205,7 +205,7 @@ def from_intervals( intervals: Sequence[Interval], *, coord_name: str | None = None, - interval_name: str | None = None, + segment_name: str | None = None, ) -> "DisjointIntervalSequence": """Construct a DIS from a sequence of Intervals (or :py:class:`~genome_kit.Exon`/:py:class:`~genome_kit.Cds`/:py:class:`~genome_kit.Utr` objects). @@ -216,8 +216,8 @@ def from_intervals( Sequence of Interval or annotation objects. coord_name : :py:class:`str` or None Optional name for the coordinate space. - interval_name : :py:class:`str` or None - Optional name for the interval. + segment_name : :py:class:`str` or None + Optional name for the segment. 
Returns ------- @@ -226,7 +226,7 @@ def from_intervals( coord_intervals = [ iv.interval if hasattr(iv, "interval") else iv for iv in intervals ] - return cls(coord_intervals, coord_name=coord_name, interval_name=interval_name) + return cls(coord_intervals, coord_name=coord_name, segment_name=segment_name) @classmethod def from_transcript( @@ -235,7 +235,7 @@ def from_transcript( *, region: Literal["exons", "cds", "utr5", "utr3"] = "exons", coord_name: str | None = None, - interval_name: str | None = None, + segment_name: str | None = None, ) -> "DisjointIntervalSequence": """Construct a DIS from a transcript's exons, CDS, or UTR regions. @@ -248,8 +248,8 @@ def from_transcript( ``"utr5"``, or ``"utr3"``. coord_name : :py:class:`str` or None Optional name for the coordinate space. Defaults to ``transcript.id``. - interval_name : :py:class:`str` or None - Optional name for the interval. Defaults to ``transcript.id``. + segment_name : :py:class:`str` or None + Optional name for the segment. Defaults to ``transcript.id``. 
Returns ------- @@ -274,10 +274,10 @@ def from_transcript( coord_intervals = [element.interval for element in region_elements] if coord_name is None: coord_name = transcript.id - if interval_name is None: - interval_name = transcript.id + if segment_name is None: + segment_name = transcript.id - return cls(coord_intervals, coord_name=coord_name, interval_name=interval_name) + return cls(coord_intervals, coord_name=coord_name, segment_name=segment_name) @property def coord_name(self) -> str | None: @@ -301,20 +301,20 @@ def coord_strand(self) -> Literal["+", "-"]: @property def name(self) -> str | None: - """Name of the interval, or None.""" - return self._interval_metadata.name + """Name of the segment, or None.""" + return self._segment_metadata.name @property def on_coordinate_strand(self) -> bool: - """True if the interval is on the same strand as the coordinate intervals.""" - return self._interval_metadata.on_coordinate_strand + """True if the segment is on the same strand as the coordinate intervals.""" + return self._segment_metadata.on_coordinate_strand @property def strand(self) -> Literal["+", "-"]: - """Effective strand of the interval, accounting for on_coordinate_strand.""" + """Effective strand of the segment, accounting for on_coordinate_strand.""" if self.on_coordinate_strand: return self.coord_strand - # Interval is on opposite strand + # Segment is on opposite strand if self.coord_strand == "+": return "-" return "+" @@ -339,26 +339,26 @@ def coordinate_end3_index(self) -> int: @property def end5_index(self) -> int: - """5' index of the interval.""" + """5' index of the segment.""" if self._upstream_index_step() == -1: return self._start return self._end @property def end3_index(self) -> int: - """3' index of the interval.""" + """3' index of the segment.""" if self._upstream_index_step() == -1: return self._end return self._start @property def start(self) -> int: - """Start index of the interval in the coordinate space.""" + """Start index of the 
segment in the coordinate space.""" return self._start @property def end(self) -> int: - """End index of the interval in the coordinate space.""" + """End index of the segment in the coordinate space.""" return self._end def _at_index( @@ -375,14 +375,14 @@ def _at_index( @property def end5(self) -> "DisjointIntervalSequence": - """0-length DIS at the interval's 5' end.""" + """0-length DIS at the segment's 5' end.""" return self._at_index( self.end5_index, on_coordinate_strand=self.on_coordinate_strand ) @property def end3(self) -> "DisjointIntervalSequence": - """0-length DIS at the interval's 3' end.""" + """0-length DIS at the segment's 3' end.""" return self._at_index( self.end3_index, on_coordinate_strand=self.on_coordinate_strand ) @@ -410,7 +410,7 @@ def coordinate_length(self) -> int: @property def length(self) -> int: - """Length of the interval on the coordinate space.""" + """Length of the segment on the coordinate space.""" return self.end - self.start def _upstream_index_step(self, on_coordinate_strand: bool | None = None) -> int: @@ -418,7 +418,7 @@ def _upstream_index_step(self, on_coordinate_strand: bool | None = None) -> int: Args: on_coordinate_strand: Override for which strand to compute the step for. - Defaults to this interval's on_coordinate_strand. + Defaults to this segment's on_coordinate_strand. 
""" if on_coordinate_strand is None: on_coordinate_strand = self.on_coordinate_strand @@ -439,7 +439,7 @@ def _validate_same_coordinate_space( raise ValueError("DIS objects must share the same coordinate intervals") def _from_end_indices(self, end5: int, end3: int) -> "DisjointIntervalSequence": - """Return a new DIS with the same coordinate space but different interval indices.""" + """Return a new DIS with the same coordinate space but different segment indices.""" # Validate end5 is upstream of or equal to end3 if self._upstream_index_step() == -1: if end5 > end3: @@ -454,16 +454,16 @@ def _from_end_indices(self, end5: int, end3: int) -> "DisjointIntervalSequence": return DisjointIntervalSequence( self._coordinate_intervals, coord_name=self._coord_metadata.name, - interval_name=self._interval_metadata.name, + segment_name=self._segment_metadata.name, on_coordinate_strand=self.on_coordinate_strand, start=min(end5, end3), end=max(end5, end3), ) def shift(self, amount: int) -> "DisjointIntervalSequence": - """Shift the interval downstream by amount (negative shifts upstream). + """Shift the segment downstream by amount (negative shifts upstream). - The coordinate space is unchanged. Only the interval indices move. + The coordinate space is unchanged. Only the segment indices move. """ downstream_step = -self._upstream_index_step() delta = amount * downstream_step @@ -475,9 +475,9 @@ def shift(self, amount: int) -> "DisjointIntervalSequence": def expand( self, upstream: int, dnstream: int | None = None ) -> "DisjointIntervalSequence": - """Expand the interval upstream and/or downstream. + """Expand the segment upstream and/or downstream. - Negative values contract the interval. Raises ValueError if contraction + Negative values contract the segment. Raises ValueError if contraction would result in end5 being downstream of end3. 
Args: @@ -538,7 +538,7 @@ def dnstream_of(self, other: "DisjointIntervalSequence") -> bool: return self._end <= other.start def within(self, other: "DisjointIntervalSequence") -> bool: - """True if self's interval is contained within other's interval. + """True if self's segment is contained within other's segment. Requires the same coordinate space and same on_coordinate_strand. """ @@ -553,7 +553,7 @@ def within(self, other: "DisjointIntervalSequence") -> bool: return self._start >= other.start and self._end <= other.end def is_same_strand(self) -> bool: - """True if the interval is on the same strand as the coordinate intervals. + """True if the segment is on the same strand as the coordinate intervals. Returns ------- @@ -562,7 +562,7 @@ def is_same_strand(self) -> bool: return self.on_coordinate_strand def is_positive_strand(self) -> bool: - """True if the interval is on the positive strand. + """True if the segment is on the positive strand. Returns ------- @@ -573,10 +573,10 @@ def is_positive_strand(self) -> bool: return False def as_positive_strand(self) -> "DisjointIntervalSequence": - """Return a DIS with the interval on the positive strand. + """Return a DIS with the segment on the positive strand. Returns ``self`` if already on the positive strand. The coordinate - intervals are unchanged; only the interval strand is affected. + intervals are unchanged; only the segment strand is affected. Returns ------- @@ -587,10 +587,10 @@ def as_positive_strand(self) -> "DisjointIntervalSequence": return self.flip_strand() def as_negative_strand(self) -> "DisjointIntervalSequence": - """Return a DIS with the interval on the negative strand. + """Return a DIS with the segment on the negative strand. Returns ``self`` if already on the negative strand. The coordinate - intervals are unchanged; only the interval strand is affected. + intervals are unchanged; only the segment strand is affected. 
Returns ------- @@ -601,10 +601,10 @@ def as_negative_strand(self) -> "DisjointIntervalSequence": return self.flip_strand() def as_opposite_strand(self) -> "DisjointIntervalSequence": - """Return a DIS with the interval on the opposite strand. + """Return a DIS with the segment on the opposite strand. Returns ``self`` if already on the opposite strand. The coordinate - intervals are unchanged; only the interval strand is affected. + intervals are unchanged; only the segment strand is affected. Returns ------- @@ -615,10 +615,10 @@ def as_opposite_strand(self) -> "DisjointIntervalSequence": return self.flip_strand() def as_same_strand(self) -> "DisjointIntervalSequence": - """Return a DIS with the interval on the coordinate strand. + """Return a DIS with the segment on the coordinate strand. Returns ``self`` if already on the coordinate strand. The coordinate - intervals are unchanged; only the interval strand is affected. + intervals are unchanged; only the segment strand is affected. Returns ------- @@ -631,7 +631,7 @@ def as_same_strand(self) -> "DisjointIntervalSequence": def flip_strand(self) -> "DisjointIntervalSequence": """Return a new DIS with ``on_coordinate_strand`` toggled. - The coordinate intervals are unchanged. The interval's + The coordinate intervals are unchanged. The segment's ``on_coordinate_strand`` is flipped. 
Returns @@ -641,14 +641,14 @@ def flip_strand(self) -> "DisjointIntervalSequence": return DisjointIntervalSequence( self._coordinate_intervals, coord_name=self._coord_metadata.name, - interval_name=self._interval_metadata.name, + segment_name=self._segment_metadata.name, on_coordinate_strand=not self.on_coordinate_strand, start=self._start, end=self._end, ) def __len__(self) -> int: - """Return the length of the interval.""" + """Return the length of the segment.""" return self.length def __repr__(self) -> str: @@ -656,7 +656,7 @@ def __repr__(self) -> str: return ( f"DisjointIntervalSequence(" f"coord_name={self._coord_metadata.name!r}, " - f"name={self._interval_metadata.name!r}, " + f"name={self._segment_metadata.name!r}, " f"{self.chromosome}:{self.coord_strand}, " f"len={self.length}, " f"coord_intervals={self._coordinate_intervals}, " @@ -672,7 +672,7 @@ def __eq__(self, other: object) -> bool: return NotImplemented return ( self._coord_metadata == other._coord_metadata - and self._interval_metadata == other._interval_metadata + and self._segment_metadata == other._segment_metadata and self._start == other._start and self._end == other._end and self._coordinate_intervals == other._coordinate_intervals diff --git a/tests/test_diseq.py b/tests/test_diseq.py index 0d34de7..5d5582a 100644 --- a/tests/test_diseq.py +++ b/tests/test_diseq.py @@ -143,7 +143,7 @@ def test_happy_path(self): def test_coord_and_interval_id_independent(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) dis = DisjointIntervalSequence.from_intervals( - ivs, coord_name="c1", interval_name="i1" + ivs, coord_name="c1", segment_name="i1" ) self.assertEqual(dis.coord_name, "c1") self.assertEqual(dis.name, "i1") @@ -236,7 +236,7 @@ def test_invalid_region_raises(self): def test_custom_id_overrides(self): dis = DisjointIntervalSequence.from_transcript( - self.transcript, coord_name="custom_coord", interval_name="custom_iv" + self.transcript, coord_name="custom_coord", 
segment_name="custom_iv" ) self.assertEqual(dis.coord_name, "custom_coord") self.assertEqual(dis.name, "custom_iv") @@ -263,7 +263,7 @@ class TestProperties(unittest.TestCase): def test_metadata_getters_positive(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) - dis = DisjointIntervalSequence(ivs, coord_name="c", interval_name="i") + dis = DisjointIntervalSequence(ivs, coord_name="c", segment_name="i") self.assertEqual(dis.coord_name, "c") self.assertEqual(dis.name, "i") self.assertEqual(dis.reference_genome, REFG) @@ -433,7 +433,7 @@ def test_flip_strand_preserves_start_end(self): def test_flip_strand_preserves_metadata(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) - dis = DisjointIntervalSequence(ivs, coord_name="c", interval_name="i") + dis = DisjointIntervalSequence(ivs, coord_name="c", segment_name="i") flipped = dis.flip_strand() self.assertEqual(flipped.coord_name, "c") self.assertEqual(flipped.name, "i") @@ -465,7 +465,7 @@ def test_as_opposite_strand_from_same(self): def test_as_opposite_strand_preserves_metadata(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) - dis = DisjointIntervalSequence(ivs, coord_name="c", interval_name="i") + dis = DisjointIntervalSequence(ivs, coord_name="c", segment_name="i") opp = dis.as_opposite_strand() self.assertEqual(opp.coord_name, "c") self.assertEqual(opp.name, "i") @@ -511,7 +511,7 @@ class TestEndProperties(unittest.TestCase): def test_end5_default(self): ivs = _make_intervals([("chr1", "+", 100, 200), ("chr1", "+", 300, 400)]) - dis = DisjointIntervalSequence(ivs, coord_name="c", interval_name="i") + dis = DisjointIntervalSequence(ivs, coord_name="c", segment_name="i") # On coordinate strand: end5_index == start (0), end3_index == end (200) self.assertEqual(dis.end5_index, 0) self.assertEqual(dis.end3_index, 200) @@ -522,7 +522,7 @@ def test_end5_default(self): self.assertEqual(e5.coord_name, "c") self.assertEqual(e5.name, None) expected = DisjointIntervalSequence( - ivs, coord_name="c", 
interval_name=None, on_coordinate_strand=True, start=0, end=0 + ivs, coord_name="c", segment_name=None, on_coordinate_strand=True, start=0, end=0 ) self.assertEqual(e5, expected) @@ -638,7 +638,7 @@ def test_len_with_custom_indices(self): def test_repr(self): ivs = _make_intervals([("chr1", "+", 100, 200), ("chr1", "+", 300, 400)]) - dis = DisjointIntervalSequence(ivs, coord_name="ENST0001", interval_name="IV1") + dis = DisjointIntervalSequence(ivs, coord_name="ENST0001", segment_name="IV1") r = repr(dis) self.assertIn("DisjointIntervalSequence(", r) self.assertIn("coord_name='ENST0001'", r) @@ -653,8 +653,8 @@ def test_repr(self): def test_eq_same(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) - a = DisjointIntervalSequence(ivs, coord_name="x", interval_name="i") - b = DisjointIntervalSequence(ivs, coord_name="x", interval_name="i") + a = DisjointIntervalSequence(ivs, coord_name="x", segment_name="i") + b = DisjointIntervalSequence(ivs, coord_name="x", segment_name="i") self.assertEqual(a, b) def test_eq_different_coord_name(self): @@ -663,10 +663,10 @@ def test_eq_different_coord_name(self): b = DisjointIntervalSequence(ivs, coord_name="y") self.assertNotEqual(a, b) - def test_eq_different_interval_name(self): + def test_eq_different_segment_name(self): ivs = _make_intervals([("chr1", "+", 100, 200)]) - a = DisjointIntervalSequence(ivs, interval_name="x") - b = DisjointIntervalSequence(ivs, interval_name="y") + a = DisjointIntervalSequence(ivs, segment_name="x") + b = DisjointIntervalSequence(ivs, segment_name="y") self.assertNotEqual(a, b) def test_eq_different_on_coordinate_strand(self): @@ -721,13 +721,13 @@ def test_eq_non_dis(self): def _dis( - start=0, end=200, on_coordinate_strand=True, coord_name="c", interval_name="i", ivs=None + start=0, end=200, on_coordinate_strand=True, coord_name="c", segment_name="i", ivs=None ): """Quick DIS factory for tests.""" return DisjointIntervalSequence( ivs or _COORD_IVS, coord_name=coord_name, - 
interval_name=interval_name, + segment_name=segment_name, on_coordinate_strand=on_coordinate_strand, start=start, end=end, @@ -739,7 +739,7 @@ def _neg_dis(start=0, end=200, on_coordinate_strand=True): return DisjointIntervalSequence( _NEG_COORD_IVS, coord_name="c", - interval_name="i", + segment_name="i", on_coordinate_strand=on_coordinate_strand, start=start, end=end, @@ -801,7 +801,7 @@ def test_shift_opposite_strand_negative_shift(self): self.assertEqual(shifted.end, 160) def test_shift_preserves_metadata(self): - dis = _dis(start=30, end=150, coord_name="mycoord", interval_name="myiv") + dis = _dis(start=30, end=150, coord_name="mycoord", segment_name="myiv") shifted = dis.shift(10) self.assertEqual(shifted.coord_name, "mycoord") self.assertEqual(shifted.name, "myiv") @@ -809,7 +809,7 @@ def test_shift_preserves_metadata(self): def test_shift_preserves_metadata_opposite_strand(self): dis = _dis( - start=30, end=150, coord_name="mycoord", interval_name="myiv", + start=30, end=150, coord_name="mycoord", segment_name="myiv", on_coordinate_strand=False, ) shifted = dis.shift(10) @@ -922,7 +922,7 @@ def test_expand_beyond_coordinate(self): self.assertEqual(expanded.start, -20) def test_expand_preserves_metadata(self): - dis = _dis(start=30, end=150, coord_name="c", interval_name="i") + dis = _dis(start=30, end=150, coord_name="c", segment_name="i") expanded = dis.expand(5) self.assertEqual(expanded.coord_name, "c") self.assertEqual(expanded.name, "i") @@ -930,7 +930,7 @@ def test_expand_preserves_metadata(self): def test_expand_preserves_metadata_opposite_strand(self): dis = _dis( - start=30, end=150, coord_name="c", interval_name="i", + start=30, end=150, coord_name="c", segment_name="i", on_coordinate_strand=False, ) expanded = dis.expand(5)