From 3ac9275d0cfc1bcd8e14d795135199e86b7c073b Mon Sep 17 00:00:00 2001
From: virgesmith <andrew@friarswood.net>
Date: Sun, 14 Jun 2026 12:59:01 +0100
Subject: [PATCH] Release 0.3.0: nth/interleave fixes, add chunk_by

- nth is now 0-based, consistent with Rust's Iterator::nth and Python
  indexing (BREAKING: pass n - 1 where n was 1-based; nth(0) now valid)
- interleave yields the remainder of the longer iterable once the
  shorter is exhausted, matching Rust (BREAKING: previously truncated)
- add chunk_by: lazy, order-preserving grouping of consecutive runs
  (itertools.groupby / Rust chunk_by semantics; works on infinite iterators)
- document groupby/value_counts as eager (sorts input); note equivalence
  to pandas default groupby; fix nth docstring
- bump version to 0.3.0; refresh apidoc, README and release notes

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .pre-commit-config.yaml           |  8 ++--
 README.md                         |  5 ++-
 doc/apidoc.md                     | 61 ++++++++++++++++++++-------
 pyproject.toml                    |  2 +-
 relnotes.md                       | 15 +++++++
 src/itrx/itr.py                   | 69 ++++++++++++++++++++++++-------
 src/test/test_collection.py       |  6 ++-
 src/test/test_combine_split.py    | 10 ++---
 src/test/test_transform_filter.py | 13 ++++++
 uv.lock                           |  2 +-
 10 files changed, 148 insertions(+), 43 deletions(-)
 create mode 100644 relnotes.md

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a53ed53..1887c5a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,6 +10,8 @@ repos:
       - id: ruff-check
         args: [--fix]
       - id: ruff-format
-# doesn't exist (yet)
-# - repo: https://github.com/astral-sh/ty-pre-commit
-
+  - repo: https://github.com/astral-sh/ty-pre-commit
+    # ty version.
+    rev: v0.0.49
+    hooks:
+      - id: ty
diff --git a/README.md b/README.md
index ca1c0a1..e00b79c 100644
--- a/README.md
+++ b/README.md
@@ -88,13 +88,14 @@ Note:
 
 Most `Itr` methods are **lazy transformations**, meaning they return a new `Itr` instance without immediately processing any data. This allows for arbitrary chaining and efficient memory usage, as items are only processed as they are requested. In most cases, `Itr` simply acts as a convenient wrapper around `itertools`, enabling this left-to-right chaining syntax.
 
-- **Combining and splitting:**  `partition`, `copy`, `batched`, `pairwise`, `rolling`, `chain`, `cycle`, `repeat`, `product`, `inspect`, `intersperse`, `interleave`, `value_counts`
-- **Transformation and filtering:** `accumulated`, `filter`, `map`, `starmap`, `map_while`, `flatten`, `flat_map`, `skip_while`, `take_while`, `groupby`
+- **Combining and splitting:**  `partition`, `copy`, `batched`, `pairwise`, `rolling`, `chain`, `cycle`, `repeat`, `product`, `inspect`, `intersperse`, `interleave`, `chunk_by`
+- **Transformation and filtering:** `accumulated`, `filter`, `map`, `starmap`, `map_while`, `flatten`, `flat_map`, `skip_while`, `take_while`
 
 However, some methods are **eager consumers**. These methods iterate over and consume the underlying data, returning concrete values, collections, or aggregates. Examples include:
 
 *   **Collection methods:** `collect`, `last`, `next`, `next_chunk`, `nth`, `position`
 *   **Aggregation methods:** `count`, `reduce`, `max`, `min`, `all`, `any`, `consume`, `find`, `fold`
+*   **Sorting/grouping:** `groupby` and `value_counts` sort the entire input up front, so they consume the whole iterator immediately and must not be used on infinite sources. Use the lazy `chunk_by` to group consecutive runs without sorting.
 
 ### Important Considerations
 
diff --git a/doc/apidoc.md b/doc/apidoc.md
index 223e296..027daba 100644
--- a/doc/apidoc.md
+++ b/doc/apidoc.md
@@ -1,4 +1,4 @@
-# `Itr` v0.2.2 class documentation
+# `Itr` v0.3.0 class documentation
 A generic iterator adaptor class inspired by Rust's Iterator trait, providing a composable API for
 functional-style iteration and transformation over Python iterables.
 ## Public methods
@@ -96,6 +96,25 @@ Returns:
 
 
 
+### `chunk_by`
+
+
+Group *consecutive* elements that share the same key, lazily. Unlike `groupby`, the input is not sorted, so
+only adjacent runs are grouped (mirroring `itertools.groupby` and Rust's `chunk_by`). This preserves order and
+works on infinite iterators, provided every run is finite (each run is materialised into a tuple).
+
+Args:
+    grouper (Callable[[T], U]): The key function applied to each element.
+
+Returns:
+    Itr[tuple[U, tuple[T, ...]]]: An iterator over (key, group) pairs, where each group is a tuple of the
+    consecutive elements sharing that key.
+
+Example:
+    >>> Itr([1, 1, 2, 3, 3, 1]).chunk_by(lambda x: x).map(lambda kv: kv[0]).collect()
+    (1, 2, 3, 1)
+
+
 ### `collect`
 
 Collect all remaining items from the iterator into a sequence (tuple by default).
@@ -231,6 +250,14 @@ Sort and then group an iterable by the supplied key function. Note the following
 - The iterable is pre-sorted because itertools.groupby only works correctly on sorted sequences
 - The resulting groupby objects are realised into tuples
 
+Because the input is sorted, this method is **eager**: it consumes and materialises the whole iterator
+immediately (so it must not be used on an infinite iterator), the output is ordered by key, and the keys must be
+mutually orderable. For lazy, order-preserving grouping of consecutive runs, see `chunk_by`.
+
+Semantically this is equivalent to pandas' default `groupby` (i.e. `sort=True`): all items sharing a key are
+collected into a single group regardless of their position, and groups are emitted in sorted-key order. It has
+no `sort=False` (appearance-order) option, requires mutually-orderable keys, and does not drop `None` keys.
+
 Returns:
     Itr[tuple[U, tuple[T,...]]]: An iterator over the keys and tuples of values
 
@@ -259,20 +286,20 @@ Example:
 ### `interleave`
 
 
-Interleaves elements from this iterator with elements from another iterator.
-Stops when either iterator is exhausted.
+Interleaves elements from this iterator with elements from another iterable, yielding alternately from each.
+When one iterable is exhausted, the remaining elements of the other are yielded in order.
 
 Args:
-    other (Itr[U]): Another iterator to interleave with.
+    other (Iterable[U]): Another iterable to interleave with.
 
 Returns:
     Itr[T | U]: A new iterator yielding elements alternately from self and other.
 
 Example:
-    itr1 = Itr([1, 3, 5])
-    itr2 = Itr([2, 4, 6])
-    result = itr1.interleave(itr2)
-    list(result)  # [1, 2, 3, 4, 5, 6]
+    >>> Itr([1, 3, 5]).interleave([2, 4, 6]).collect()
+    (1, 2, 3, 4, 5, 6)
+    >>> Itr([1, 3, 5, 7]).interleave([2, 4]).collect()
+    (1, 2, 3, 4, 5, 7)
 
 
 ### `intersperse`
@@ -295,6 +322,9 @@ Return the last item from the iterator. Do not use on an open-ended Iterable
 Returns:
     T: The last item.
 
+Raises:
+    ValueError: If the iterator is empty.
+
 
 
 ### `map`
@@ -340,7 +370,7 @@ Returns:
 Return the maximum element from the iterator, optionally using a key function.
 
 Args:
-    key (Callable[[T], object] | None, optional): A function to extract a comparison key from each element. Defaults to None.
+    key (Callable[[T], Any] | None, optional): A function to extract a comparison key from each element. Defaults to None.
 
 Returns:
     T: The maximum element in the iterator.
@@ -355,7 +385,7 @@ Raises:
 Return the minimum element from the iterator, optionally using a key function.
 
 Args:
-    key (Callable[[T], object] | None, optional): A function to extract a comparison key from each element. Defaults to None.
+    key (Callable[[T], Any] | None, optional): A function to extract a comparison key from each element. Defaults to None.
 
 Returns:
     T: The minimum element in the iterator.
@@ -387,17 +417,20 @@ Returns:
 
 ### `nth`
 
-Return the n-th item (1-based) from the iterator, or None if out of range.
+Return the n-th item (0-based) from the iterator, consuming the preceding items.
+
+This matches Rust's ``Iterator::nth`` and Python's 0-based indexing conventions: ``nth(0)`` returns the first
+item, ``nth(1)`` the second, and so on.
 
 Args:
-    n (int): The index (1-based) of the item to return.
+    n (int): The index (0-based) of the item to return.
 
 Returns:
     T: The n-th item.
 
 Raises:
-    StopIteration: if the iterator is exhausted.
-    ValueError: if n < 1
+    StopIteration: if fewer than n + 1 items remain in the iterator.
+    ValueError: if n < 0
 
 
 
diff --git a/pyproject.toml b/pyproject.toml
index ef32a64..74e7768 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "itrx"
-version = "0.2.3"
+version = "0.3.0"
 description = "A chainable iterator adapter"
 readme = "README.md"
 authors = [
diff --git a/relnotes.md b/relnotes.md
new file mode 100644
index 0000000..110827d
--- /dev/null
+++ b/relnotes.md
@@ -0,0 +1,15 @@
+## 0.3.0
+
+### Breaking changes
+
+- `nth` is now **0-based**, consistent with Rust's `Iterator::nth` and Python's indexing conventions: `nth(0)` returns the first item (previously this raised `ValueError` and `nth(1)` returned the first item). Update callers by passing an index one lower than before: `nth(1)` becomes `nth(0)`, and `nth(i + 1)` becomes `nth(i)`.
+- `interleave` now yields the remaining elements of the longer iterable once the shorter one is exhausted, matching Rust's `interleave` (previously it stopped at the shorter input, silently dropping the tail).
+
+### New features
+
+- `chunk_by`: lazily group *consecutive* elements sharing a key (the semantics of `itertools.groupby` / Rust's `chunk_by`). Unlike `groupby` it does not sort, so it preserves order and works on infinite iterators, provided every run of equal keys is finite (each run is materialised into a tuple before it is yielded).
+
+### Documentation
+
+- Clarified that `groupby` (and `value_counts`, which builds on it) is **eager**: it sorts the entire input up front, so it reorders output, requires mutually-orderable keys, and must not be used on infinite sources. Corrected the lazy/eager categorisation in the README.
+- Corrected the `nth` docstring, which previously claimed it returned `None` when out of range (it raises `StopIteration`).
diff --git a/src/itrx/itr.py b/src/itrx/itr.py
index e61319c..ce3367d 100644
--- a/src/itrx/itr.py
+++ b/src/itrx/itr.py
@@ -257,12 +257,41 @@ def for_each(self, func: Callable[[T], None]) -> None:
         for item in self._it:
             func(item)
 
+    def chunk_by[U](self, grouper: Callable[[T], U]) -> "Itr[tuple[U, tuple[T, ...]]]":
+        """
+        Group *consecutive* elements that share the same key, lazily. Unlike `groupby`, the input is not sorted, so
+        only adjacent runs are grouped (mirroring `itertools.groupby` and Rust's `chunk_by`). This preserves order and
+        works on infinite iterators, provided every run is finite (each run is materialised into a tuple).
+
+        Args:
+            grouper (Callable[[T], U]): The key function applied to each element.
+
+        Returns:
+            Itr[tuple[U, tuple[T, ...]]]: An iterator over (key, group) pairs, where each group is a tuple of the
+            consecutive elements sharing that key.
+
+        Example:
+            >>> Itr([1, 1, 2, 3, 3, 1]).chunk_by(lambda x: x).map(lambda kv: kv[0]).collect()
+            (1, 2, 3, 1)
+        """
+        key_fn = cast("Callable[[T], Any]", grouper)
+        groups = ((k, tuple(v)) for k, v in itertools.groupby(self._it, key=key_fn))
+        return cast("Itr[tuple[U, tuple[T, ...]]]", Itr(groups))
+
     def groupby[U](self, grouper: Callable[[T], U]) -> "Itr[tuple[U, tuple[T,...]]]":
         """
         Sort and then group an iterable by the supplied key function. Note the following differences from itertools:
         - The iterable is pre-sorted because itertools.groupby only works correctly on sorted sequences
         - The resulting groupby objects are realised into tuples
 
+        Because the input is sorted, this method is **eager**: it consumes and materialises the whole iterator
+        immediately (so it must not be used on an infinite iterator), the output is ordered by key, and the keys must be
+        mutually orderable. For lazy, order-preserving grouping of consecutive runs, see `chunk_by`.
+
+        Semantically this is equivalent to pandas' default `groupby` (i.e. `sort=True`): all items sharing a key are
+        collected into a single group regardless of their position, and groups are emitted in sorted-key order. It has
+        no `sort=False` (appearance-order) option, requires mutually-orderable keys, and does not drop `None` keys.
+
         Returns:
             Itr[tuple[U, tuple[T,...]]]: An iterator over the keys and tuples of values
 
@@ -322,23 +351,30 @@ def intersperser(item: U) -> Generator[T | U, None, None]:
 
     def interleave[U](self, other: Iterable[U]) -> "Itr[T | U]":
         """
-        Interleaves elements from this iterator with elements from another iterator.
-        Stops when either iterator is exhausted.
+        Interleaves elements from this iterator with elements from another iterable, yielding alternately from each.
+        When one iterable is exhausted, the remaining elements of the other are yielded in order.
 
         Args:
-            other (Itr[U]): Another iterator to interleave with.
+            other (Iterable[U]): Another iterable to interleave with.
 
         Returns:
             Itr[T | U]: A new iterator yielding elements alternately from self and other.
 
         Example:
-            itr1 = Itr([1, 3, 5])
-            itr2 = Itr([2, 4, 6])
-            result = itr1.interleave(itr2)
-            list(result)  # [1, 2, 3, 4, 5, 6]
+            >>> Itr([1, 3, 5]).interleave([2, 4, 6]).collect()
+            (1, 2, 3, 4, 5, 6)
+            >>> Itr([1, 3, 5, 7]).interleave([2, 4]).collect()
+            (1, 2, 3, 4, 5, 7)
         """
+        _sentinel = object()  # unique pad for zip_longest, so genuine None items are not mistaken for padding
+
+        def interleaver() -> Generator[T | U, None, None]:
+            for pair in itertools.zip_longest(self._it, other, fillvalue=_sentinel):
+                for item in pair:
+                    if item is not _sentinel:  # drop the padding standing in for the exhausted side
+                        yield cast("T | U", item)
 
-        return cast("Itr[T | U]", Itr(self.zip(other).flatten()))
+        return cast("Itr[T | U]", Itr(interleaver()))
 
     def last(self) -> T:
         """Return the last item from the iterator. Do not use on an open-ended Iterable
@@ -442,22 +478,25 @@ def next_chunk(self, n: int) -> tuple[T, ...]:
         return self.take(n).collect()
 
     def nth(self, n: int) -> T:
-        """Return the n-th item (1-based) from the iterator, or None if out of range.
+        """Return the n-th item (0-based) from the iterator, consuming the preceding items.
+
+        This matches Rust's ``Iterator::nth`` and Python's 0-based indexing conventions: ``nth(0)`` returns the first
+        item, ``nth(1)`` the second, and so on.
 
         Args:
-            n (int): The index (1-based) of the item to return.
+            n (int): The index (0-based) of the item to return.
 
         Returns:
             T: The n-th item.
 
         Raises:
-            StopIteration: if the iterator is exhausted.
-            ValueError: if n < 1
+            StopIteration: if fewer than n + 1 items remain in the iterator.
+            ValueError: if n < 0
 
         """
-        if n < 1:
-            raise ValueError(f"nth index must be >= 1, got {n}")
-        return self.skip(n - 1).next()
+        if n < 0:
+            raise ValueError(f"nth index must be >= 0, got {n}")
+        return self.skip(n).next()
 
     def pairwise(self) -> "Itr[tuple[T, T]]":
         """Returns an iterator that yields consecutive pairs of elements from the iterable.
diff --git a/src/test/test_collection.py b/src/test/test_collection.py
index a65e13d..f655a5a 100644
--- a/src/test/test_collection.py
+++ b/src/test/test_collection.py
@@ -52,8 +52,10 @@ def test_next_chunk_overrun() -> None:
 def test_nth() -> None:
     it = Itr([10, 20, 30, 40])
     with pytest.raises(ValueError):
-        it.nth(0)
-    assert it.nth(3) == 30
+        it.nth(-1)
+    assert it.nth(0) == 10
+    # nth(0) consumed 10, leaving 20, 30, 40, so index 2 from the current position is 40
+    assert it.nth(2) == 40
     with pytest.raises(StopIteration):
         it.nth(10)
 
diff --git a/src/test/test_combine_split.py b/src/test/test_combine_split.py
index 29e8683..1febe25 100644
--- a/src/test/test_combine_split.py
+++ b/src/test/test_combine_split.py
@@ -65,29 +65,29 @@ def test_interleave_first_longer() -> None:
     it1 = Itr([1, 3, 5, 7])
     it2 = Itr([2, 4])
     result = it1.interleave(it2)
-    # Stops when either iterator is exhausted
-    assert result.collect() == (1, 2, 3, 4)
+    # When one iterable is exhausted, the remainder of the other is yielded in order
+    assert result.collect() == (1, 2, 3, 4, 5, 7)
 
 
 def test_interleave_second_longer() -> None:
     it1 = Itr([1, 3])
     it2 = Itr([2, 4, 6, 8])
     result = it1.interleave(it2)
-    assert result.collect() == (1, 2, 3, 4)
+    assert result.collect() == (1, 2, 3, 4, 6, 8)  # once it1 runs out, the rest of it2 (6, 8) follows in order
 
 
 def test_interleave_empty_first() -> None:
     it1: Itr[int] = Itr([])
     it2 = Itr([2, 4, 6])
     result = it1.interleave(it2)
-    assert result.collect() == ()
+    assert result.collect() == (2, 4, 6)  # an empty self contributes nothing; other passes through intact
 
 
 def test_interleave_empty_second() -> None:
     it1 = Itr([1, 3, 5])
     it2: Itr[int] = Itr([])
     result = it1.interleave(it2)
-    assert result.collect() == ()
+    assert result.collect() == (1, 3, 5)  # an empty other contributes nothing; self passes through intact
 
 
 def test_interleave_both_empty() -> None:
diff --git a/src/test/test_transform_filter.py b/src/test/test_transform_filter.py
index be3e8f7..5eab93d 100644
--- a/src/test/test_transform_filter.py
+++ b/src/test/test_transform_filter.py
@@ -1,3 +1,4 @@
+import itertools
 from collections import defaultdict
 from operator import mul
 
@@ -193,3 +194,15 @@ def test_groupby_string() -> None:
     assert tuple(d.keys()) == (5, 6)
     assert d[5] == ("apple",)
     assert d[6] == ("banana", "carrot")
+
+
+def test_chunk_by() -> None:
+    # only adjacent equal keys merge, so the trailing 1 forms its own group; input order is kept (unlike groupby)
+    it = Itr([1, 1, 2, 3, 3, 1]).chunk_by(lambda x: x)
+    assert it.collect() == ((1, (1, 1)), (2, (2,)), (3, (3, 3)), (1, (1,)))
+
+
+def test_chunk_by_lazy_on_infinite() -> None:
+    # chunk_by is lazy, so it works on unbounded iterators whose runs are finite (here every run has length 2)
+    counts = Itr(itertools.count()).chunk_by(lambda n: n // 2).take(3).collect()
+    assert counts == ((0, (0, 1)), (1, (2, 3)), (2, (4, 5)))
diff --git a/uv.lock b/uv.lock
index 7f50d7b..e227335 100644
--- a/uv.lock
+++ b/uv.lock
@@ -323,7 +323,7 @@ wheels = [
 
 [[package]]
 name = "itrx"
-version = "0.2.3"
+version = "0.3.0"
 source = { editable = "." }
 
 [package.optional-dependencies]